tcp/dccp: install syn_recv requests into ehash table
In this patch, we insert request sockets into the regular TCP/DCCP ehash table (where ESTABLISHED and TIMEWAIT sockets are) instead of using the per-listener hash table.

ACK packets find SYN_RECV pseudo sockets without having to find and lock the listener. In nominal conditions, this halves the pressure on the listener lock.

Note that this will allow for SO_REUSEPORT refinements, so that we can select a listener using cpu/numa affinities instead of the prior 'consistent hash', since only SYN packets will apply this selection logic.

We will shrink listen_sock in the following patch to ease code review.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ying Cai <ycai@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

zatwierdzone przez
David S. Miller

rodzic
2feda34192
commit
079096f103
@@ -444,36 +444,6 @@ put_and_exit:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
|
||||
|
||||
/*
 * dccp_v4_hnd_req - pick the socket that should process a packet which
 * arrived on socket @sk (IPv4).
 *
 * @sk:  socket the packet was delivered to; the dccp_v4_do_rcv() hunk in this
 *       diff calls this helper only when sk->sk_state == DCCP_LISTEN.
 * @skb: the received packet; its DCCP and IP headers supply the lookup keys.
 *
 * Lookup order, as written below:
 *   1. a pending connection request found by inet_csk_search_req();
 *   2. a socket found by inet_lookup_established() in dccp_hashinfo;
 *   3. otherwise @sk itself.
 *
 * Returns:
 *   - whatever dccp_check_req() returns when a request matched (may be NULL);
 *   - the established socket, after bh_lock_sock(), when it is not in
 *     DCCP_TIME_WAIT;
 *   - NULL when the established-table match is in DCCP_TIME_WAIT;
 *   - @sk when nothing matched.
 *
 * NOTE(review): the hunk header above (-444,36 +444,6) suggests this helper is
 * deleted by this commit, in line with the commit message's move of request
 * sockets into the ehash table - confirm against the raw patch, since the
 * +/- markers are not visible in this rendering.
 */
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
const struct dccp_hdr *dh = dccp_hdr(skb);
|
||||
const struct iphdr *iph = ip_hdr(skb);
|
||||
struct sock *nsk;
|
||||
/* Find possible connection requests. */
|
||||
struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
|
||||
iph->saddr, iph->daddr);
|
||||
if (req) {
|
||||
nsk = dccp_check_req(sk, skb, req);
|
||||
/* No socket came back for this request: put the request. */
/* NOTE(review): presumably this drops the reference taken by the lookup - confirm. */
if (!nsk)
|
||||
reqsk_put(req);
|
||||
return nsk;
|
||||
}
|
||||
/* No pending request matched: try the established-socket table. */
nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
|
||||
iph->saddr, dh->dccph_sport,
|
||||
iph->daddr, dh->dccph_dport,
|
||||
inet_iif(skb));
|
||||
if (nsk != NULL) {
|
||||
/* A non-TIME_WAIT match is returned with bh_lock_sock() already applied. */
if (nsk->sk_state != DCCP_TIME_WAIT) {
|
||||
bh_lock_sock(nsk);
|
||||
return nsk;
|
||||
}
|
||||
/* TIME_WAIT match: put the timewait socket and report no socket. */
inet_twsk_put(inet_twsk(nsk));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Nothing matched: the caller keeps working with the original socket. */
return sk;
|
||||
}
|
||||
|
||||
static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
@@ -705,18 +675,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
|
||||
* NOTE: the check for the packet types is done in
|
||||
* dccp_rcv_state_process
|
||||
*/
|
||||
if (sk->sk_state == DCCP_LISTEN) {
|
||||
struct sock *nsk = dccp_v4_hnd_req(sk, skb);
|
||||
|
||||
if (nsk == NULL)
|
||||
goto discard;
|
||||
|
||||
if (nsk != sk) {
|
||||
if (dccp_child_process(sk, nsk, skb))
|
||||
goto reset;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (dccp_rcv_state_process(sk, skb, dh, skb->len))
|
||||
goto reset;
|
||||
@@ -724,7 +682,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
reset:
|
||||
dccp_v4_ctl_send_reset(sk, skb);
|
||||
discard:
|
||||
kfree_skb(skb);
|
||||
return 0;
|
||||
}
|
||||
@@ -868,6 +825,27 @@ static int dccp_v4_rcv(struct sk_buff *skb)
|
||||
goto no_dccp_socket;
|
||||
}
|
||||
|
||||
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
|
||||
struct request_sock *req = inet_reqsk(sk);
|
||||
struct sock *nsk = NULL;
|
||||
|
||||
sk = req->rsk_listener;
|
||||
if (sk->sk_state == DCCP_LISTEN)
|
||||
nsk = dccp_check_req(sk, skb, req);
|
||||
if (!nsk) {
|
||||
reqsk_put(req);
|
||||
goto discard_it;
|
||||
}
|
||||
if (nsk == sk) {
|
||||
sock_hold(sk);
|
||||
reqsk_put(req);
|
||||
} else if (dccp_child_process(sk, nsk, skb)) {
|
||||
dccp_v4_ctl_send_reset(sk, skb);
|
||||
goto discard_it;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
|
||||
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted
|
||||
|
@@ -290,37 +290,6 @@ static struct request_sock_ops dccp6_request_sock_ops = {
|
||||
.syn_ack_timeout = dccp_syn_ack_timeout,
|
||||
};
|
||||
|
||||
/*
 * dccp_v6_hnd_req - pick the socket that should process a packet which
 * arrived on socket @sk (IPv6).
 *
 * @sk:  socket the packet was delivered to; the dccp_v6_do_rcv() hunk in this
 *       diff calls this helper only when sk->sk_state == DCCP_LISTEN.
 * @skb: the received packet; its DCCP and IPv6 headers supply the lookup keys.
 *
 * Lookup order, as written below:
 *   1. a pending connection request found by inet6_csk_search_req();
 *   2. a socket found by __inet6_lookup_established() in dccp_hashinfo;
 *   3. otherwise @sk itself.
 *
 * Returns:
 *   - whatever dccp_check_req() returns when a request matched (may be NULL);
 *   - the established socket, after bh_lock_sock(), when it is not in
 *     DCCP_TIME_WAIT;
 *   - NULL when the established-table match is in DCCP_TIME_WAIT;
 *   - @sk when nothing matched.
 *
 * NOTE(review): the hunk header above (-290,37 +290,6) suggests this helper is
 * deleted by this commit - confirm against the raw patch, since the +/-
 * markers are not visible in this rendering.
 */
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
|
||||
{
|
||||
const struct dccp_hdr *dh = dccp_hdr(skb);
|
||||
const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||
struct request_sock *req;
|
||||
struct sock *nsk;
|
||||
|
||||
/* Search for a pending request keyed on source port, both addresses and inet6_iif(skb). */
req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
|
||||
&iph->daddr, inet6_iif(skb));
|
||||
if (req) {
|
||||
nsk = dccp_check_req(sk, skb, req);
|
||||
/* No socket came back for this request: put the request. */
/* NOTE(review): presumably this drops the reference taken by the lookup - confirm. */
if (!nsk)
|
||||
reqsk_put(req);
|
||||
return nsk;
|
||||
}
|
||||
/*
 * No pending request matched: try the established-socket table.
 * The destination port goes through ntohs() here while the source port
 * is passed as found in the header.
 */
nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
|
||||
&iph->saddr, dh->dccph_sport,
|
||||
&iph->daddr, ntohs(dh->dccph_dport),
|
||||
inet6_iif(skb));
|
||||
if (nsk != NULL) {
|
||||
/* A non-TIME_WAIT match is returned with bh_lock_sock() already applied. */
if (nsk->sk_state != DCCP_TIME_WAIT) {
|
||||
bh_lock_sock(nsk);
|
||||
return nsk;
|
||||
}
|
||||
/* TIME_WAIT match: put the timewait socket and report no socket. */
inet_twsk_put(inet_twsk(nsk));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Nothing matched: the caller keeps working with the original socket. */
return sk;
|
||||
}
|
||||
|
||||
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct request_sock *req;
|
||||
@@ -398,7 +367,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
if (dccp_v6_send_response(sk, req))
|
||||
goto drop_and_free;
|
||||
|
||||
inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
|
||||
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
|
||||
return 0;
|
||||
|
||||
drop_and_free:
|
||||
@@ -641,24 +610,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
|
||||
* NOTE: the check for the packet types is done in
|
||||
* dccp_rcv_state_process
|
||||
*/
|
||||
if (sk->sk_state == DCCP_LISTEN) {
|
||||
struct sock *nsk = dccp_v6_hnd_req(sk, skb);
|
||||
|
||||
if (nsk == NULL)
|
||||
goto discard;
|
||||
/*
|
||||
* Queue it on the new socket if the new socket is active,
|
||||
* otherwise we just shortcircuit this and continue with
|
||||
* the new socket..
|
||||
*/
|
||||
if (nsk != sk) {
|
||||
if (dccp_child_process(sk, nsk, skb))
|
||||
goto reset;
|
||||
if (opt_skb != NULL)
|
||||
__kfree_skb(opt_skb);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
|
||||
goto reset;
|
||||
@@ -732,6 +683,27 @@ static int dccp_v6_rcv(struct sk_buff *skb)
|
||||
goto no_dccp_socket;
|
||||
}
|
||||
|
||||
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
|
||||
struct request_sock *req = inet_reqsk(sk);
|
||||
struct sock *nsk = NULL;
|
||||
|
||||
sk = req->rsk_listener;
|
||||
if (sk->sk_state == DCCP_LISTEN)
|
||||
nsk = dccp_check_req(sk, skb, req);
|
||||
if (!nsk) {
|
||||
reqsk_put(req);
|
||||
goto discard_it;
|
||||
}
|
||||
if (nsk == sk) {
|
||||
sock_hold(sk);
|
||||
reqsk_put(req);
|
||||
} else if (dccp_child_process(sk, nsk, skb)) {
|
||||
dccp_v6_ctl_send_reset(sk, skb);
|
||||
goto discard_it;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
|
||||
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted
|
||||
|
Reference in New Issue
Block a user