inet: get rid of central tcp/dccp listener timer

One of the major issue for TCP is the SYNACK rtx handling,
done by inet_csk_reqsk_queue_prune(), fired by the keepalive
timer of a TCP_LISTEN socket.

This function runs for awful long times, with socket lock held,
meaning that other cpus needing this lock have to spin for hundred of ms.

SYNACK are sent in huge bursts, likely to cause severe drops anyway.

This model was OK 15 years ago when memory was very tight.

We now can afford to have a timer per request sock.

Timer invocations no longer need to lock the listener,
and can be run from all cpus in parallel.

With following patch increasing somaxconn width to 32 bits,
I tested a listener with more than 4 million active request sockets,
and a steady SYNFLOOD of ~200,000 SYN per second.
Host was sending ~830,000 SYNACK per second.

This is ~100 times more what we could achieve before this patch.

Later, we will get rid of the listener hash and use ehash instead.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet
2015-03-19 19:04:20 -07:00
committed by David S. Miller
parent 52452c5425
commit fa76ce7328
18 changed files with 177 additions and 202 deletions

View File

@@ -306,6 +306,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
if (!between48(seq, dccp_rsk(req)->dreq_iss,
dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
reqsk_put(req);
goto out;
}
/*
@@ -315,6 +316,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
* errors returned from accept().
*/
inet_csk_reqsk_queue_drop(sk, req);
reqsk_put(req);
goto out;
case DCCP_REQUESTING:
@@ -451,9 +453,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
/* Find possible connection requests. */
struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
iph->saddr, iph->daddr);
if (req)
return dccp_check_req(sk, skb, req);
if (req) {
nsk = dccp_check_req(sk, skb, req);
reqsk_put(req);
return nsk;
}
nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
iph->saddr, dh->dccph_sport,
iph->daddr, dh->dccph_dport,

View File

@@ -157,7 +157,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
req = inet6_csk_search_req(sk, dh->dccph_dport,
&hdr->daddr, &hdr->saddr,
inet6_iif(skb));
if (req == NULL)
if (!req)
goto out;
/*
@@ -169,10 +169,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!between48(seq, dccp_rsk(req)->dreq_iss,
dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
reqsk_put(req);
goto out;
}
inet_csk_reqsk_queue_drop(sk, req);
reqsk_put(req);
goto out;
case DCCP_REQUESTING:
@@ -322,9 +324,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
&iph->daddr, inet6_iif(skb));
if (req != NULL)
return dccp_check_req(sk, skb, req);
if (req) {
nsk = dccp_check_req(sk, skb, req);
reqsk_put(req);
return nsk;
}
nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
&iph->saddr, dh->dccph_sport,
&iph->daddr, ntohs(dh->dccph_dport),

View File

@@ -161,33 +161,11 @@ out:
sock_put(sk);
}
/*
* Timer for listening sockets
*/
static void dccp_response_timer(struct sock *sk)
{
inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
DCCP_RTO_MAX);
}
static void dccp_keepalive_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
/* Only process if socket is not in use. */
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
inet_csk_reset_keepalive_timer(sk, HZ / 20);
goto out;
}
if (sk->sk_state == DCCP_LISTEN) {
dccp_response_timer(sk);
goto out;
}
out:
bh_unlock_sock(sk);
pr_err("dccp should not use a keepalive timer !\n");
sock_put(sk);
}