ipv4: tcp: remove per net tcp_sock

tcp_v4_send_reset() and tcp_v4_send_ack() use a single socket
per network namespace.

This leads to bad behavior on multiqueue NICS, because many cpus
contend for the socket lock and once socket lock is acquired, extra
false sharing on various socket fields slow down the operations.

To better resist to attacks, we use a percpu socket. Each cpu can
run without contention, using appropriate memory (local node)

Additional features :

1) We also mirror the queue_mapping of the incoming skb, so that
answers use the same queue if possible.

2) Setting SOCK_USE_WRITE_QUEUE socket flag speedup sock_wfree()

3) We now limit the number of in-flight RST/ACK [1] packets
per cpu, instead of per namespace, and we honor the sysctl_wmem_default
limit dynamically. (Prior to this patch, sysctl_wmem_default value was
copied at boot time, so any further change would not affect tcp_sock
limit)

[1] These packets are only generated when no socket was matched for
the incoming packet.

Reported-by: Bill Sommerfeld <wsommerfeld@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet
2012-07-19 07:34:03 +00:00
committed by David S. Miller
parent aee06da672
commit be9f4a44e7
4 changed files with 36 additions and 25 deletions

View File

@@ -688,7 +688,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -771,7 +771,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -2624,13 +2624,11 @@ EXPORT_SYMBOL(tcp_prot);
static int __net_init tcp_sk_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv4.tcp_sock,
PF_INET, SOCK_RAW, IPPROTO_TCP, net);
return 0;
}
static void __net_exit tcp_sk_exit(struct net *net)
{
inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)