net: Add sysctl to toggle early demux for tcp and udp

Certain system process significant unconnected UDP workload.
It would be preferrable to disable UDP early demux for those systems
and enable it for TCP only.

By disabling UDP demux, we see these slight gains on an ARM64 system-
782 -> 788Mbps unconnected single stream UDPv4
633 -> 654Mbps unconnected UDPv4 different sources

The performance impact can change based on CPU architecure and cache
sizes. There will not much difference seen if entire UDP hash table
is in cache.

Both sysctls are enabled by default to preserve existing behavior.

v1->v2: Change function pointer instead of adding conditional as
suggested by Stephen.

v2->v3: Read once in callers to avoid issues due to compiler
optimizations. Also update commit message with the tests.

v3->v4: Store and use read once result instead of querying pointer
again incorrectly.

v4->v5: Refactor to avoid errors due to compilation with IPV6={m,n}

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Tom Herbert <tom@herbertland.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
subashab@codeaurora.org
2017-03-23 13:34:16 -06:00
committed by David S. Miller
parent 8fa96e3bf6
commit dddb64bcb3
12 changed files with 103 additions and 14 deletions

View File

@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
static const struct net_protocol tcp_protocol = {
static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
static const struct net_protocol udp_protocol = {
static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
net->ipv4.sysctl_udp_early_demux = 1;
net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif

View File

@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux) {
ipprot->early_demux(skb);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}

View File

@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);

View File

@@ -24,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/protocol.h>
static int zero;
static int one = 1;
@@ -294,6 +295,58 @@ bad_key:
return ret;
}
static void proc_configure_early_demux(int enabled, int protocol)
{
struct net_protocol *ipprot;
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol *ip6prot;
#endif
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot)
ipprot->early_demux = enabled ? ipprot->early_demux_handler :
NULL;
#if IS_ENABLED(CONFIG_IPV6)
ip6prot = rcu_dereference(inet6_protos[protocol]);
if (ip6prot)
ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
NULL;
#endif
}
static int proc_tcp_early_demux(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && !ret) {
int enabled = init_net.ipv4.sysctl_tcp_early_demux;
proc_configure_early_demux(enabled, IPPROTO_TCP);
}
return ret;
}
static int proc_udp_early_demux(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && !ret) {
int enabled = init_net.ipv4.sysctl_udp_early_demux;
proc_configure_early_demux(enabled, IPPROTO_UDP);
}
return ret;
}
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -749,6 +802,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "udp_early_demux",
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_udp_early_demux
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_tcp_early_demux
},
{
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,