tcp/udp: Make early_demux back namespacified.
commit 11052589cf5c0bab3b4884d423d5f60c38fcf25d upstream. Commit e21145a987
("ipv4: namespacify ip_early_demux sysctl knob") made it possible to enable/disable early_demux on a per-netns basis. Then, we introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for TCP/UDP in commit dddb64bcb3
("net: Add sysctl to toggle early demux for tcp and udp"). However, the .proc_handler() was wrong and actually disabled us from changing the behaviour in each netns. We can execute early_demux if net.ipv4.ip_early_demux is on and each proto .early_demux() handler is not NULL. When we toggle (tcp|udp)_early_demux, the change itself is saved in each netns variable, but the .early_demux() handler is a global variable, so the handler is switched based on the init_net's sysctl variable. Thus, netns (tcp|udp)_early_demux knobs have nothing to do with the logic. Whether we CAN execute proto .early_demux() is always decided by init_net's sysctl knob, and whether we DO it or not is by each netns ip_early_demux knob. This patch namespacifies (tcp|udp)_early_demux again. For now, the users of the .early_demux() handler are TCP and UDP only, and they are called directly to avoid retpoline. So, we can remove the .early_demux() handler from inet6?_protos and need not dereference them in ip6?_rcv_finish_core(). If another proto needs .early_demux(), we can restore it at that time. Fixes: dddb64bcb3
("net: Add sysctl to toggle early demux for tcp and udp") Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:

committed by
Greg Kroah-Hartman

parent
ea5f2fd464
commit
2bf33b5ea4
@@ -35,8 +35,6 @@
|
|||||||
|
|
||||||
/* This is used to register protocols. */
|
/* This is used to register protocols. */
|
||||||
struct net_protocol {
|
struct net_protocol {
|
||||||
int (*early_demux)(struct sk_buff *skb);
|
|
||||||
int (*early_demux_handler)(struct sk_buff *skb);
|
|
||||||
int (*handler)(struct sk_buff *skb);
|
int (*handler)(struct sk_buff *skb);
|
||||||
|
|
||||||
/* This returns an error if we weren't able to handle the error. */
|
/* This returns an error if we weren't able to handle the error. */
|
||||||
@@ -53,8 +51,6 @@ struct net_protocol {
|
|||||||
|
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
struct inet6_protocol {
|
struct inet6_protocol {
|
||||||
void (*early_demux)(struct sk_buff *skb);
|
|
||||||
void (*early_demux_handler)(struct sk_buff *skb);
|
|
||||||
int (*handler)(struct sk_buff *skb);
|
int (*handler)(struct sk_buff *skb);
|
||||||
|
|
||||||
/* This returns an error if we weren't able to handle the error. */
|
/* This returns an error if we weren't able to handle the error. */
|
||||||
|
@@ -934,7 +934,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
|
|||||||
|
|
||||||
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
|
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
|
||||||
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
|
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
|
||||||
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
|
void tcp_v6_early_demux(struct sk_buff *skb);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -176,6 +176,7 @@ INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
|
|||||||
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
||||||
struct udphdr *uh, struct sock *sk);
|
struct udphdr *uh, struct sock *sk);
|
||||||
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
|
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
|
||||||
|
void udp_v6_early_demux(struct sk_buff *skb);
|
||||||
|
|
||||||
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
|
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
|
||||||
netdev_features_t features, bool is_ipv6);
|
netdev_features_t features, bool is_ipv6);
|
||||||
|
@@ -1726,12 +1726,7 @@ static const struct net_protocol igmp_protocol = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* thinking of making this const? Don't.
|
static const struct net_protocol tcp_protocol = {
|
||||||
* early_demux can change based on sysctl.
|
|
||||||
*/
|
|
||||||
static struct net_protocol tcp_protocol = {
|
|
||||||
.early_demux = tcp_v4_early_demux,
|
|
||||||
.early_demux_handler = tcp_v4_early_demux,
|
|
||||||
.handler = tcp_v4_rcv,
|
.handler = tcp_v4_rcv,
|
||||||
.err_handler = tcp_v4_err,
|
.err_handler = tcp_v4_err,
|
||||||
.no_policy = 1,
|
.no_policy = 1,
|
||||||
@@ -1739,12 +1734,7 @@ static struct net_protocol tcp_protocol = {
|
|||||||
.icmp_strict_tag_validation = 1,
|
.icmp_strict_tag_validation = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* thinking of making this const? Don't.
|
static const struct net_protocol udp_protocol = {
|
||||||
* early_demux can change based on sysctl.
|
|
||||||
*/
|
|
||||||
static struct net_protocol udp_protocol = {
|
|
||||||
.early_demux = udp_v4_early_demux,
|
|
||||||
.early_demux_handler = udp_v4_early_demux,
|
|
||||||
.handler = udp_rcv,
|
.handler = udp_rcv,
|
||||||
.err_handler = udp_err,
|
.err_handler = udp_err,
|
||||||
.no_policy = 1,
|
.no_policy = 1,
|
||||||
|
@@ -309,14 +309,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
|
|||||||
ip_hdr(hint)->tos == iph->tos;
|
ip_hdr(hint)->tos == iph->tos;
|
||||||
}
|
}
|
||||||
|
|
||||||
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
|
int tcp_v4_early_demux(struct sk_buff *skb);
|
||||||
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
|
int udp_v4_early_demux(struct sk_buff *skb);
|
||||||
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
|
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
|
||||||
struct sk_buff *skb, struct net_device *dev,
|
struct sk_buff *skb, struct net_device *dev,
|
||||||
const struct sk_buff *hint)
|
const struct sk_buff *hint)
|
||||||
{
|
{
|
||||||
const struct iphdr *iph = ip_hdr(skb);
|
const struct iphdr *iph = ip_hdr(skb);
|
||||||
int (*edemux)(struct sk_buff *skb);
|
|
||||||
struct rtable *rt;
|
struct rtable *rt;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
@@ -327,21 +326,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
|
|||||||
goto drop_error;
|
goto drop_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (net->ipv4.sysctl_ip_early_demux &&
|
if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
|
||||||
!skb_dst(skb) &&
|
!skb_dst(skb) &&
|
||||||
!skb->sk &&
|
!skb->sk &&
|
||||||
!ip_is_fragment(iph)) {
|
!ip_is_fragment(iph)) {
|
||||||
const struct net_protocol *ipprot;
|
switch (iph->protocol) {
|
||||||
int protocol = iph->protocol;
|
case IPPROTO_TCP:
|
||||||
|
if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
|
||||||
|
tcp_v4_early_demux(skb);
|
||||||
|
|
||||||
ipprot = rcu_dereference(inet_protos[protocol]);
|
/* must reload iph, skb->head might have changed */
|
||||||
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
|
iph = ip_hdr(skb);
|
||||||
err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
|
}
|
||||||
udp_v4_early_demux, skb);
|
break;
|
||||||
if (unlikely(err))
|
case IPPROTO_UDP:
|
||||||
goto drop_error;
|
if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
|
||||||
/* must reload iph, skb->head might have changed */
|
err = udp_v4_early_demux(skb);
|
||||||
iph = ip_hdr(skb);
|
if (unlikely(err))
|
||||||
|
goto drop_error;
|
||||||
|
|
||||||
|
/* must reload iph, skb->head might have changed */
|
||||||
|
iph = ip_hdr(skb);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -361,61 +361,6 @@ bad_key:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void proc_configure_early_demux(int enabled, int protocol)
|
|
||||||
{
|
|
||||||
struct net_protocol *ipprot;
|
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
|
||||||
struct inet6_protocol *ip6prot;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
|
|
||||||
ipprot = rcu_dereference(inet_protos[protocol]);
|
|
||||||
if (ipprot)
|
|
||||||
ipprot->early_demux = enabled ? ipprot->early_demux_handler :
|
|
||||||
NULL;
|
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
|
||||||
ip6prot = rcu_dereference(inet6_protos[protocol]);
|
|
||||||
if (ip6prot)
|
|
||||||
ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
|
|
||||||
NULL;
|
|
||||||
#endif
|
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
static int proc_tcp_early_demux(struct ctl_table *table, int write,
|
|
||||||
void *buffer, size_t *lenp, loff_t *ppos)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
|
||||||
|
|
||||||
if (write && !ret) {
|
|
||||||
int enabled = init_net.ipv4.sysctl_tcp_early_demux;
|
|
||||||
|
|
||||||
proc_configure_early_demux(enabled, IPPROTO_TCP);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int proc_udp_early_demux(struct ctl_table *table, int write,
|
|
||||||
void *buffer, size_t *lenp, loff_t *ppos)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
|
||||||
|
|
||||||
if (write && !ret) {
|
|
||||||
int enabled = init_net.ipv4.sysctl_udp_early_demux;
|
|
||||||
|
|
||||||
proc_configure_early_demux(enabled, IPPROTO_UDP);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
|
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
|
||||||
int write, void *buffer,
|
int write, void *buffer,
|
||||||
size_t *lenp, loff_t *ppos)
|
size_t *lenp, loff_t *ppos)
|
||||||
@@ -685,14 +630,14 @@ static struct ctl_table ipv4_net_table[] = {
|
|||||||
.data = &init_net.ipv4.sysctl_udp_early_demux,
|
.data = &init_net.ipv4.sysctl_udp_early_demux,
|
||||||
.maxlen = sizeof(int),
|
.maxlen = sizeof(int),
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_udp_early_demux
|
.proc_handler = proc_douintvec_minmax,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.procname = "tcp_early_demux",
|
.procname = "tcp_early_demux",
|
||||||
.data = &init_net.ipv4.sysctl_tcp_early_demux,
|
.data = &init_net.ipv4.sysctl_tcp_early_demux,
|
||||||
.maxlen = sizeof(int),
|
.maxlen = sizeof(int),
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_tcp_early_demux
|
.proc_handler = proc_douintvec_minmax,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.procname = "nexthop_compat_mode",
|
.procname = "nexthop_compat_mode",
|
||||||
|
@@ -44,21 +44,25 @@
|
|||||||
#include <net/inet_ecn.h>
|
#include <net/inet_ecn.h>
|
||||||
#include <net/dst_metadata.h>
|
#include <net/dst_metadata.h>
|
||||||
|
|
||||||
INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
|
void udp_v6_early_demux(struct sk_buff *);
|
||||||
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
|
void tcp_v6_early_demux(struct sk_buff *);
|
||||||
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
|
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
|
||||||
struct sk_buff *skb)
|
struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
void (*edemux)(struct sk_buff *skb);
|
if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
|
||||||
|
!skb_dst(skb) && !skb->sk) {
|
||||||
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
|
switch (ipv6_hdr(skb)->nexthdr) {
|
||||||
const struct inet6_protocol *ipprot;
|
case IPPROTO_TCP:
|
||||||
|
if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
|
||||||
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
|
tcp_v6_early_demux(skb);
|
||||||
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
|
break;
|
||||||
INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
|
case IPPROTO_UDP:
|
||||||
udp_v6_early_demux, skb);
|
if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
|
||||||
|
udp_v6_early_demux(skb);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!skb_valid_dst(skb))
|
if (!skb_valid_dst(skb))
|
||||||
ip6_route_input(skb);
|
ip6_route_input(skb);
|
||||||
}
|
}
|
||||||
|
@@ -1818,7 +1818,7 @@ do_time_wait:
|
|||||||
goto discard_it;
|
goto discard_it;
|
||||||
}
|
}
|
||||||
|
|
||||||
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
|
void tcp_v6_early_demux(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
const struct ipv6hdr *hdr;
|
const struct ipv6hdr *hdr;
|
||||||
const struct tcphdr *th;
|
const struct tcphdr *th;
|
||||||
@@ -2169,12 +2169,7 @@ struct proto tcpv6_prot = {
|
|||||||
};
|
};
|
||||||
EXPORT_SYMBOL_GPL(tcpv6_prot);
|
EXPORT_SYMBOL_GPL(tcpv6_prot);
|
||||||
|
|
||||||
/* thinking of making this const? Don't.
|
static const struct inet6_protocol tcpv6_protocol = {
|
||||||
* early_demux can change based on sysctl.
|
|
||||||
*/
|
|
||||||
static struct inet6_protocol tcpv6_protocol = {
|
|
||||||
.early_demux = tcp_v6_early_demux,
|
|
||||||
.early_demux_handler = tcp_v6_early_demux,
|
|
||||||
.handler = tcp_v6_rcv,
|
.handler = tcp_v6_rcv,
|
||||||
.err_handler = tcp_v6_err,
|
.err_handler = tcp_v6_err,
|
||||||
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
|
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
|
||||||
|
@@ -1027,7 +1027,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
|
void udp_v6_early_demux(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
struct net *net = dev_net(skb->dev);
|
struct net *net = dev_net(skb->dev);
|
||||||
const struct udphdr *uh;
|
const struct udphdr *uh;
|
||||||
@@ -1640,12 +1640,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
|
|||||||
return ipv6_getsockopt(sk, level, optname, optval, optlen);
|
return ipv6_getsockopt(sk, level, optname, optval, optlen);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* thinking of making this const? Don't.
|
static const struct inet6_protocol udpv6_protocol = {
|
||||||
* early_demux can change based on sysctl.
|
|
||||||
*/
|
|
||||||
static struct inet6_protocol udpv6_protocol = {
|
|
||||||
.early_demux = udp_v6_early_demux,
|
|
||||||
.early_demux_handler = udp_v6_early_demux,
|
|
||||||
.handler = udpv6_rcv,
|
.handler = udpv6_rcv,
|
||||||
.err_handler = udpv6_err,
|
.err_handler = udpv6_err,
|
||||||
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
|
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
|
||||||
|
Reference in New Issue
Block a user