inet: add RCU protection to inet->opt

We lack proper synchronization to manipulate inet->opt ip_options

Problem is ip_make_skb() calls ip_setup_cork() and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options),
without any protection against another thread manipulating inet->opt.

Another thread can change inet->opt pointer and free old one under us.

Use RCU to protect inet->opt (changed to inet->inet_opt).

Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.

We cant insert an rcu_head in struct ip_options since its included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet
2011-04-21 09:45:37 +00:00
gecommit door David S. Miller
bovenliggende 0a14842f5a
commit f6d8bd051c
17 gewijzigde bestanden met toevoegingen van 241 en 168 verwijderingen

Bestand weergeven

@@ -451,6 +451,11 @@ out:
}
static void opt_kfree_rcu(struct rcu_head *head)
{
kfree(container_of(head, struct ip_options_rcu, rcu));
}
/*
* Socket option code for IP. This is the end of the line after any
* TCP,UDP etc options on an IP socket.
@@ -497,13 +502,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
switch (optname) {
case IP_OPTIONS:
{
struct ip_options *opt = NULL;
struct ip_options_rcu *old, *opt = NULL;
if (optlen > 40)
goto e_inval;
err = ip_options_get_from_user(sock_net(sk), &opt,
optval, optlen);
if (err)
break;
old = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -512,17 +520,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
(TCPF_LISTEN | TCPF_CLOSE)) &&
inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
if (inet->opt)
icsk->icsk_ext_hdr_len -= inet->opt->optlen;
if (old)
icsk->icsk_ext_hdr_len -= old->opt.optlen;
if (opt)
icsk->icsk_ext_hdr_len += opt->optlen;
icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
}
#endif
}
opt = xchg(&inet->opt, opt);
kfree(opt);
rcu_assign_pointer(inet->inet_opt, opt);
if (old)
call_rcu(&old->rcu, opt_kfree_rcu);
break;
}
case IP_PKTINFO:
@@ -1081,12 +1090,16 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
struct ip_options * opt = (struct ip_options *)optbuf;
struct ip_options *opt = (struct ip_options *)optbuf;
struct ip_options_rcu *inet_opt;
inet_opt = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
opt->optlen = 0;
if (inet->opt)
memcpy(optbuf, inet->opt,
sizeof(struct ip_options)+
inet->opt->optlen);
if (inet_opt)
memcpy(optbuf, &inet_opt->opt,
sizeof(struct ip_options) +
inet_opt->opt.optlen);
release_sock(sk);
if (opt->optlen == 0)