ipv6: Fix dst_entry refcnt bugs in ip6_tunnel

Problems in the current dst_entry cache in the ip6_tunnel:

1. ip6_tnl_dst_set is racy.  There is no lock to protect it:
   - One major problem is that the dst refcnt gets messed up. F.e.
     the same dst_cache can be released multiple times and then
     triggering the infamous dst refcnt < 0 warning message.
   - Another issue is the inconsistency between dst_cache and
     dst_cookie.

   It can be reproduced by adding and removing the ip6gre tunnel
   while running a super_netperf TCP_CRR test.

2. ip6_tnl_dst_get does not take the dst refcnt before returning
   the dst.

This patch:
1. Create a percpu dst_entry cache in ip6_tnl
2. Use a spinlock to protect the dst_cache operations
3. ip6_tnl_dst_get always takes the dst refcnt before returning

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Martin KaFai Lau
2015-09-15 14:30:07 -07:00
committed by David S. Miller
parent f230d1e891
commit cdf3464e6c
3 changed files with 122 additions and 47 deletions

View File

@@ -637,17 +637,17 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
dst = ip6_tnl_dst_get(tunnel);
if (!dst) {
ndst = ip6_route_output(net, NULL, fl6);
dst = ip6_route_output(net, NULL, fl6);
if (ndst->error)
if (dst->error)
goto tx_err_link_failure;
ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
if (IS_ERR(ndst)) {
err = PTR_ERR(ndst);
ndst = NULL;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
goto tx_err_link_failure;
}
dst = ndst;
ndst = dst;
}
tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb = new_skb;
}
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
} else {
skb_dst_set_noref(skb, dst);
}
if (!fl6->flowi6_mark && ndst)
ip6_tnl_dst_set(tunnel, ndst);
skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
if (encap_limit >= 0) {
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_set_inner_protocol(skb, protocol);
ip6tunnel_xmit(NULL, skb, dev);
if (ndst)
ip6_tnl_dst_set(tunnel, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
dst_release(ndst);
dst_release(dst);
return err;
}
@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
static void ip6gre_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
ip6_tnl_dst_destroy(t);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1248,6 +1246,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
int ret;
tunnel = netdev_priv(dev);
@@ -1259,6 +1258,13 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
ret = ip6_tnl_dst_init(tunnel);
if (ret) {
free_percpu(dev->tstats);
dev->tstats = NULL;
return ret;
}
return 0;
}