ipvlan_l3s.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Copyright (c) 2014 Mahesh Bandewar <[email protected]>
  3. */
  4. #include "ipvlan.h"
  5. static unsigned int ipvlan_netid __read_mostly;
  6. struct ipvlan_netns {
  7. unsigned int ipvl_nf_hook_refcnt;
  8. };
  9. static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
  10. struct net_device *dev)
  11. {
  12. struct ipvl_addr *addr = NULL;
  13. struct ipvl_port *port;
  14. int addr_type;
  15. void *lyr3h;
  16. if (!dev || !netif_is_ipvlan_port(dev))
  17. goto out;
  18. port = ipvlan_port_get_rcu(dev);
  19. if (!port || port->mode != IPVLAN_MODE_L3S)
  20. goto out;
  21. lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
  22. if (!lyr3h)
  23. goto out;
  24. addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
  25. out:
  26. return addr;
  27. }
  28. static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev,
  29. struct sk_buff *skb, u16 proto)
  30. {
  31. struct ipvl_addr *addr;
  32. struct net_device *sdev;
  33. addr = ipvlan_skb_to_addr(skb, dev);
  34. if (!addr)
  35. goto out;
  36. sdev = addr->master->dev;
  37. switch (proto) {
  38. case AF_INET:
  39. {
  40. struct iphdr *ip4h = ip_hdr(skb);
  41. int err;
  42. err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
  43. ip4h->tos, sdev);
  44. if (unlikely(err))
  45. goto out;
  46. break;
  47. }
  48. #if IS_ENABLED(CONFIG_IPV6)
  49. case AF_INET6:
  50. {
  51. struct dst_entry *dst;
  52. struct ipv6hdr *ip6h = ipv6_hdr(skb);
  53. int flags = RT6_LOOKUP_F_HAS_SADDR;
  54. struct flowi6 fl6 = {
  55. .flowi6_iif = sdev->ifindex,
  56. .daddr = ip6h->daddr,
  57. .saddr = ip6h->saddr,
  58. .flowlabel = ip6_flowinfo(ip6h),
  59. .flowi6_mark = skb->mark,
  60. .flowi6_proto = ip6h->nexthdr,
  61. };
  62. skb_dst_drop(skb);
  63. dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
  64. skb, flags);
  65. skb_dst_set(skb, dst);
  66. break;
  67. }
  68. #endif
  69. default:
  70. break;
  71. }
  72. out:
  73. return skb;
  74. }
  75. static const struct l3mdev_ops ipvl_l3mdev_ops = {
  76. .l3mdev_l3_rcv = ipvlan_l3_rcv,
  77. };
  78. static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
  79. const struct nf_hook_state *state)
  80. {
  81. struct ipvl_addr *addr;
  82. unsigned int len;
  83. addr = ipvlan_skb_to_addr(skb, skb->dev);
  84. if (!addr)
  85. goto out;
  86. skb->dev = addr->master->dev;
  87. skb->skb_iif = skb->dev->ifindex;
  88. #if IS_ENABLED(CONFIG_IPV6)
  89. if (addr->atype == IPVL_IPV6)
  90. IP6CB(skb)->iif = skb->dev->ifindex;
  91. #endif
  92. len = skb->len + ETH_HLEN;
  93. ipvlan_count_rx(addr->master, len, true, false);
  94. out:
  95. return NF_ACCEPT;
  96. }
  97. static const struct nf_hook_ops ipvl_nfops[] = {
  98. {
  99. .hook = ipvlan_nf_input,
  100. .pf = NFPROTO_IPV4,
  101. .hooknum = NF_INET_LOCAL_IN,
  102. .priority = INT_MAX,
  103. },
  104. #if IS_ENABLED(CONFIG_IPV6)
  105. {
  106. .hook = ipvlan_nf_input,
  107. .pf = NFPROTO_IPV6,
  108. .hooknum = NF_INET_LOCAL_IN,
  109. .priority = INT_MAX,
  110. },
  111. #endif
  112. };
  113. static int ipvlan_register_nf_hook(struct net *net)
  114. {
  115. struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  116. int err = 0;
  117. if (!vnet->ipvl_nf_hook_refcnt) {
  118. err = nf_register_net_hooks(net, ipvl_nfops,
  119. ARRAY_SIZE(ipvl_nfops));
  120. if (!err)
  121. vnet->ipvl_nf_hook_refcnt = 1;
  122. } else {
  123. vnet->ipvl_nf_hook_refcnt++;
  124. }
  125. return err;
  126. }
  127. static void ipvlan_unregister_nf_hook(struct net *net)
  128. {
  129. struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  130. if (WARN_ON(!vnet->ipvl_nf_hook_refcnt))
  131. return;
  132. vnet->ipvl_nf_hook_refcnt--;
  133. if (!vnet->ipvl_nf_hook_refcnt)
  134. nf_unregister_net_hooks(net, ipvl_nfops,
  135. ARRAY_SIZE(ipvl_nfops));
  136. }
  137. void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet)
  138. {
  139. struct ipvlan_netns *old_vnet;
  140. ASSERT_RTNL();
  141. old_vnet = net_generic(oldnet, ipvlan_netid);
  142. if (!old_vnet->ipvl_nf_hook_refcnt)
  143. return;
  144. ipvlan_register_nf_hook(newnet);
  145. ipvlan_unregister_nf_hook(oldnet);
  146. }
  147. static void ipvlan_ns_exit(struct net *net)
  148. {
  149. struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid);
  150. if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) {
  151. vnet->ipvl_nf_hook_refcnt = 0;
  152. nf_unregister_net_hooks(net, ipvl_nfops,
  153. ARRAY_SIZE(ipvl_nfops));
  154. }
  155. }
  156. static struct pernet_operations ipvlan_net_ops = {
  157. .id = &ipvlan_netid,
  158. .size = sizeof(struct ipvlan_netns),
  159. .exit = ipvlan_ns_exit,
  160. };
  161. int ipvlan_l3s_init(void)
  162. {
  163. return register_pernet_subsys(&ipvlan_net_ops);
  164. }
  165. void ipvlan_l3s_cleanup(void)
  166. {
  167. unregister_pernet_subsys(&ipvlan_net_ops);
  168. }
  169. int ipvlan_l3s_register(struct ipvl_port *port)
  170. {
  171. struct net_device *dev = port->dev;
  172. int ret;
  173. ASSERT_RTNL();
  174. ret = ipvlan_register_nf_hook(read_pnet(&port->pnet));
  175. if (!ret) {
  176. dev->l3mdev_ops = &ipvl_l3mdev_ops;
  177. dev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
  178. }
  179. return ret;
  180. }
  181. void ipvlan_l3s_unregister(struct ipvl_port *port)
  182. {
  183. struct net_device *dev = port->dev;
  184. ASSERT_RTNL();
  185. dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
  186. ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
  187. dev->l3mdev_ops = NULL;
  188. }