nat6to4.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * This code is taken from the Android Open Source Project and the author
  4. * (Maciej Żenczykowski) has given permission to relicense it under the
  5. * GPLv2. Therefore this program is free software;
  6. * You can redistribute it and/or modify it under the terms of the GNU
  7. * General Public License version 2 as published by the Free Software
  8. * Foundation
  9. * The original headers, including the original license headers, are
  10. * included below for completeness.
  11. *
  12. * Copyright (C) 2019 The Android Open Source Project
  13. *
  14. * Licensed under the Apache License, Version 2.0 (the "License");
  15. * you may not use this file except in compliance with the License.
  16. * You may obtain a copy of the License at
  17. *
  18. * http://www.apache.org/licenses/LICENSE-2.0
  19. *
  20. * Unless required by applicable law or agreed to in writing, software
  21. * distributed under the License is distributed on an "AS IS" BASIS,
  22. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23. * See the License for the specific language governing permissions and
  24. * limitations under the License.
  25. */
  26. #include <linux/bpf.h>
  27. #include <linux/if.h>
  28. #include <linux/if_ether.h>
  29. #include <linux/if_packet.h>
  30. #include <linux/in.h>
  31. #include <linux/in6.h>
  32. #include <linux/ip.h>
  33. #include <linux/ipv6.h>
  34. #include <linux/pkt_cls.h>
  35. #include <linux/swab.h>
  36. #include <stdbool.h>
  37. #include <stdint.h>
  38. #include <linux/udp.h>
  39. #include <bpf/bpf_helpers.h>
  40. #include <bpf/bpf_endian.h>
  41. #define IP_DF 0x4000 // Flag: "Don't Fragment"
  42. SEC("schedcls/ingress6/nat_6")
  43. int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
  44. {
  45. const int l2_header_size = sizeof(struct ethhdr);
  46. void *data = (void *)(long)skb->data;
  47. const void *data_end = (void *)(long)skb->data_end;
  48. const struct ethhdr * const eth = data; // used iff is_ethernet
  49. const struct ipv6hdr * const ip6 = (void *)(eth + 1);
  50. // Require ethernet dst mac address to be our unicast address.
  51. if (skb->pkt_type != PACKET_HOST)
  52. return TC_ACT_OK;
  53. // Must be meta-ethernet IPv6 frame
  54. if (skb->protocol != bpf_htons(ETH_P_IPV6))
  55. return TC_ACT_OK;
  56. // Must have (ethernet and) ipv6 header
  57. if (data + l2_header_size + sizeof(*ip6) > data_end)
  58. return TC_ACT_OK;
  59. // Ethertype - if present - must be IPv6
  60. if (eth->h_proto != bpf_htons(ETH_P_IPV6))
  61. return TC_ACT_OK;
  62. // IP version must be 6
  63. if (ip6->version != 6)
  64. return TC_ACT_OK;
  65. // Maximum IPv6 payload length that can be translated to IPv4
  66. if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
  67. return TC_ACT_OK;
  68. switch (ip6->nexthdr) {
  69. case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
  70. case IPPROTO_UDP: // address means there is no need to update their checksums.
  71. case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers,
  72. case IPPROTO_ESP: // since there is never a checksum to update.
  73. break;
  74. default: // do not know how to handle anything else
  75. return TC_ACT_OK;
  76. }
  77. struct ethhdr eth2; // used iff is_ethernet
  78. eth2 = *eth; // Copy over the ethernet header (src/dst mac)
  79. eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype
  80. struct iphdr ip = {
  81. .version = 4, // u4
  82. .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4
  83. .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8
  84. .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16
  85. .id = 0, // u16
  86. .frag_off = bpf_htons(IP_DF), // u16
  87. .ttl = ip6->hop_limit, // u8
  88. .protocol = ip6->nexthdr, // u8
  89. .check = 0, // u16
  90. .saddr = 0x0201a8c0, // u32
  91. .daddr = 0x0101a8c0, // u32
  92. };
  93. // Calculate the IPv4 one's complement checksum of the IPv4 header.
  94. __wsum sum4 = 0;
  95. for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
  96. sum4 += ((__u16 *)&ip)[i];
  97. // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
  98. sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
  99. sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
  100. ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF
  101. // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
  102. __wsum sum6 = 0;
  103. // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
  104. for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
  105. sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation
  106. // Note that there is no L4 checksum update: we are relying on the checksum neutrality
  107. // of the ipv6 address chosen by netd's ClatdController.
  108. // Packet mutations begin - point of no return, but if this first modification fails
  109. // the packet is probably still pristine, so let clatd handle it.
  110. if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
  111. return TC_ACT_OK;
  112. bpf_csum_update(skb, sum6);
  113. data = (void *)(long)skb->data;
  114. data_end = (void *)(long)skb->data_end;
  115. if (data + l2_header_size + sizeof(struct iphdr) > data_end)
  116. return TC_ACT_SHOT;
  117. struct ethhdr *new_eth = data;
  118. // Copy over the updated ethernet header
  119. *new_eth = eth2;
  120. // Copy over the new ipv4 header.
  121. *(struct iphdr *)(new_eth + 1) = ip;
  122. return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
  123. }
  124. SEC("schedcls/egress4/snat4")
  125. int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
  126. {
  127. const int l2_header_size = sizeof(struct ethhdr);
  128. void *data = (void *)(long)skb->data;
  129. const void *data_end = (void *)(long)skb->data_end;
  130. const struct ethhdr *const eth = data; // used iff is_ethernet
  131. const struct iphdr *const ip4 = (void *)(eth + 1);
  132. // Must be meta-ethernet IPv4 frame
  133. if (skb->protocol != bpf_htons(ETH_P_IP))
  134. return TC_ACT_OK;
  135. // Must have ipv4 header
  136. if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
  137. return TC_ACT_OK;
  138. // Ethertype - if present - must be IPv4
  139. if (eth->h_proto != bpf_htons(ETH_P_IP))
  140. return TC_ACT_OK;
  141. // IP version must be 4
  142. if (ip4->version != 4)
  143. return TC_ACT_OK;
  144. // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
  145. if (ip4->ihl != 5)
  146. return TC_ACT_OK;
  147. // Maximum IPv6 payload length that can be translated to IPv4
  148. if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
  149. return TC_ACT_OK;
  150. // Calculate the IPv4 one's complement checksum of the IPv4 header.
  151. __wsum sum4 = 0;
  152. for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
  153. sum4 += ((__u16 *)ip4)[i];
  154. // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
  155. sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
  156. sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
  157. // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
  158. if (sum4 != 0xFFFF)
  159. return TC_ACT_OK;
  160. // Minimum IPv4 total length is the size of the header
  161. if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
  162. return TC_ACT_OK;
  163. // We are incapable of dealing with IPv4 fragments
  164. if (ip4->frag_off & ~bpf_htons(IP_DF))
  165. return TC_ACT_OK;
  166. switch (ip4->protocol) {
  167. case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
  168. case IPPROTO_GRE: // address means there is no need to update their checksums.
  169. case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers,
  170. break; // since there is never a checksum to update.
  171. case IPPROTO_UDP: // See above comment, but must also have UDP header...
  172. if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
  173. return TC_ACT_OK;
  174. const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
  175. // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
  176. // checksum. Otherwise the network or more likely the NAT64 gateway might
  177. // drop the packet because in most cases IPv6/UDP packets with a zero checksum
  178. // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff()
  179. if (!uh->check)
  180. return TC_ACT_OK;
  181. break;
  182. default: // do not know how to handle anything else
  183. return TC_ACT_OK;
  184. }
  185. struct ethhdr eth2; // used iff is_ethernet
  186. eth2 = *eth; // Copy over the ethernet header (src/dst mac)
  187. eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype
  188. struct ipv6hdr ip6 = {
  189. .version = 6, // __u8:4
  190. .priority = ip4->tos >> 4, // __u8:4
  191. .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3]
  192. .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16
  193. .nexthdr = ip4->protocol, // __u8
  194. .hop_limit = ip4->ttl, // __u8
  195. };
  196. ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
  197. ip6.saddr.in6_u.u6_addr32[1] = 0;
  198. ip6.saddr.in6_u.u6_addr32[2] = 0;
  199. ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
  200. ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
  201. ip6.daddr.in6_u.u6_addr32[1] = 0;
  202. ip6.daddr.in6_u.u6_addr32[2] = 0;
  203. ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
  204. // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
  205. __wsum sum6 = 0;
  206. // We'll end up with a non-zero sum due to ip6.version == 6
  207. for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
  208. sum6 += ((__u16 *)&ip6)[i];
  209. // Packet mutations begin - point of no return, but if this first modification fails
  210. // the packet is probably still pristine, so let clatd handle it.
  211. if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
  212. return TC_ACT_OK;
  213. // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
  214. // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
  215. // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
  216. // However, we've already verified the ipv4 checksum is correct and thus 0.
  217. // Thus we only need to add the ipv6 header's sum.
  218. //
  219. // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
  220. // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details).
  221. bpf_csum_update(skb, sum6);
  222. // bpf_skb_change_proto() invalidates all pointers - reload them.
  223. data = (void *)(long)skb->data;
  224. data_end = (void *)(long)skb->data_end;
  225. // I cannot think of any valid way for this error condition to trigger, however I do
  226. // believe the explicit check is required to keep the in kernel ebpf verifier happy.
  227. if (data + l2_header_size + sizeof(ip6) > data_end)
  228. return TC_ACT_SHOT;
  229. struct ethhdr *new_eth = data;
  230. // Copy over the updated ethernet header
  231. *new_eth = eth2;
  232. // Copy over the new ipv4 header.
  233. *(struct ipv6hdr *)(new_eth + 1) = ip6;
  234. return TC_ACT_OK;
  235. }
  236. char _license[] SEC("license") = ("GPL");