/* inet_ecn.h */
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _INET_ECN_H_
  3. #define _INET_ECN_H_
  4. #include <linux/ip.h>
  5. #include <linux/skbuff.h>
  6. #include <linux/if_vlan.h>
  7. #include <net/inet_sock.h>
  8. #include <net/dsfield.h>
  9. #include <net/checksum.h>
/*
 * The four ECN codepoints carried in the two low bits of the IPv4 TOS /
 * IPv6 traffic class byte (RFC 3168).
 */
enum {
	INET_ECN_NOT_ECT = 0,	/* not ECN-capable transport */
	INET_ECN_ECT_1 = 1,	/* ECN-capable transport, ECT(1) */
	INET_ECN_ECT_0 = 2,	/* ECN-capable transport, ECT(0) */
	INET_ECN_CE = 3,	/* congestion experienced */
	INET_ECN_MASK = 3,	/* mask covering the two ECN bits */
};
  17. extern int sysctl_tunnel_ecn_log;
  18. static inline int INET_ECN_is_ce(__u8 dsfield)
  19. {
  20. return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
  21. }
  22. static inline int INET_ECN_is_not_ect(__u8 dsfield)
  23. {
  24. return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
  25. }
  26. static inline int INET_ECN_is_capable(__u8 dsfield)
  27. {
  28. return dsfield & INET_ECN_ECT_0;
  29. }
  30. /*
  31. * RFC 3168 9.1.1
  32. * The full-functionality option for ECN encapsulation is to copy the
  33. * ECN codepoint of the inside header to the outside header on
  34. * encapsulation if the inside header is not-ECT or ECT, and to set the
  35. * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
  36. * the inside header is CE.
  37. */
  38. static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
  39. {
  40. outer &= ~INET_ECN_MASK;
  41. outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
  42. INET_ECN_ECT_0;
  43. return outer;
  44. }
  45. static inline void INET_ECN_xmit(struct sock *sk)
  46. {
  47. inet_sk(sk)->tos |= INET_ECN_ECT_0;
  48. if (inet6_sk(sk) != NULL)
  49. inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
  50. }
  51. static inline void INET_ECN_dontxmit(struct sock *sk)
  52. {
  53. inet_sk(sk)->tos &= ~INET_ECN_MASK;
  54. if (inet6_sk(sk) != NULL)
  55. inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
  56. }
/* Clear the two ECN bits (bits 20-21 of the flow-label word) in @label. */
#define IP6_ECN_flow_init(label) do {		\
	(label) &= ~htonl(INET_ECN_MASK << 20);	\
    } while (0)

/* If @sk is ECN-capable, set ECT(0) in the flow-label word @label. */
#define IP6_ECN_flow_xmit(sk, label) do {			\
	if (INET_ECN_is_capable(inet6_sk(sk)->tclass))		\
		(label) |= htonl(INET_ECN_ECT_0 << 20);		\
    } while (0)
/*
 * Set the CE codepoint in an IPv4 header, incrementally patching the
 * header checksum (RFC 1624 style) so it remains valid.
 *
 * Returns 0 if the packet is Not-ECT (CE must not be set there),
 * 1 otherwise (the header now carries CE; also 1 if it already did).
 */
static inline int IP_ECN_set_ce(struct iphdr *iph)
{
	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
	__be16 check_add;

	/*
	 * After the last operation we have (in binary):
	 * INET_ECN_NOT_ECT => 01
	 * INET_ECN_ECT_1   => 10
	 * INET_ECN_ECT_0   => 11
	 * INET_ECN_CE      => 00
	 */
	if (!(ecn & 2))
		return !ecn;	/* 1: already CE; 0: Not-ECT, leave alone */

	/*
	 * The following gives us:
	 * INET_ECN_ECT_1 => check += htons(0xFFFD)
	 * INET_ECN_ECT_0 => check += htons(0xFFFE)
	 * i.e. the one's-complement compensation for the TOS increase below.
	 */
	check_add = (__force __be16)((__force u16)htons(0xFFFB) +
				     (__force u16)htons(ecn));

	iph->check = csum16_add(iph->check, check_add);
	iph->tos |= INET_ECN_CE;
	return 1;
}
/*
 * Rewrite an ECT(0) IPv4 header to ECT(1) (RFC 6040 ECT(1) propagation on
 * decapsulation), fixing up the header checksum incrementally.
 *
 * Returns 1 if the field was rewritten, 0 if the ECN field was not ECT(0).
 */
static inline int IP_ECN_set_ect1(struct iphdr *iph)
{
	if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
		return 0;

	/* TOS drops by one (ECT(0)=2 -> ECT(1)=1), so check goes up by one. */
	iph->check = csum16_add(iph->check, htons(0x1));
	iph->tos ^= INET_ECN_MASK;	/* 2 ^ 3 == 1: ECT(0) -> ECT(1) */
	return 1;
}
  96. static inline void IP_ECN_clear(struct iphdr *iph)
  97. {
  98. iph->tos &= ~INET_ECN_MASK;
  99. }
  100. static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
  101. {
  102. dscp &= ~INET_ECN_MASK;
  103. ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
  104. }
struct ipv6hdr;

/* Note:
 * IP_ECN_set_ce() has to tweak the IPv4 header checksum when setting CE,
 * so both changes cancel out and have no effect on skb->csum if/when
 * CHECKSUM_COMPLETE. In the IPv6 case there is no header checksum to
 * compensate for the change, so we must update skb->csum ourselves.
 */
/*
 * Set CE in an IPv6 header; for CHECKSUM_COMPLETE skbs also patch
 * skb->csum for the changed first 32-bit word of the header.
 *
 * Returns 0 if the packet is Not-ECT (must not be marked), 1 otherwise.
 */
static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
{
	__be32 from, to;

	if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
		return 0;

	/* The ECN field sits in bits 20-21 of the first header word. */
	from = *(__be32 *)iph;
	to = from | htonl(INET_ECN_CE << 20);
	*(__be32 *)iph = to;
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
				     (__force __wsum)to);
	return 1;
}
/*
 * Rewrite an ECT(0) IPv6 header to ECT(1) (RFC 6040 ECT(1) propagation);
 * for CHECKSUM_COMPLETE skbs also patch skb->csum for the changed word.
 *
 * Returns 1 if the field was rewritten, 0 if the ECN field was not ECT(0).
 */
static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
{
	__be32 from, to;

	if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
		return 0;

	from = *(__be32 *)iph;
	/* XOR with the mask flips 0b10 -> 0b01, i.e. ECT(0) -> ECT(1). */
	to = from ^ htonl(INET_ECN_MASK << 20);
	*(__be32 *)iph = to;
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
				     (__force __wsum)to);
	return 1;
}
  138. static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
  139. {
  140. dscp &= ~INET_ECN_MASK;
  141. ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
  142. }
  143. static inline int INET_ECN_set_ce(struct sk_buff *skb)
  144. {
  145. switch (skb_protocol(skb, true)) {
  146. case cpu_to_be16(ETH_P_IP):
  147. if (skb_network_header(skb) + sizeof(struct iphdr) <=
  148. skb_tail_pointer(skb))
  149. return IP_ECN_set_ce(ip_hdr(skb));
  150. break;
  151. case cpu_to_be16(ETH_P_IPV6):
  152. if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
  153. skb_tail_pointer(skb))
  154. return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
  155. break;
  156. }
  157. return 0;
  158. }
  159. static inline int skb_get_dsfield(struct sk_buff *skb)
  160. {
  161. switch (skb_protocol(skb, true)) {
  162. case cpu_to_be16(ETH_P_IP):
  163. if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
  164. break;
  165. return ipv4_get_dsfield(ip_hdr(skb));
  166. case cpu_to_be16(ETH_P_IPV6):
  167. if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
  168. break;
  169. return ipv6_get_dsfield(ipv6_hdr(skb));
  170. }
  171. return -1;
  172. }
  173. static inline int INET_ECN_set_ect1(struct sk_buff *skb)
  174. {
  175. switch (skb_protocol(skb, true)) {
  176. case cpu_to_be16(ETH_P_IP):
  177. if (skb_network_header(skb) + sizeof(struct iphdr) <=
  178. skb_tail_pointer(skb))
  179. return IP_ECN_set_ect1(ip_hdr(skb));
  180. break;
  181. case cpu_to_be16(ETH_P_IPV6):
  182. if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
  183. skb_tail_pointer(skb))
  184. return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
  185. break;
  186. }
  187. return 0;
  188. }
  189. /*
  190. * RFC 6040 4.2
  191. * To decapsulate the inner header at the tunnel egress, a compliant
  192. * tunnel egress MUST set the outgoing ECN field to the codepoint at the
  193. * intersection of the appropriate arriving inner header (row) and outer
  194. * header (column) in Figure 4
  195. *
  196. * +---------+------------------------------------------------+
  197. * |Arriving | Arriving Outer Header |
  198. * | Inner +---------+------------+------------+------------+
  199. * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
  200. * +---------+---------+------------+------------+------------+
  201. * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
  202. * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE |
  203. * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE |
  204. * | CE | CE | CE | CE(!!!)| CE |
  205. * +---------+---------+------------+------------+------------+
  206. *
  207. * Figure 4: New IP in IP Decapsulation Behaviour
  208. *
  209. * returns 0 on success
  210. * 1 if something is broken and should be logged (!!! above)
  211. * 2 if packet should be dropped
  212. */
  213. static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
  214. {
  215. if (INET_ECN_is_not_ect(inner)) {
  216. switch (outer & INET_ECN_MASK) {
  217. case INET_ECN_NOT_ECT:
  218. return 0;
  219. case INET_ECN_ECT_0:
  220. case INET_ECN_ECT_1:
  221. return 1;
  222. case INET_ECN_CE:
  223. return 2;
  224. }
  225. }
  226. *set_ce = INET_ECN_is_ce(outer);
  227. return 0;
  228. }
  229. static inline int INET_ECN_decapsulate(struct sk_buff *skb,
  230. __u8 outer, __u8 inner)
  231. {
  232. bool set_ce = false;
  233. int rc;
  234. rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
  235. if (!rc) {
  236. if (set_ce)
  237. INET_ECN_set_ce(skb);
  238. else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
  239. INET_ECN_set_ect1(skb);
  240. }
  241. return rc;
  242. }
  243. static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
  244. struct sk_buff *skb)
  245. {
  246. __u8 inner;
  247. switch (skb_protocol(skb, true)) {
  248. case htons(ETH_P_IP):
  249. inner = ip_hdr(skb)->tos;
  250. break;
  251. case htons(ETH_P_IPV6):
  252. inner = ipv6_get_dsfield(ipv6_hdr(skb));
  253. break;
  254. default:
  255. return 0;
  256. }
  257. return INET_ECN_decapsulate(skb, oiph->tos, inner);
  258. }
  259. static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
  260. struct sk_buff *skb)
  261. {
  262. __u8 inner;
  263. switch (skb_protocol(skb, true)) {
  264. case htons(ETH_P_IP):
  265. inner = ip_hdr(skb)->tos;
  266. break;
  267. case htons(ETH_P_IPV6):
  268. inner = ipv6_get_dsfield(ipv6_hdr(skb));
  269. break;
  270. default:
  271. return 0;
  272. }
  273. return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
  274. }
  275. #endif