hbm_kern.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /* SPDX-License-Identifier: GPL-2.0
  2. *
  3. * Copyright (c) 2019 Facebook
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of version 2 of the GNU General Public
  7. * License as published by the Free Software Foundation.
  8. *
  9. * Include file for sample Host Bandwidth Manager (HBM) BPF programs
  10. */
  11. #define KBUILD_MODNAME "foo"
  12. #include <uapi/linux/bpf.h>
  13. #include <uapi/linux/if_ether.h>
  14. #include <uapi/linux/if_packet.h>
  15. #include <uapi/linux/ip.h>
  16. #include <uapi/linux/ipv6.h>
  17. #include <uapi/linux/in.h>
  18. #include <uapi/linux/tcp.h>
  19. #include <uapi/linux/filter.h>
  20. #include <uapi/linux/pkt_cls.h>
  21. #include <net/ipv6.h>
  22. #include <net/inet_ecn.h>
  23. #include <bpf/bpf_endian.h>
  24. #include <bpf/bpf_helpers.h>
  25. #include "hbm.h"
/*
 * Per-packet result values. hbm_update_stats() builds a value from
 * DROP_PKT/ALLOW_PKT, ORs in 2 when the cwr flag is set, and uses the
 * result (0..3) to pick a returnValCount[] slot.
 * NOTE(review): TCP_ECN_OK and CWR are not referenced in the visible part
 * of this header; CWR presumably names the "2" bit mentioned above --
 * confirm against the programs that include this file.
 */
#define DROP_PKT 0
#define ALLOW_PKT 1
#define TCP_ECN_OK 1
#define CWR 2
/* Without HBM_DEBUG, bpf_printk() expands to nothing, removing all traces. */
#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
/*
 * Byte-based credit accounting. Every threshold below is expressed as a
 * multiple of MAX_BYTES_PER_PACKET.
 * NOTE(review): the consumers of the *_THRESH values are outside this
 * header; presumably they are compared against hbm_vqueue.credit.
 */
#define INITIAL_CREDIT_PACKETS 100
#define MAX_BYTES_PER_PACKET 1500
#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET)
/* Large packets hit their drop threshold 15 full-size packets earlier. */
#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH)
/* Packet length, in bytes, separating "small" from "large" packets. */
#define LARGE_PKT_THRESH 120
#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
/* Credit assigned by hbm_init_vqueue(): INITIAL_CREDIT_PACKETS full packets. */
#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
// Time-based accounting for fq's EDT; all values are in nanoseconds
#define BURST_SIZE_NS 100000 // 100us
#define MARK_THRESH_NS 50000 // 50us
#define DROP_THRESH_NS 500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
// rate is a fixed-point value: bytes per ns, scaled up by 2^20 (<< 20)
/* delta (ns) * rate, scaled back down by 2^20. Both macros expand identically. */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
/* Inverse conversion: (bytes << 20) / rate, computed with div64_u64(). */
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
/*
 * Cgroup-storage map (BPF_MAP_TYPE_CGROUP_STORAGE) whose value is a
 * struct hbm_vqueue, i.e. the state set up by hbm_init_vqueue() and
 * hbm_init_edt_vqueue().
 */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, struct hbm_vqueue);
} queue_state SEC(".maps");
/*
 * Single-entry array map holding one struct hbm_queue_stats, the type of
 * record updated by hbm_update_stats().
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);	/* exactly one stats record */
	__type(key, u32);
	__type(value, struct hbm_queue_stats);
} queue_stats SEC(".maps");
/*
 * Per-packet information gathered by hbm_get_pkt_info() and
 * get_tcp_info().
 */
struct hbm_pkt_info {
	int cwnd;		/* tp->snd_cwnd, or 0 when no TCP socket info */
	int rtt;		/* tp->srtt_us >> 3, or 0 when no TCP socket info */
	int packets_out;	/* tp->packets_out, or 0 when no TCP socket info */
	bool is_ip;		/* header version field is 4 or 6 */
	bool is_tcp;		/* IPv4 protocol / IPv6 nexthdr equals 6 (TCP) */
	short ecn;		/* ECN bits of the header (INET_ECN_MASK applied) */
};
  73. static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
  74. {
  75. struct bpf_sock *sk;
  76. struct bpf_tcp_sock *tp;
  77. sk = skb->sk;
  78. if (sk) {
  79. sk = bpf_sk_fullsock(sk);
  80. if (sk) {
  81. if (sk->protocol == IPPROTO_TCP) {
  82. tp = bpf_tcp_sock(sk);
  83. if (tp) {
  84. pkti->cwnd = tp->snd_cwnd;
  85. pkti->rtt = tp->srtt_us >> 3;
  86. pkti->packets_out = tp->packets_out;
  87. return 0;
  88. }
  89. }
  90. }
  91. }
  92. pkti->cwnd = 0;
  93. pkti->rtt = 0;
  94. pkti->packets_out = 0;
  95. return 1;
  96. }
  97. static void hbm_get_pkt_info(struct __sk_buff *skb,
  98. struct hbm_pkt_info *pkti)
  99. {
  100. struct iphdr iph;
  101. struct ipv6hdr *ip6h;
  102. pkti->cwnd = 0;
  103. pkti->rtt = 0;
  104. bpf_skb_load_bytes(skb, 0, &iph, 12);
  105. if (iph.version == 6) {
  106. ip6h = (struct ipv6hdr *)&iph;
  107. pkti->is_ip = true;
  108. pkti->is_tcp = (ip6h->nexthdr == 6);
  109. pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
  110. } else if (iph.version == 4) {
  111. pkti->is_ip = true;
  112. pkti->is_tcp = (iph.protocol == 6);
  113. pkti->ecn = iph.tos & INET_ECN_MASK;
  114. } else {
  115. pkti->is_ip = false;
  116. pkti->is_tcp = false;
  117. pkti->ecn = 0;
  118. }
  119. if (pkti->is_tcp)
  120. get_tcp_info(skb, pkti);
  121. }
  122. static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
  123. {
  124. bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
  125. qdp->lasttime = bpf_ktime_get_ns();
  126. qdp->credit = INIT_CREDIT;
  127. qdp->rate = rate * 128;
  128. }
  129. static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
  130. int rate)
  131. {
  132. unsigned long long curtime;
  133. curtime = bpf_ktime_get_ns();
  134. bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
  135. qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
  136. qdp->credit = 0; // not used
  137. qdp->rate = rate * 128;
  138. }
  139. static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
  140. int len,
  141. unsigned long long curtime,
  142. bool congestion_flag,
  143. bool drop_flag,
  144. bool cwr_flag,
  145. bool ecn_ce_flag,
  146. struct hbm_pkt_info *pkti,
  147. int credit)
  148. {
  149. int rv = ALLOW_PKT;
  150. if (qsp != NULL) {
  151. // Following is needed for work conserving
  152. __sync_add_and_fetch(&(qsp->bytes_total), len);
  153. if (qsp->stats) {
  154. // Optionally update statistics
  155. if (qsp->firstPacketTime == 0)
  156. qsp->firstPacketTime = curtime;
  157. qsp->lastPacketTime = curtime;
  158. __sync_add_and_fetch(&(qsp->pkts_total), 1);
  159. if (congestion_flag) {
  160. __sync_add_and_fetch(&(qsp->pkts_marked), 1);
  161. __sync_add_and_fetch(&(qsp->bytes_marked), len);
  162. }
  163. if (drop_flag) {
  164. __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
  165. __sync_add_and_fetch(&(qsp->bytes_dropped),
  166. len);
  167. }
  168. if (ecn_ce_flag)
  169. __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
  170. if (pkti->cwnd) {
  171. __sync_add_and_fetch(&(qsp->sum_cwnd),
  172. pkti->cwnd);
  173. __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
  174. }
  175. if (pkti->rtt)
  176. __sync_add_and_fetch(&(qsp->sum_rtt),
  177. pkti->rtt);
  178. __sync_add_and_fetch(&(qsp->sum_credit), credit);
  179. if (drop_flag)
  180. rv = DROP_PKT;
  181. if (cwr_flag)
  182. rv |= 2;
  183. if (rv == DROP_PKT)
  184. __sync_add_and_fetch(&(qsp->returnValCount[0]),
  185. 1);
  186. else if (rv == ALLOW_PKT)
  187. __sync_add_and_fetch(&(qsp->returnValCount[1]),
  188. 1);
  189. else if (rv == 2)
  190. __sync_add_and_fetch(&(qsp->returnValCount[2]),
  191. 1);
  192. else if (rv == 3)
  193. __sync_add_and_fetch(&(qsp->returnValCount[3]),
  194. 1);
  195. }
  196. }
  197. }