xdp_sample.bpf.c

// SPDX-License-Identifier: GPL-2.0
/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
#include "xdp_sample.bpf.h"

#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>

array_map rx_cnt SEC(".maps");
array_map redir_err_cnt SEC(".maps");
array_map cpumap_enqueue_cnt SEC(".maps");
array_map cpumap_kthread_cnt SEC(".maps");
array_map exception_cnt SEC(".maps");
array_map devmap_xmit_cnt SEC(".maps");
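
/* Tracks xmit stats per (from_dev, to_dev) pair. Keyed by a single u64
 * built from the two ifindexes (from_ifindex << 32 | to_ifindex), see
 * tp_xdp_devmap_xmit_multi() below.
 */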
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
	__uint(max_entries, 32 * 32);
	__type(key, u64);
	__type(value, struct datarec);
} devmap_xmit_cnt_multi SEC(".maps");

const volatile int nr_cpus = 0;

/* These can be set before loading so that redundant comparisons can be DCE'd
 * by the verifier, and only actual matches are tried after the tp_btf programs
 * are loaded. This allows the sample to filter tracepoint stats based on
 * net_device (see the usage sketch below).
 */
const volatile int from_match[32] = {};
const volatile int to_match[32] = {};

int cpumap_map_id = 0;
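
/* Usage sketch (userspace, not part of this BPF object): the const volatile
 * filters above are meant to be filled in before the skeleton is loaded, so
 * the verifier can dead-code-eliminate comparisons that can never match.
 * A minimal example, assuming a libbpf skeleton generated from this object;
 * the skeleton header and type/function names below are illustrative:
 *
 *	#include <net/if.h>
 *	#include <bpf/libbpf.h>
 *	#include "xdp_sample.skel.h"
 *
 *	int main(void)
 *	{
 *		struct xdp_sample_bpf *skel;
 *
 *		skel = xdp_sample_bpf__open();
 *		if (!skel)
 *			return 1;
 *		// Set the read-only globals before load so the verifier sees them
 *		skel->rodata->nr_cpus = libbpf_num_possible_cpus();
 *		skel->rodata->from_match[0] = if_nametoindex("eth0");
 *		if (xdp_sample_bpf__load(skel))
 *			return 1;
 *		// ... attach programs and read the stat maps ...
 *		xdp_sample_bpf__destroy(skel);
 *		return 0;
 *	}
 */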

/* Test whether b is in set a; an empty (all-zero) set a matches everything */
#define IN_SET(a, b)                                                \
	({                                                          \
		bool __res = !(a)[0];                               \
		for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
			__res = (a)[i] == (b);                      \
			if (__res)                                  \
				break;                              \
		}                                                   \
		__res;                                              \
	})
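
/* For example, with from_match = { 3, 7 } (rest zero): IN_SET(from_match, 7)
 * is true and IN_SET(from_match, 9) is false; with from_match left all-zero
 * (no filter configured), IN_SET(from_match, x) is true for any x.
 */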

static __always_inline __u32 xdp_get_err_key(int err)
{
	switch (err) {
	case 0:
		return 0;
	case -EINVAL:
		return 2;
	case -ENETDOWN:
		return 3;
	case -EMSGSIZE:
		return 4;
	case -EOPNOTSUPP:
		return 5;
	case -ENOSPC:
		return 6;
	default:
		return 1;
	}
}
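
/* The key returned above selects a group of slots in redir_err_cnt: key 0
 * counts successful redirects, key 1 any unrecognized errno, keys 2-6 the
 * specific errors listed. Each key owns nr_cpus consecutive slots, addressed
 * as key * nr_cpus + cpu in xdp_redirect_collect_stat() below.
 */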

static __always_inline int xdp_redirect_collect_stat(int from, int err)
{
	u32 cpu = bpf_get_smp_processor_id();
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	u32 idx;

	if (!IN_SET(from_match, from))
		return 0;

	key = xdp_get_err_key(err);

	idx = key * nr_cpus + cpu;
	rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
	if (!rec)
		return 0;
	if (key)
		NO_TEAR_INC(rec->dropped);
	else
		NO_TEAR_INC(rec->processed);
	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice, as stopping perf-record also unloads this
	 * bpf_prog. Plus, there is additional overhead of doing so.
	 */
}

SEC("tp_btf/xdp_redirect_err")
int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
	     const struct bpf_prog *xdp, const void *tgt, int err,
	     const struct bpf_map *map, u32 index)
{
	return xdp_redirect_collect_stat(dev->ifindex, err);
}

SEC("tp_btf/xdp_redirect_map_err")
int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
	     const struct bpf_prog *xdp, const void *tgt, int err,
	     const struct bpf_map *map, u32 index)
{
	return xdp_redirect_collect_stat(dev->ifindex, err);
}

SEC("tp_btf/xdp_redirect")
int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
	     const struct bpf_prog *xdp, const void *tgt, int err,
	     const struct bpf_map *map, u32 index)
{
	return xdp_redirect_collect_stat(dev->ifindex, err);
}

SEC("tp_btf/xdp_redirect_map")
int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
	     const struct bpf_prog *xdp, const void *tgt, int err,
	     const struct bpf_map *map, u32 index)
{
	return xdp_redirect_collect_stat(dev->ifindex, err);
}

SEC("tp_btf/xdp_cpumap_enqueue")
int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
	     unsigned int drops, int to_cpu)
{
	u32 cpu = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 idx;

	if (cpumap_map_id && cpumap_map_id != map_id)
		return 0;

	idx = to_cpu * nr_cpus + cpu;
	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
	if (!rec)
		return 0;
	NO_TEAR_ADD(rec->processed, processed);
	NO_TEAR_ADD(rec->dropped, drops);
	/* Record bulk events, so userspace can calculate the average bulk size */
	if (processed > 0)
		NO_TEAR_INC(rec->issue);
	/* Inception: It's possible to detect overload situations via this
	 * tracepoint. This can be used for creating a feedback loop to XDP,
	 * which can take appropriate actions to mitigate the overload
	 * situation.
	 */
	return 0;
}

SEC("tp_btf/xdp_cpumap_kthread")
int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
	     unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
{
	struct datarec *rec;
	u32 cpu;

	if (cpumap_map_id && cpumap_map_id != map_id)
		return 0;

	cpu = bpf_get_smp_processor_id();
	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
	if (!rec)
		return 0;
	NO_TEAR_ADD(rec->processed, processed);
	NO_TEAR_ADD(rec->dropped, drops);
	NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
	NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
	NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
	/* Count times kthread yielded CPU via schedule call */
	if (sched)
		NO_TEAR_INC(rec->issue);
	return 0;
}

SEC("tp_btf/xdp_exception")
int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
	     const struct bpf_prog *xdp, u32 act)
{
	u32 cpu = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 key = act, idx;

	if (!IN_SET(from_match, dev->ifindex))
		return 0;
	if (!IN_SET(to_match, dev->ifindex))
		return 0;

	if (key > XDP_REDIRECT)
		key = XDP_REDIRECT + 1;

	idx = key * nr_cpus + cpu;
	rec = bpf_map_lookup_elem(&exception_cnt, &idx);
	if (!rec)
		return 0;
	NO_TEAR_INC(rec->dropped);

	return 0;
}

SEC("tp_btf/xdp_devmap_xmit")
int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
	     const struct net_device *to_dev, int sent, int drops, int err)
{
	struct datarec *rec;
	int idx_in, idx_out;
	u32 cpu;

	idx_in = from_dev->ifindex;
	idx_out = to_dev->ifindex;

	if (!IN_SET(from_match, idx_in))
		return 0;
	if (!IN_SET(to_match, idx_out))
		return 0;

	cpu = bpf_get_smp_processor_id();
	rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
	if (!rec)
		return 0;
	NO_TEAR_ADD(rec->processed, sent);
	NO_TEAR_ADD(rec->dropped, drops);
	/* Record bulk events, so userspace can calculate the average bulk size */
	NO_TEAR_INC(rec->info);
	/* Record error cases, where no frames were sent */
	/* Catch the API error where the driver's ndo_xdp_xmit sent more
	 * frames than requested (reported as drops < 0)
	 */
	if (err || drops < 0)
		NO_TEAR_INC(rec->issue);
	return 0;
}

SEC("tp_btf/xdp_devmap_xmit")
int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
	     const struct net_device *to_dev, int sent, int drops, int err)
{
	struct datarec empty = {};
	struct datarec *rec;
	int idx_in, idx_out;
	u64 idx;

	idx_in = from_dev->ifindex;
	idx_out = to_dev->ifindex;
	/* Build the composite map key: from_ifindex in the upper 32 bits,
	 * to_ifindex in the lower 32 bits
	 */
	idx = idx_in;
	idx = idx << 32 | idx_out;

	if (!IN_SET(from_match, idx_in))
		return 0;
	if (!IN_SET(to_match, idx_out))
		return 0;

	bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
	rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
	if (!rec)
		return 0;

	NO_TEAR_ADD(rec->processed, sent);
	NO_TEAR_ADD(rec->dropped, drops);
	NO_TEAR_INC(rec->info);
	if (err || drops < 0)
		NO_TEAR_INC(rec->issue);
	return 0;
}