trampoline.c

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/module.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};

const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr = ops->private;
	int ret = 0;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct_multi(), so
		 * tr->mutex is already locked.
		 */
		lockdep_assert_held_once(&tr->mutex);

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
		 * retry register_ftrace_direct_multi() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 *   prepare_direct_functions_for_ipmodify
	 *   cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(&tr->mutex) to avoid a deadlock when something else
	 * is concurrently making changes to this same trampoline.
	 */
	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever is holding tr->mutex
		 * makes some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tr->mutex);
	return ret;
}
#endif
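
/*
 * Rough sketch of the IPMODIFY handshake handled above, pieced together from
 * the comments in this file (the ftrace side lives in ftrace.c and is not
 * spelled out here):
 *
 *   bpf_trampoline_update()
 *     register_fentry() / modify_fentry()
 *       ftrace finds another IPMODIFY user (e.g. livepatch) on the function
 *         -> bpf_tramp_ftrace_ops_func(FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF)
 *            sets BPF_TRAMP_F_SHARE_IPMODIFY and returns -EAGAIN
 *     bpf_trampoline_update() sees -EAGAIN, regenerates the trampoline with
 *     BPF_TRAMP_F_ORIG_STACK set and retries (the "again:" label below).
 */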

bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	return (ptype == BPF_PROG_TYPE_TRACING &&
		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		 eatype == BPF_MODIFY_RETURN)) ||
		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}

void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + PAGE_SIZE;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

static int bpf_trampoline_module_get(struct bpf_trampoline *tr)
{
	struct module *mod;
	int err = 0;

	preempt_disable();
	mod = __module_text_address((unsigned long) tr->func.addr);
	if (mod && !try_module_get(mod))
		err = -ENOENT;
	preempt_enable();
	tr->mod = mod;
	return err;
}

static void bpf_trampoline_module_put(struct bpf_trampoline *tr)
{
	module_put(tr->mod);
	tr->mod = NULL;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

	if (!ret)
		bpf_trampoline_module_put(tr);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			 bool lock_direct_mutex)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	}
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (bpf_trampoline_module_get(tr))
		return -ENOENT;

	if (tr->func.ftrace_managed) {
		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	}

	if (ret)
		bpf_trampoline_module_put(tr);
	return ret;
}
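
/*
 * Note: attachment uses one of two mechanisms, chosen above based on
 * ftrace_location(): if the target has an ftrace-managed entry, the
 * trampoline is installed as an ftrace direct call via tr->fops; otherwise
 * the instruction at the function entry is patched directly with
 * bpf_arch_text_poke(). modify_fentry() and unregister_fentry() follow the
 * same split.
 */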

static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_link *link;
	struct bpf_tramp_links *tlinks;
	struct bpf_tramp_link **links;
	int kind;

	*total = 0;
	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tlinks[kind].nr_links = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		links = tlinks[kind].links;

		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= link->link.prog->call_get_func_ip;
			*links++ = link;
		}
	}
	return tlinks;
}

static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	bpf_jit_free_exec(im->image);
	bpf_jit_uncharge_modmem(PAGE_SIZE);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}

static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
	struct bpf_tramp_image *im;

	im = container_of(pcref, struct bpf_tramp_image, pcref);
	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	if (im->ip_after_call)
		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
		percpu_ref_kill(&im->pcref);
	else
		/* the case of fentry trampoline */
		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
	/* The trampoline image that calls the original function is using:
	 * rcu_read_lock_trace to protect sleepable bpf progs
	 * rcu_read_lock to protect normal bpf progs
	 * percpu_ref to protect trampoline itself
	 * rcu tasks to protect trampoline asm not covered by percpu_ref
	 * (which are a few asm insns before __bpf_tramp_enter and
	 * after __bpf_tramp_exit)
	 *
	 * The trampoline is unreachable before bpf_tramp_image_put().
	 *
	 * First, patch the trampoline to avoid calling into fexit progs.
	 * The progs will be freed even if the original function is still
	 * executing or sleeping.
	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait for the
	 * first few asm instructions to execute and call into
	 * __bpf_tramp_enter->percpu_ref_get.
	 * Then use percpu_ref_kill to wait for the trampoline and the original
	 * function to finish.
	 * Then use call_rcu_tasks() to make sure the few asm insns in
	 * the trampoline epilogue are done as well.
	 *
	 * In the !PREEMPT case a task that got interrupted in the first asm
	 * insns won't go through an RCU quiescent state which the
	 * percpu_ref_kill will be waiting for. Hence the first
	 * call_rcu_tasks() is not necessary.
	 */
	if (im->ip_after_call) {
		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
					     NULL, im->ip_epilogue);
		WARN_ON(err);
		if (IS_ENABLED(CONFIG_PREEMPTION))
			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
		else
			percpu_ref_kill(&im->pcref);
		return;
	}

	/* The trampoline without fexit and fmod_ret progs doesn't call original
	 * function and doesn't use percpu_ref.
	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
	 * and normal progs.
	 */
	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}
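
/*
 * For reference, the resulting teardown chain for an image with fexit or
 * fmod_ret progs under CONFIG_PREEMPTION=y, pieced together from the
 * callbacks above:
 *
 *   bpf_tramp_image_put()
 *     -> call_rcu_tasks(__bpf_tramp_image_put_rcu_tasks)        (step 1)
 *     -> percpu_ref_kill() -> __bpf_tramp_image_release()       (step 2)
 *     -> call_rcu_tasks(__bpf_tramp_image_put_rcu)              (step 3)
 *     -> schedule_work() -> __bpf_tramp_image_put_deferred()
 *     -> bpf_tramp_image_free()
 *
 * A fentry-only image skips the percpu_ref stage:
 *   call_rcu_tasks_trace() -> call_rcu_tasks() -> work -> free.
 */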

static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
{
	struct bpf_tramp_image *im;
	struct bpf_ksym *ksym;
	void *image;
	int err = -ENOMEM;

	im = kzalloc(sizeof(*im), GFP_KERNEL);
	if (!im)
		goto out;

	err = bpf_jit_charge_modmem(PAGE_SIZE);
	if (err)
		goto out_free_im;

	err = -ENOMEM;
	im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		goto out_uncharge;
	set_vm_flush_reset_perms(image);

	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
	if (err)
		goto out_free_image;

	ksym = &im->ksym;
	INIT_LIST_HEAD_RCU(&ksym->lnode);
	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
	bpf_image_ksym_add(image, ksym);
	return im;

out_free_image:
	bpf_jit_free_exec(im->image);
out_uncharge:
	bpf_jit_uncharge_modmem(PAGE_SIZE);
out_free_im:
	kfree(im);
out:
	return ERR_PTR(err);
}

static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
	struct bpf_tramp_image *im;
	struct bpf_tramp_links *tlinks;
	u32 orig_flags = tr->flags;
	bool ip_arg = false;
	int err, total;

	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
	if (IS_ERR(tlinks))
		return PTR_ERR(tlinks);

	if (total == 0) {
		err = unregister_fentry(tr, tr->cur_image->image);
		bpf_tramp_image_put(tr->cur_image);
		tr->cur_image = NULL;
		tr->selector = 0;
		goto out;
	}

	im = bpf_tramp_image_alloc(tr->key, tr->selector);
	if (IS_ERR(im)) {
		err = PTR_ERR(im);
		goto out;
	}

	/* clear all bits except SHARE_IPMODIFY */
	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;

	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
		 * should not be set together.
		 */
		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
	} else {
		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
	}

	if (ip_arg)
		tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

	err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
					  &tr->func.model, tr->flags, tlinks,
					  tr->func.addr);
	if (err < 0)
		goto out_free;

	set_memory_ro((long)im->image, 1);
	set_memory_x((long)im->image, 1);

	WARN_ON(tr->cur_image && tr->selector == 0);
	WARN_ON(!tr->cur_image && tr->selector);
	if (tr->cur_image)
		/* progs already running at this address */
		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
	else
		/* first time registering */
		err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (err == -EAGAIN) {
		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
		 * trampoline again, and retry register.
		 */
		/* reset fops->func and fops->trampoline for re-register */
		tr->fops->func = NULL;
		tr->fops->trampoline = 0;

		/* reset im->image memory attr for arch_prepare_bpf_trampoline */
		set_memory_nx((long)im->image, 1);
		set_memory_rw((long)im->image, 1);
		goto again;
	}
#endif
	if (err)
		goto out_free;

	if (tr->cur_image)
		bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
	tr->selector++;
out:
	/* If any error happens, restore previous flags */
	if (err)
		tr->flags = orig_flags;
	kfree(tlinks);
	return err;

out_free:
	bpf_tramp_image_free(im);
	goto out;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_tramp_link *link_exiting;
	int err = 0;
	int cnt = 0, i;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (tr->extension_prog)
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		return -EBUSY;

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		cnt += tr->progs_cnt[i];

	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt)
			return -EBUSY;
		tr->extension_prog = link->link.prog;
		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					  link->link.prog->bpf_func);
	}
	if (cnt >= BPF_MAX_TRAMP_LINKS)
		return -E2BIG;
	if (!hlist_unhashed(&link->tramp_hlist))
		/* prog already linked */
		return -EBUSY;
	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
		if (link_exiting->link.prog != link->link.prog)
			continue;
		/* prog already linked */
		return -EBUSY;
	}

	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
	if (err) {
		hlist_del_init(&link->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
	return err;
}

int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_link_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		return err;
	}
	hlist_del_init(&link->tramp_hlist);
	tr->progs_cnt[kind]--;
	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_unlink_prog(link, tr);
	mutex_unlock(&tr->mutex);
	return err;
}

#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
	bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};

static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;
	bpf_prog_inc(p);
	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
		      &bpf_shim_tramp_link_lops, p);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_link *link;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = link->link.prog;

			if (p->bpf_func == bpf_func)
				return container_of(link, struct bpf_shim_tramp_link, link);
		}
	}

	return NULL;
}

int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	mutex_lock(&tr->mutex);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link) {
		/* Reusing existing shim attached by the other program. */
		bpf_link_inc(&shim_link->link.link);

		mutex_unlock(&tr->mutex);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */
	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	mutex_unlock(&tr->mutex);

	return 0;
err:
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding its mutex */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	if (WARN_ON_ONCE(!tr))
		return;

	mutex_lock(&tr->mutex);
	shim_link = cgroup_shim_find(tr, bpf_func);
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif

struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}
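
/*
 * Rough usage sketch (error handling elided); this mirrors
 * bpf_trampoline_link_cgroup_shim() above, and the fentry/fexit attach path
 * in syscall.c follows the same pattern:
 *
 *	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf,
 *					 btf_id);
 *	tr = bpf_trampoline_get(key, &tgt_info);
 *	err = bpf_trampoline_link_prog(link, tr);
 *	...
 *	bpf_trampoline_unlink_prog(link, tr);
 *	bpf_trampoline_put(tr);
 */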

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
			goto out;

	/* This code will be executed even when the last bpf_tramp_image
	 * is alive. All progs are detached from the trampoline and the
	 * trampoline image is patched with jmp into epilogue to skip
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}
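
/*
 * For orientation, the enter/exit helpers in this file are called from the
 * generated trampoline, which roughly does the following (simplified sketch;
 * the exact layout is architecture specific and fmod_ret return-value
 * handling is omitted):
 *
 *	save function arguments
 *	call __bpf_tramp_enter(im)		(grabs im->pcref)
 *	for each fentry/fmod_ret prog:
 *		start = __bpf_prog_enter*(prog, &run_ctx)
 *		if (start)
 *			prog->bpf_func(args)
 *		__bpf_prog_exit*(prog, start, &run_ctx)
 *	if (BPF_TRAMP_F_CALL_ORIG)
 *		call original function		(returns to im->ip_after_call)
 *	for each fexit prog:
 *		enter/run/exit as above
 *	call __bpf_tramp_exit(im)		(im->ip_epilogue; drops pcref)
 *	restore registers and return
 */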

static void notrace update_prog_stats(struct bpf_prog *prog,
				      u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter*
	     * and disabled in __bpf_prog_exit*.
	     * And vice versa.
	     * Hence check that 'start' is valid.
	     */
	    start > NO_START_TIME) {
		unsigned long flags;

		stats = this_cpu_ptr(prog->stats);
		flags = u64_stats_update_begin_irqsave(&stats->syncp);
		u64_stats_inc(&stats->cnt);
		u64_stats_add(&stats->nsecs, sched_clock() - start);
		u64_stats_update_end_irqrestore(&stats->syncp, flags);
	}
}

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock();
}

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	migrate_enable();
	rcu_read_unlock();
}

u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock();
}

void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}

bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
	bool sleepable = prog->aux->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_enter_sleepable_recur :
			__bpf_prog_enter_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_enter_lsm_cgroup;

	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
	bool sleepable = prog->aux->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_exit_sleepable_recur :
			__bpf_prog_exit_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_exit_lsm_cgroup;

	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}

int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_links *tlinks,
			    void *orig_call)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);