// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
#include "mmap_unlock_work.h"

static const char * const iter_task_type_names[] = {
	"ALL",
	"TID",
	"PID",
};

struct bpf_iter_seq_task_common {
	struct pid_namespace *ns;
	enum bpf_iter_task_type type;
	u32 pid;
	u32 pid_visiting;
};

struct bpf_iter_seq_task_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	u32 tid;
};

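/* Walk the threads of the thread group identified by common->pid in
 * common->ns. *tid tracks the thread visited last; on success a
 * reference-counted task_struct for the next thread is returned, and NULL
 * once the (circularly linked) thread group has been exhausted. With
 * skip_if_dup_files, threads that share their files_struct with the group
 * leader are skipped.
 */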
static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
						   u32 *tid,
						   bool skip_if_dup_files)
{
	struct task_struct *task, *next_task;
	struct pid *pid;
	u32 saved_tid;

	if (!*tid) {
		/* The first time, the iterator calls this function. */
		pid = find_pid_ns(common->pid, common->ns);
		if (!pid)
			return NULL;

		task = get_pid_task(pid, PIDTYPE_TGID);
		if (!task)
			return NULL;

		*tid = common->pid;
		common->pid_visiting = common->pid;

		return task;
	}

	/* If the control returns to user space and comes back to the
	 * kernel again, *tid and common->pid_visiting should be the
	 * same for task_seq_start() to pick up the correct task.
	 */
	if (*tid == common->pid_visiting) {
		pid = find_pid_ns(common->pid_visiting, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);

		return task;
	}

	pid = find_pid_ns(common->pid_visiting, common->ns);
	if (!pid)
		return NULL;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return NULL;

retry:
	if (!pid_alive(task)) {
		put_task_struct(task);
		return NULL;
	}

	next_task = next_thread(task);
	put_task_struct(task);
	if (!next_task)
		return NULL;

	saved_tid = *tid;
	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
	if (!*tid || *tid == common->pid) {
		/* Run out of tasks of a process. The tasks of a
		 * thread_group are linked as circular linked list.
		 */
		*tid = saved_tid;
		return NULL;
	}

	get_task_struct(next_task);
	common->pid_visiting = *tid;

	if (skip_if_dup_files && task->files == task->group_leader->files) {
		task = next_task;
		goto retry;
	}

	return next_task;
}

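/* Return a reference to the next task to visit, according to the iterator
 * type: the single task selected by common->pid (TID), the next thread of
 * that thread group (TGID), or the task whose pid number is the smallest
 * one >= *tid in the namespace (ALL). Returns NULL when iteration is done.
 */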
static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
					     u32 *tid,
					     bool skip_if_dup_files)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	if (common->type == BPF_TASK_ITER_TID) {
		if (*tid && *tid != common->pid)
			return NULL;
		rcu_read_lock();
		pid = find_pid_ns(common->pid, common->ns);
		if (pid) {
			task = get_pid_task(pid, PIDTYPE_TGID);
			*tid = common->pid;
		}
		rcu_read_unlock();

		return task;
	}

	if (common->type == BPF_TASK_ITER_TGID) {
		rcu_read_lock();
		task = task_group_seq_get_next(common, tid, skip_if_dup_files);
		rcu_read_unlock();

		return task;
	}

	rcu_read_lock();
retry:
	pid = find_ge_pid(*tid, common->ns);
	if (pid) {
		*tid = pid_nr_ns(pid, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);
		if (!task) {
			++*tid;
			goto retry;
		} else if (skip_if_dup_files && !thread_group_leader(task) &&
			   task->files == task->group_leader->files) {
			put_task_struct(task);
			task = NULL;
			++*tid;
			goto retry;
		}
	}
	rcu_read_unlock();

	return task;
}

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	if (*pos == 0)
		++*pos;
	return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	++*pos;
	++info->tid;
	put_task_struct((struct task_struct *)v);

	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	return task;
}

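/* Context handed to "iter/task" BPF programs via DEFINE_BPF_ITER_FUNC()
 * below. For illustration only, a minimal user-side consumer might look
 * like the following sketch (program name is hypothetical):
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct task_struct *task = ctx->task;
 *
 *		if (task)
 *			BPF_SEQ_PRINTF(ctx->meta->seq, "%8d %8d\n",
 *				       task->tgid, task->pid);
 *		return 0;
 *	}
 */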
struct bpf_iter__task {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
			   bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__task ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = task;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
	return __task_seq_show(seq, v, false);
}

static void task_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__task_seq_show(seq, v, true);
	else
		put_task_struct((struct task_struct *)v);
}

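/* Validate and record the attach parameters. At most one of tid, pid and
 * pid_fd may be set: tid selects a single thread, pid or pid_fd select a
 * whole thread group, and leaving all three zero iterates over every task
 * in the pid namespace.
 */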
static int bpf_iter_attach_task(struct bpf_prog *prog,
				union bpf_iter_link_info *linfo,
				struct bpf_iter_aux_info *aux)
{
	unsigned int flags;
	struct pid *pid;
	pid_t tgid;

	if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
		return -EINVAL;

	aux->task.type = BPF_TASK_ITER_ALL;
	if (linfo->task.tid != 0) {
		aux->task.type = BPF_TASK_ITER_TID;
		aux->task.pid = linfo->task.tid;
	}
	if (linfo->task.pid != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;
		aux->task.pid = linfo->task.pid;
	}
	if (linfo->task.pid_fd != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;

		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
		if (IS_ERR(pid))
			return PTR_ERR(pid);

		tgid = pid_nr_ns(pid, task_active_pid_ns(current));
		aux->task.pid = tgid;
		put_pid(pid);
	}

	return 0;
}

static const struct seq_operations task_seq_ops = {
	.start	= task_seq_start,
	.next	= task_seq_next,
	.stop	= task_seq_stop,
	.show	= task_seq_show,
};

struct bpf_iter_seq_task_file_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	u32 tid;
	u32 fd;
};

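/* Return the next open file of the task(s) selected by the iterator,
 * advancing info->fd within the current task and moving on to the next
 * task once its fd table is exhausted. Tasks that share their files_struct
 * with the group leader are skipped, so each fd table is visited only once.
 */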
static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
	u32 saved_tid = info->tid;
	struct task_struct *curr_task;
	unsigned int curr_fd = info->fd;

	/* If this function returns a non-NULL file object,
	 * it holds a reference to the task/file.
	 * Otherwise, it does not hold any reference.
	 */
again:
	if (info->task) {
		curr_task = info->task;
		curr_fd = info->fd;
	} else {
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->task = NULL;
			return NULL;
		}

		/* set info->task */
		info->task = curr_task;
		if (saved_tid == info->tid)
			curr_fd = info->fd;
		else
			curr_fd = 0;
	}

	rcu_read_lock();
	for (;; curr_fd++) {
		struct file *f;

		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
		if (!f)
			break;
		if (!get_file_rcu(f))
			continue;

		/* set info->fd */
		info->fd = curr_fd;
		rcu_read_unlock();
		return f;
	}

	/* the current task is done, go to the next task */
	rcu_read_unlock();
	put_task_struct(curr_task);

	if (info->common.type == BPF_TASK_ITER_TID) {
		info->task = NULL;
		return NULL;
	}

	info->task = NULL;
	info->fd = 0;
	saved_tid = ++(info->tid);
	goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct file *file;

	info->task = NULL;
	file = task_file_seq_get_next(info);
	if (file && *pos == 0)
		++*pos;

	return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	++*pos;
	++info->fd;
	fput((struct file *)v);
	return task_file_seq_get_next(info);
}

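/* Context handed to "iter/task_file" BPF programs. A minimal consumer,
 * sketched for illustration only (program name is hypothetical):
 *
 *	SEC("iter/task_file")
 *	int dump_task_file(struct bpf_iter__task_file *ctx)
 *	{
 *		struct task_struct *task = ctx->task;
 *		struct file *file = ctx->file;
 *
 *		if (task && file)
 *			BPF_SEQ_PRINTF(ctx->meta->seq, "%8d %8d\n",
 *				       task->pid, ctx->fd);
 *		return 0;
 *	}
 */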
struct bpf_iter__task_file {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	u32 fd __aligned(8);
	__bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
		     struct task_struct *task, u32 fd,
		     struct file *file)

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
				bool in_stop)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct bpf_iter__task_file ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.fd = info->fd;
	ctx.file = file;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
	return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	if (!v) {
		(void)__task_file_seq_show(seq, v, true);
	} else {
		fput((struct file *)v);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

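/* Shared init/fini for the per-seq_file private data of all three iterators:
 * pin the active pid namespace of the task opening the iterator and copy the
 * attach-time type and pid from the link's aux info.
 */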
static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	common->ns = get_pid_ns(task_active_pid_ns(current));
	common->type = aux->task.type;
	common->pid = aux->task.pid;

	return 0;
}

static void fini_seq_pidns(void *priv_data)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
	.start	= task_file_seq_start,
	.next	= task_file_seq_next,
	.stop	= task_file_seq_stop,
	.show	= task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
	unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
	task_vma_iter_first_vma,	/* use find_vma() with addr 0 */
	task_vma_iter_next_vma,		/* use vma_next() with curr_vma */
	task_vma_iter_find_vma,		/* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	struct mm_struct *curr_mm;
	u32 saved_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct, holds a refcount on mm->mm_users, and holds
	 * read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		curr_mm = info->mm;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
		 * After relock, call find(mm, prev_vm_end - 1) to find
		 * new vma to process.
		 *
		 *   +------+------+-----------+
		 *   | VMA1 | VMA2 | VMA3      |
		 *   +------+------+-----------+
		 *   |      |      |           |
		 *  4k     8k     16k         400k
		 *
		 * For example, curr_vma == VMA2. Before unlock, we set
		 *
		 *    prev_vm_start = 8k
		 *    prev_vm_end   = 16k
		 *
		 * There are a few cases:
		 *
		 * 1) VMA2 is freed, but VMA3 exists.
		 *
		 *    find_vma() will return VMA3, just process VMA3.
		 *
		 * 2) VMA2 still exists.
		 *
		 *    find_vma() will return VMA2, process VMA2->next.
		 *
		 * 3) no more vma in this mm.
		 *
		 *    Process the next task.
		 *
		 * 4) find_vma() returns a different vma, VMA2'.
		 *
		 *    4.1) If VMA2 covers same range as VMA2', skip VMA2',
		 *         because we already covered the range;
		 *    4.2) VMA2 and VMA2' covers different ranges, process
		 *         VMA2'.
		 */
		if (mmap_lock_is_contended(curr_mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_mm);
			if (mmap_read_lock_killable(curr_mm)) {
				mmput(curr_mm);
				goto finish;
			}
		} else {
			op = task_vma_iter_next_vma;
		}
	} else {
again:
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->tid++;
			goto finish;
		}

		if (saved_tid != info->tid) {
			/* new task, process the first vma */
			op = task_vma_iter_first_vma;
		} else {
			/* Found the same tid, which means the user space
			 * finished data in previous buffer and read more.
			 * We dropped mmap_lock before returning to user
			 * space, so it is necessary to use find_vma() to
			 * find the next vma to process.
			 */
			op = task_vma_iter_find_vma;
		}

		curr_mm = get_task_mm(curr_task);
		if (!curr_mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_mm)) {
			mmput(curr_mm);
			goto finish;
		}
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = find_vma(curr_mm, 0);
		break;
	case task_vma_iter_next_vma:
		curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma
		 * to find the next vma. This is similar to the mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
		/* case 1) and 4.2) above just use curr_vma */

		/* check for case 2) or case 4.1) above */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	}
	if (!curr_vma) {
		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
		mmap_read_unlock(curr_mm);
		mmput(curr_mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	info->mm = curr_mm;
	return curr_vma;

next_task:
	if (info->common.type == BPF_TASK_ITER_TID)
		goto finish;

	put_task_struct(curr_task);
	info->task = NULL;
	info->mm = NULL;
	info->tid++;
	goto again;

finish:
	if (curr_task)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	info->mm = NULL;
	return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct vm_area_struct *vma;

	vma = task_vma_seq_get_next(info);
	if (vma && *pos == 0)
		++*pos;

	return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	++*pos;
	return task_vma_seq_get_next(info);
}

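/* Context handed to "iter/task_vma" BPF programs. A minimal consumer,
 * sketched for illustration only (program name is hypothetical):
 *
 *	SEC("iter/task_vma")
 *	int dump_vma(struct bpf_iter__task_vma *ctx)
 *	{
 *		struct vm_area_struct *vma = ctx->vma;
 *
 *		if (vma)
 *			BPF_SEQ_PRINTF(ctx->meta->seq, "%08llx-%08llx\n",
 *				       vma->vm_start, vma->vm_end);
 *		return 0;
 *	}
 */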
struct bpf_iter__task_vma {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	__bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
		     struct task_struct *task, struct vm_area_struct *vma)

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct bpf_iter__task_vma ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.vma = info->vma;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
	return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	if (!v) {
		(void)__task_vma_seq_show(seq, true);
	} else {
		/* info->vma has not been seen by the BPF program. If the
		 * user space reads more, task_vma_seq_get_next should
		 * return this vma again. Set prev_vm_start to ~0UL,
		 * so that we don't skip the vma returned by the next
		 * find_vma() (case task_vma_iter_find_vma in
		 * task_vma_seq_get_next()).
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->mm);
		mmput(info->mm);
		info->mm = NULL;
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static const struct seq_operations task_vma_seq_ops = {
	.start	= task_vma_seq_start,
	.next	= task_vma_seq_next,
	.stop	= task_vma_seq_stop,
	.show	= task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
	.seq_ops		= &task_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
};

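/* Report the attach parameters back to user space: as tid or pid in
 * bpf_link_info for BPF_OBJ_GET_INFO_BY_FD, and as text through the link's
 * fdinfo file.
 */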
static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
{
	switch (aux->task.type) {
	case BPF_TASK_ITER_TID:
		info->iter.task.tid = aux->task.pid;
		break;
	case BPF_TASK_ITER_TGID:
		info->iter.task.pid = aux->task.pid;
		break;
	default:
		break;
	}
	return 0;
}

static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
{
	seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
	if (aux->task.type == BPF_TASK_ITER_TID)
		seq_printf(seq, "tid:\t%u\n", aux->task.pid);
	else if (aux->task.type == BPF_TASK_ITER_TGID)
		seq_printf(seq, "pid:\t%u\n", aux->task.pid);
}

static struct bpf_iter_reg task_reg_info = {
	.target			= "task",
	.attach_target		= bpf_iter_attach_task,
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task, task),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_seq_info,
	.fill_link_info		= bpf_iter_fill_link_info,
	.show_fdinfo		= bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
	.seq_ops		= &task_file_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
	.target			= "task_file",
	.attach_target		= bpf_iter_attach_task,
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task_file, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_file, file),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_file_seq_info,
	.fill_link_info		= bpf_iter_fill_link_info,
	.show_fdinfo		= bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
	.seq_ops		= &task_vma_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
	.target			= "task_vma",
	.attach_target		= bpf_iter_attach_task,
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task_vma, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_vma, vma),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_vma_seq_info,
	.fill_link_info		= bpf_iter_fill_link_info,
	.show_fdinfo		= bpf_iter_task_show_fdinfo,
};

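/* bpf_find_vma() helper: find the VMA in @task's mm that contains @start and
 * invoke @callback_fn(task, vma, callback_ctx) on it while mmap_lock is held
 * for read. Returns -ENOENT if the task has no mm or no VMA covers @start,
 * -EBUSY if the lock cannot be taken without blocking, and -EINVAL for
 * non-zero @flags. A BPF-side call might look like (sketch, callback name
 * hypothetical):
 *
 *	bpf_find_vma(task, addr, check_vma_cb, &cb_ctx, 0);
 */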
BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
	struct mmap_unlock_irq_work *work = NULL;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct mm_struct *mm;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (!task)
		return -ENOENT;

	mm = task->mm;
	if (!mm)
		return -ENOENT;

	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

	if (irq_work_busy || !mmap_read_trylock(mm))
		return -EBUSY;

	vma = find_vma(mm, start);

	if (vma && vma->vm_start <= start && vma->vm_end > start) {
		callback_fn((u64)(long)task, (u64)(long)vma,
			    (u64)(long)callback_ctx, 0, 0);
		ret = 0;
	}

	bpf_mmap_unlock_mm(work, mm);
	return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
	.func		= bpf_find_vma,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_FUNC,
	.arg4_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg5_type	= ARG_ANYTHING,
};

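/* Per-CPU irq_work used by bpf_mmap_unlock_mm() (see mmap_unlock_work.h):
 * when bpf_find_vma() runs in a context where it cannot drop mmap_lock
 * directly (e.g. NMI), the unlock is handed off to this irq_work, which
 * calls mmap_read_unlock_non_owner() from a safe context.
 */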
DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
	struct mmap_unlock_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

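/* Register the task, task_file and task_vma iterator targets, resolve the
 * BTF IDs of their context arguments, and initialize the per-CPU irq_work
 * used for deferred mmap_lock release.
 */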
static int __init task_iter_init(void)
{
	struct mmap_unlock_irq_work *work;
	int ret, cpu;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&mmap_unlock_work, cpu);
		init_irq_work(&work->irq_work, do_mmap_read_unlock);
	}

	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	ret = bpf_iter_reg_target(&task_reg_info);
	if (ret)
		return ret;

	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
	ret = bpf_iter_reg_target(&task_file_reg_info);
	if (ret)
		return ret;

	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
	return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);