profiler.inc.h

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "profiler.h"

#ifndef NULL
#define NULL 0
#endif
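
/*
 * The open-flag and file-mode constants below mirror the kernel's UAPI
 * values; they are redefined here because vmlinux.h provides only type
 * definitions, not macros.
 */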
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define MAX_ERRNO 4095

#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000

#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)

#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};
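
/*
 * Union of all event record types: it sizes each entry of the per-CPU
 * data_heap scratch map below so a single slot can stage any event
 * before it is emitted to the perf buffer.
 */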
union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
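
/*
 * CO-RE "flavor" structs: libbpf strips the ___52 suffix when matching
 * these definitions against kernel BTF, so they describe the older kernfs
 * layouts (the kernfs_node_id union and iattr embedded as a struct iattr)
 * and let the program adapt to whichever layout the running kernel uses.
 */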
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};
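
/*
 * Maps: data_heap is per-CPU scratch storage for building event records,
 * events is the perf buffer they are emitted through, var_tpid_to_data
 * tracks pending kill events per target pid, bpf_func_stats collects
 * per-program runtime counters, and the remaining hash maps hold the
 * allow/deny filters that userspace populates.
 */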
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}
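
/*
 * Bounded bpf_probe_read() wrapper: clamps the copy to at most `max` bytes
 * and returns the number of bytes read, or 0 on failure. The explicit
 * len > 1 / len == 1 split appears to exist to keep the verifier's bounds
 * tracking simple; that rationale is an assumption, not documented here.
 */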
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}
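
/*
 * get_inode_from_kernfs() picks the right field at load time:
 * bpf_core_field_exists() reports whether the running kernel still has the
 * old union-style id (with a separate ino/generation), and the fallback
 * reads the modern u64 id otherwise.
 */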
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}
	return (void*)payload;
}

static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}
	return (void*)payload;
}

static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}
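
/*
 * trace_var_sys_kill() aggregates kill() events per target pid: the first
 * signal from a sender allocates a slot in var_tpid_to_data, repeated
 * signals within STALE_INFO seconds only bump kill_count, and stale entries
 * are rebuilt from scratch. The records are flushed when the target exits
 * (see raw_tracepoint__sched_process_exit).
 */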
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);
					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];
		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return length;
}
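
/*
 * File-modification filtering: a dentry passes only if its superblock's
 * device id is in allowed_devices and either the file's inode is in
 * allowed_file_inodes or some ancestor directory's inode is in
 * allowed_directory_inodes.
 */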
static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);

	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);

	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}
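
/*
 * Program entry points. Each one opens a bpf_func_stats window with
 * bpf_stats_enter()/bpf_stats_exit(), builds its event record in the
 * per-CPU data_heap scratch slot, and emits it through the events perf
 * buffer.
 */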
  478. SEC("kprobe/proc_sys_write")
  479. ssize_t BPF_KPROBE(kprobe__proc_sys_write,
  480. struct file* filp, const char* buf,
  481. size_t count, loff_t* ppos)
  482. {
  483. struct bpf_func_stats_ctx stats_ctx;
  484. bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
  485. u32 pid = get_userspace_pid();
  486. int zero = 0;
  487. struct var_sysctl_data_t* sysctl_data =
  488. bpf_map_lookup_elem(&data_heap, &zero);
  489. if (!sysctl_data)
  490. goto out;
  491. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  492. sysctl_data->meta.type = SYSCTL_EVENT;
  493. void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
  494. payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
  495. populate_ancestors(task, &sysctl_data->ancestors_info);
  496. sysctl_data->sysctl_val_length = 0;
  497. sysctl_data->sysctl_path_length = 0;
  498. size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
  499. barrier_var(sysctl_val_length);
  500. if (sysctl_val_length <= CTL_MAXNAME) {
  501. barrier_var(sysctl_val_length);
  502. sysctl_data->sysctl_val_length = sysctl_val_length;
  503. payload += sysctl_val_length;
  504. }
  505. size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
  506. BPF_CORE_READ(filp, f_path.dentry, d_name.name));
  507. barrier_var(sysctl_path_length);
  508. if (sysctl_path_length <= MAX_PATH) {
  509. barrier_var(sysctl_path_length);
  510. sysctl_data->sysctl_path_length = sysctl_path_length;
  511. payload += sysctl_path_length;
  512. }
  513. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
  514. unsigned long data_len = payload - (void*)sysctl_data;
  515. data_len = data_len > sizeof(struct var_sysctl_data_t)
  516. ? sizeof(struct var_sysctl_data_t)
  517. : data_len;
  518. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
  519. out:
  520. bpf_stats_exit(&stats_ctx);
  521. return 0;
  522. }
  523. SEC("tracepoint/syscalls/sys_enter_kill")
  524. int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
  525. {
  526. struct bpf_func_stats_ctx stats_ctx;
  527. bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
  528. int pid = ctx->args[0];
  529. int sig = ctx->args[1];
  530. int ret = trace_var_sys_kill(ctx, pid, sig);
  531. bpf_stats_exit(&stats_ctx);
  532. return ret;
  533. };
  534. SEC("raw_tracepoint/sched_process_exit")
  535. int raw_tracepoint__sched_process_exit(void* ctx)
  536. {
  537. int zero = 0;
  538. struct bpf_func_stats_ctx stats_ctx;
  539. bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
  540. u32 tpid = get_userspace_pid();
  541. struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
  542. struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
  543. if (arr_struct == NULL || kill_data == NULL)
  544. goto out;
  545. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  546. struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
  547. #ifdef UNROLL
  548. #pragma unroll
  549. #endif
  550. for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
  551. struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
  552. if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
  553. bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
  554. void* payload = kill_data->payload;
  555. size_t offset = kill_data->payload_length;
  556. if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
  557. return 0;
  558. payload += offset;
  559. kill_data->kill_target_name_length = 0;
  560. kill_data->kill_target_cgroup_proc_length = 0;
  561. size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
  562. barrier_var(comm_length);
  563. if (comm_length <= TASK_COMM_LEN) {
  564. barrier_var(comm_length);
  565. kill_data->kill_target_name_length = comm_length;
  566. payload += comm_length;
  567. }
  568. size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
  569. BPF_CORE_READ(proc_kernfs, name));
  570. barrier_var(cgroup_proc_length);
  571. if (cgroup_proc_length <= KILL_TARGET_LEN) {
  572. barrier_var(cgroup_proc_length);
  573. kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
  574. payload += cgroup_proc_length;
  575. }
  576. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
  577. unsigned long data_len = (void*)payload - (void*)kill_data;
  578. data_len = data_len > sizeof(struct var_kill_data_t)
  579. ? sizeof(struct var_kill_data_t)
  580. : data_len;
  581. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
  582. }
  583. }
  584. bpf_map_delete_elem(&var_tpid_to_data, &tpid);
  585. out:
  586. bpf_stats_exit(&stats_ctx);
  587. return 0;
  588. }
  589. SEC("raw_tracepoint/sched_process_exec")
  590. int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
  591. {
  592. struct bpf_func_stats_ctx stats_ctx;
  593. bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
  594. struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
  595. u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
  596. bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
  597. if (should_filter_binprm != NULL)
  598. goto out;
  599. int zero = 0;
  600. struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
  601. if (!proc_exec_data)
  602. goto out;
  603. if (INODE_FILTER && inode != INODE_FILTER)
  604. return 0;
  605. u32 pid = get_userspace_pid();
  606. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  607. proc_exec_data->meta.type = EXEC_EVENT;
  608. proc_exec_data->bin_path_length = 0;
  609. proc_exec_data->cmdline_length = 0;
  610. proc_exec_data->environment_length = 0;
  611. void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
  612. proc_exec_data->payload);
  613. payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
  614. struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
  615. proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
  616. proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
  617. proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
  618. proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
  619. const char* filename = BPF_CORE_READ(bprm, filename);
  620. size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
  621. barrier_var(bin_path_length);
  622. if (bin_path_length <= MAX_FILENAME_LEN) {
  623. barrier_var(bin_path_length);
  624. proc_exec_data->bin_path_length = bin_path_length;
  625. payload += bin_path_length;
  626. }
  627. void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
  628. void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
  629. unsigned int cmdline_length = probe_read_lim(payload, arg_start,
  630. arg_end - arg_start, MAX_ARGS_LEN);
  631. if (cmdline_length <= MAX_ARGS_LEN) {
  632. barrier_var(cmdline_length);
  633. proc_exec_data->cmdline_length = cmdline_length;
  634. payload += cmdline_length;
  635. }
  636. if (READ_ENVIRON_FROM_EXEC) {
  637. void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
  638. void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
  639. unsigned long env_len = probe_read_lim(payload, env_start,
  640. env_end - env_start, MAX_ENVIRON_LEN);
  641. if (cmdline_length <= MAX_ENVIRON_LEN) {
  642. proc_exec_data->environment_length = env_len;
  643. payload += env_len;
  644. }
  645. }
  646. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
  647. unsigned long data_len = payload - (void*)proc_exec_data;
  648. data_len = data_len > sizeof(struct var_exec_data_t)
  649. ? sizeof(struct var_exec_data_t)
  650. : data_len;
  651. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
  652. out:
  653. bpf_stats_exit(&stats_ctx);
  654. return 0;
  655. }
  656. SEC("kretprobe/do_filp_open")
  657. int kprobe_ret__do_filp_open(struct pt_regs* ctx)
  658. {
  659. struct bpf_func_stats_ctx stats_ctx;
  660. bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
  661. struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
  662. if (filp == NULL || IS_ERR(filp))
  663. goto out;
  664. unsigned int flags = BPF_CORE_READ(filp, f_flags);
  665. if ((flags & (O_RDWR | O_WRONLY)) == 0)
  666. goto out;
  667. if ((flags & O_TMPFILE) > 0)
  668. goto out;
  669. struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
  670. umode_t mode = BPF_CORE_READ(file_inode, i_mode);
  671. if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
  672. S_ISSOCK(mode))
  673. goto out;
  674. struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
  675. u32 device_id = 0;
  676. u64 file_ino = 0;
  677. if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
  678. goto out;
  679. int zero = 0;
  680. struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
  681. if (!filemod_data)
  682. goto out;
  683. u32 pid = get_userspace_pid();
  684. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  685. filemod_data->meta.type = FILEMOD_EVENT;
  686. filemod_data->fmod_type = FMOD_OPEN;
  687. filemod_data->dst_flags = flags;
  688. filemod_data->src_inode = 0;
  689. filemod_data->dst_inode = file_ino;
  690. filemod_data->src_device_id = 0;
  691. filemod_data->dst_device_id = device_id;
  692. filemod_data->src_filepath_length = 0;
  693. filemod_data->dst_filepath_length = 0;
  694. void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
  695. filemod_data->payload);
  696. payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
  697. size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
  698. barrier_var(len);
  699. if (len <= MAX_FILEPATH_LENGTH) {
  700. barrier_var(len);
  701. payload += len;
  702. filemod_data->dst_filepath_length = len;
  703. }
  704. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
  705. unsigned long data_len = payload - (void*)filemod_data;
  706. data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
  707. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
  708. out:
  709. bpf_stats_exit(&stats_ctx);
  710. return 0;
  711. }
  712. SEC("kprobe/vfs_link")
  713. int BPF_KPROBE(kprobe__vfs_link,
  714. struct dentry* old_dentry, struct user_namespace *mnt_userns,
  715. struct inode* dir, struct dentry* new_dentry,
  716. struct inode** delegated_inode)
  717. {
  718. struct bpf_func_stats_ctx stats_ctx;
  719. bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
  720. u32 src_device_id = 0;
  721. u64 src_file_ino = 0;
  722. u32 dst_device_id = 0;
  723. u64 dst_file_ino = 0;
  724. if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
  725. !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
  726. goto out;
  727. int zero = 0;
  728. struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
  729. if (!filemod_data)
  730. goto out;
  731. u32 pid = get_userspace_pid();
  732. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  733. filemod_data->meta.type = FILEMOD_EVENT;
  734. filemod_data->fmod_type = FMOD_LINK;
  735. filemod_data->dst_flags = 0;
  736. filemod_data->src_inode = src_file_ino;
  737. filemod_data->dst_inode = dst_file_ino;
  738. filemod_data->src_device_id = src_device_id;
  739. filemod_data->dst_device_id = dst_device_id;
  740. filemod_data->src_filepath_length = 0;
  741. filemod_data->dst_filepath_length = 0;
  742. void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
  743. filemod_data->payload);
  744. payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
  745. size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
  746. barrier_var(len);
  747. if (len <= MAX_FILEPATH_LENGTH) {
  748. barrier_var(len);
  749. payload += len;
  750. filemod_data->src_filepath_length = len;
  751. }
  752. len = read_absolute_file_path_from_dentry(new_dentry, payload);
  753. barrier_var(len);
  754. if (len <= MAX_FILEPATH_LENGTH) {
  755. barrier_var(len);
  756. payload += len;
  757. filemod_data->dst_filepath_length = len;
  758. }
  759. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
  760. unsigned long data_len = payload - (void*)filemod_data;
  761. data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
  762. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
  763. out:
  764. bpf_stats_exit(&stats_ctx);
  765. return 0;
  766. }
  767. SEC("kprobe/vfs_symlink")
  768. int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
  769. const char* oldname)
  770. {
  771. struct bpf_func_stats_ctx stats_ctx;
  772. bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
  773. u32 dst_device_id = 0;
  774. u64 dst_file_ino = 0;
  775. if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
  776. goto out;
  777. int zero = 0;
  778. struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
  779. if (!filemod_data)
  780. goto out;
  781. u32 pid = get_userspace_pid();
  782. struct task_struct* task = (struct task_struct*)bpf_get_current_task();
  783. filemod_data->meta.type = FILEMOD_EVENT;
  784. filemod_data->fmod_type = FMOD_SYMLINK;
  785. filemod_data->dst_flags = 0;
  786. filemod_data->src_inode = 0;
  787. filemod_data->dst_inode = dst_file_ino;
  788. filemod_data->src_device_id = 0;
  789. filemod_data->dst_device_id = dst_device_id;
  790. filemod_data->src_filepath_length = 0;
  791. filemod_data->dst_filepath_length = 0;
  792. void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
  793. filemod_data->payload);
  794. payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
  795. size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
  796. barrier_var(len);
  797. if (len <= MAX_FILEPATH_LENGTH) {
  798. barrier_var(len);
  799. payload += len;
  800. filemod_data->src_filepath_length = len;
  801. }
  802. len = read_absolute_file_path_from_dentry(dentry, payload);
  803. barrier_var(len);
  804. if (len <= MAX_FILEPATH_LENGTH) {
  805. barrier_var(len);
  806. payload += len;
  807. filemod_data->dst_filepath_length = len;
  808. }
  809. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
  810. unsigned long data_len = payload - (void*)filemod_data;
  811. data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
  812. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
  813. out:
  814. bpf_stats_exit(&stats_ctx);
  815. return 0;
  816. }
  817. SEC("raw_tracepoint/sched_process_fork")
  818. int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
  819. {
  820. struct bpf_func_stats_ctx stats_ctx;
  821. bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
  822. int zero = 0;
  823. struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
  824. if (!fork_data)
  825. goto out;
  826. struct task_struct* parent = (struct task_struct*)ctx->args[0];
  827. struct task_struct* child = (struct task_struct*)ctx->args[1];
  828. fork_data->meta.type = FORK_EVENT;
  829. void* payload = populate_var_metadata(&fork_data->meta, child,
  830. BPF_CORE_READ(child, pid), fork_data->payload);
  831. fork_data->parent_pid = BPF_CORE_READ(parent, pid);
  832. fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
  833. fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
  834. bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
  835. unsigned long data_len = payload - (void*)fork_data;
  836. data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
  837. bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
  838. out:
  839. bpf_stats_exit(&stats_ctx);
  840. return 0;
  841. }
  842. char _license[] SEC("license") = "GPL";