offwaketime_kern.c

/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* read a field of a kernel structure via bpf_probe_read_kernel() */
#define _(P)							\
	({							\
		typeof(P) val;					\
		bpf_probe_read_kernel(&val, sizeof(val), &(P));	\
		val;						\
	})

#define MINBLOCK_US	1
#define MAX_ENTRIES	10000

struct key_t {
	char waker[TASK_COMM_LEN];
	char target[TASK_COMM_LEN];
	u32 wret;
	u32 tret;
};

/* accumulated off-CPU time in usec, keyed by waker/target comm and stack ids */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");

/* pid -> timestamp (ns) of when the task last went off-CPU */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");

struct wokeby_t {
	char name[TASK_COMM_LEN];
	u32 ret;
};

/* pid of the woken task -> comm and stack id of its waker */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");

/* stack traces referenced by the wret/tret stack ids above */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");

#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

/* on try_to_wake_up(), remember who woke the target task and from where */
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	struct wokeby_t woke;
	u32 pid;

	pid = _(p->pid);

	bpf_get_current_comm(&woke.name, sizeof(woke.name));
	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
	return 0;
}

/* fold one sleep interval into counts, pairing the target's wakeup stack
 * with the waker recorded earlier in the wokeby map
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	}

	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}
	(*val) += delta;
	return 0;
}

/* Default: hook the sched_switch tracepoint; change to "#if 0" to use the
 * finish_task_switch kprobe variant below instead.
 */
#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[TASK_COMM_LEN];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[TASK_COMM_LEN];
	int next_pid;
	int next_prio;
};

SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);

	/* record previous thread sleep time */
	u32 pid = _(p->pid);
#endif
	u64 delta, ts, *tsp;

	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	pid = bpf_get_current_pid_tgid();
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start, &pid);
	delta = delta / 1000;
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
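
In the kernel tree this object is driven by a matching user-space loader (offwaketime_user.c), which is not shown here. The sketch below is only an illustration, not that loader: it assumes libbpf >= 1.0 and an object file named offwaketime_kern.o, and simply opens the object, loads it, and auto-attaches each program from its SEC() annotation.

/* loader_sketch.c -- hypothetical stand-alone loader, NOT part of
 * offwaketime_kern.c; shown only to illustrate how the object above
 * could be attached with stock libbpf (assumes libbpf >= 1.0 and the
 * object file name offwaketime_kern.o).
 */
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;

	/* open and load the compiled BPF object (file name assumed) */
	obj = bpf_object__open_file("offwaketime_kern.o", NULL);
	if (!obj)
		return 1;
	if (bpf_object__load(obj)) {
		bpf_object__close(obj);
		return 1;
	}

	/* attach waker (kprobe) and oncpu (tracepoint) based on SEC() */
	bpf_object__for_each_program(prog, obj) {
		if (!bpf_program__attach(prog)) {
			fprintf(stderr, "failed to attach %s\n",
				bpf_program__name(prog));
			bpf_object__close(obj);
			return 1;
		}
	}

	/* the real sample would periodically read the counts and stackmap
	 * maps; here we just keep the probes alive
	 */
	pause();

	bpf_object__close(obj);
	return 0;
}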