/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KERNEL_STATS_H
#define _KERNEL_STATS_H

#ifdef CONFIG_SCHEDSTATS

extern struct static_key_false sched_schedstats;

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
        if (rq) {
                rq->rq_sched_info.run_delay += delta;
                rq->rq_sched_info.pcount++;
        }
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
        if (rq)
                rq->rq_cpu_time += delta;
}

static inline void
rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
{
        if (rq)
                rq->rq_sched_info.run_delay += delta;
}

#define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
#define __schedstat_inc(var) do { var++; } while (0)
#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
#define __schedstat_add(var, amt) do { var += (amt); } while (0)
#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
#define __schedstat_set(var, val) do { var = (val); } while (0)
#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
#define schedstat_val(var) (var)
#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
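
/*
 * Typical usage (sketch): when the value being recorded is cheap, use the
 * schedstat_*() wrappers directly; when it is expensive to compute, test
 * schedstat_enabled() once and use the __schedstat_*() variants inside that
 * branch so nothing is computed while schedstats are off. The field names
 * below are purely illustrative:
 *
 *      schedstat_inc(rq->yld_count);
 *
 *      if (schedstat_enabled()) {
 *              u64 wait = rq_clock(rq) - schedstat_val(stats->wait_start);
 *
 *              __schedstat_set(stats->wait_max, max(stats->wait_max, wait));
 *              __schedstat_inc(stats->wait_count);
 *              __schedstat_add(stats->wait_sum, wait);
 *      }
 */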

void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
                               struct sched_statistics *stats);
void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
                             struct sched_statistics *stats);
void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
                                    struct sched_statistics *stats);

static inline void
check_schedstat_required(void)
{
        if (schedstat_enabled())
                return;

        /* Force schedstat enabled if a dependent tracepoint is active */
        if (trace_sched_stat_wait_enabled() ||
            trace_sched_stat_sleep_enabled() ||
            trace_sched_stat_iowait_enabled() ||
            trace_sched_stat_blocked_enabled() ||
            trace_sched_stat_runtime_enabled())
                printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
}

#else /* !CONFIG_SCHEDSTATS: */

static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
# define schedstat_enabled() 0
# define __schedstat_inc(var) do { } while (0)
# define schedstat_inc(var) do { } while (0)
# define __schedstat_add(var, amt) do { } while (0)
# define schedstat_add(var, amt) do { } while (0)
# define __schedstat_set(var, val) do { } while (0)
# define schedstat_set(var, val) do { } while (0)
# define schedstat_val(var) 0
# define schedstat_val_or_zero(var) 0
# define __update_stats_wait_start(rq, p, stats) do { } while (0)
# define __update_stats_wait_end(rq, p, stats) do { } while (0)
# define __update_stats_enqueue_sleeper(rq, p, stats) do { } while (0)
# define check_schedstat_required() do { } while (0)

#endif /* CONFIG_SCHEDSTATS */

#ifdef CONFIG_FAIR_GROUP_SCHED
struct sched_entity_stats {
        struct sched_entity     se;
        struct sched_statistics stats;
} __no_randomize_layout;
#endif

static inline struct sched_statistics *
__schedstats_from_se(struct sched_entity *se)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
        if (!entity_is_task(se))
                return &container_of(se, struct sched_entity_stats, se)->stats;
#endif
        return &task_of(se)->stats;
}
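
/*
 * Usage sketch: fair-class accounting resolves the statistics through the
 * scheduling entity rather than assuming a task, so one call site covers
 * both tasks (stats live in task_struct) and, with CONFIG_FAIR_GROUP_SCHED,
 * group entities (stats live next to the embedded se in struct
 * sched_entity_stats). Variable and field names are illustrative:
 *
 *      struct sched_statistics *stats = __schedstats_from_se(se);
 *
 *      __schedstat_set(stats->wait_start, rq_clock(rq));
 */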

#ifdef CONFIG_PSI
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
                     bool sleep);
void psi_account_irqtime(struct task_struct *task, u32 delta);

/*
 * PSI tracks state that persists across sleeps, such as iowaits and
 * memory stalls. As a result, it has to distinguish between sleeps,
 * where a task's runnable state changes, and requeues, where a task
 * and its state are being moved between CPUs and runqueues.
 */
static inline void psi_enqueue(struct task_struct *p, bool wakeup)
{
        int clear = 0, set = TSK_RUNNING;

        if (static_branch_likely(&psi_disabled))
                return;

        if (p->in_memstall)
                set |= TSK_MEMSTALL_RUNNING;

        if (!wakeup || p->sched_psi_wake_requeue) {
                if (p->in_memstall)
                        set |= TSK_MEMSTALL;
                if (p->sched_psi_wake_requeue)
                        p->sched_psi_wake_requeue = 0;
        } else {
                if (p->in_iowait)
                        clear |= TSK_IOWAIT;
        }

        psi_task_change(p, clear, set);
}
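
/*
 * Call-site sketch: the core scheduler is expected to pass wakeup=true only
 * for a genuine sleep-to-runnable transition, e.g. (flag name shown for
 * illustration of the enqueue path):
 *
 *      psi_enqueue(p, flags & ENQUEUE_WAKEUP);
 *
 * On a plain requeue/migration (wakeup=false, or sched_psi_wake_requeue set
 * by psi_ttwu_dequeue() below), the sleep-persistent TSK_MEMSTALL state is
 * re-registered on the new runqueue instead of TSK_IOWAIT being cleared.
 */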

static inline void psi_dequeue(struct task_struct *p, bool sleep)
{
        int clear = TSK_RUNNING;

        if (static_branch_likely(&psi_disabled))
                return;

        /*
         * A voluntary sleep is a dequeue followed by a task switch. To
         * avoid walking all ancestors twice, psi_task_switch() handles
         * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
         * Do nothing here.
         */
        if (sleep)
                return;

        if (p->in_memstall)
                clear |= (TSK_MEMSTALL | TSK_MEMSTALL_RUNNING);

        psi_task_change(p, clear, 0);
}

static inline void psi_ttwu_dequeue(struct task_struct *p)
{
        if (static_branch_likely(&psi_disabled))
                return;
        /*
         * Is the task being migrated during a wakeup? Make sure to
         * deregister its sleep-persistent psi states from the old
         * queue, and let psi_enqueue() know it has to requeue.
         */
        if (unlikely(p->in_iowait || p->in_memstall)) {
                struct rq_flags rf;
                struct rq *rq;
                int clear = 0;

                if (p->in_iowait)
                        clear |= TSK_IOWAIT;
                if (p->in_memstall)
                        clear |= TSK_MEMSTALL;

                rq = __task_rq_lock(p, &rf);
                psi_task_change(p, clear, 0);
                p->sched_psi_wake_requeue = 1;
                __task_rq_unlock(rq, &rf);
        }
}

static inline void psi_sched_switch(struct task_struct *prev,
                                    struct task_struct *next,
                                    bool sleep)
{
        if (static_branch_likely(&psi_disabled))
                return;

        psi_task_switch(prev, next, sleep);
}

#else /* CONFIG_PSI */
static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
                                    struct task_struct *next,
                                    bool sleep) {}
static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
#endif /* CONFIG_PSI */

#ifdef CONFIG_SCHED_INFO

/*
 * We are interested in knowing how long it was from the *first* time a
 * task was queued to the time that it finally hit a CPU. We call this routine
 * from dequeue_task() to account for possible rq->clock skew across CPUs. The
 * delta taken on each CPU would annul the skew.
 */
static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
{
        unsigned long long delta = 0;

        if (!t->sched_info.last_queued)
                return;

        delta = rq_clock(rq) - t->sched_info.last_queued;
        t->sched_info.last_queued = 0;
        t->sched_info.run_delay += delta;

        rq_sched_info_dequeue(rq, delta);
}

/*
 * Called when a task finally hits the CPU. We can now calculate how
 * long it was waiting to run. We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
{
        unsigned long long now, delta = 0;

        if (!t->sched_info.last_queued)
                return;

        now = rq_clock(rq);
        delta = now - t->sched_info.last_queued;
        t->sched_info.last_queued = 0;
        t->sched_info.run_delay += delta;
        t->sched_info.last_arrival = now;
        t->sched_info.pcount++;

        rq_sched_info_arrive(rq, delta);
}
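
/*
 * Worked example with made-up timestamps: a task is queued at rq_clock()
 * == T0 (sched_info_enqueue() sets last_queued = T0) and first gets the
 * CPU at T1. sched_info_arrive() then adds T1 - T0 to run_delay, records
 * last_arrival = T1 for the later sched_info_depart(), increments pcount,
 * and clears last_queued so a subsequent requeue starts a fresh wait
 * measurement.
 */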

/*
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is not already set. It's assumed that
 * sched_info_dequeue() will clear that stamp when appropriate.
 */
static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
{
        if (!t->sched_info.last_queued)
                t->sched_info.last_queued = rq_clock(rq);
}

/*
 * Called when a process ceases being the active-running process involuntarily
 * due, typically, to expiring its time slice (this may also be called when
 * switching to the idle task). Now we can calculate how long we ran.
 * Also, if the process is still in the TASK_RUNNING state, call
 * sched_info_enqueue() to mark that it has now again started waiting on
 * the runqueue.
 */
static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
{
        unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;

        rq_sched_info_depart(rq, delta);

        if (task_is_running(t))
                sched_info_enqueue(rq, t);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice. (This may also be called when switching to or from
 * the idle task.) We are only called when prev != next.
 */
static inline void
sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
        /*
         * prev now departs the CPU. It's not interesting to record
         * stats about how efficient we were at scheduling the idle
         * process, however.
         */
        if (prev != rq->idle)
                sched_info_depart(rq, prev);

        if (next != rq->idle)
                sched_info_arrive(rq, next);
}

#else /* !CONFIG_SCHED_INFO: */
# define sched_info_enqueue(rq, t) do { } while (0)
# define sched_info_dequeue(rq, t) do { } while (0)
# define sched_info_switch(rq, t, next) do { } while (0)
#endif /* CONFIG_SCHED_INFO */

#endif /* _KERNEL_STATS_H */