walt_rt.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <trace/hooks/sched.h>

#include "walt.h"
#include "trace.h"

static DEFINE_PER_CPU(cpumask_var_t, walt_local_cpu_mask);
DEFINE_PER_CPU(u64, rt_task_arrival_time) = 0;
static bool long_running_rt_task_trace_rgstrd;
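/*
 * sched_switch hook: note the rq clock timestamp whenever a SCHED_FIFO task
 * (other than the per-CPU stop task) is switched in, and clear it otherwise,
 * so the tick handler below can detect long-running RT tasks.
 */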
static void rt_task_arrival_marker(void *unused, bool preempt,
				   struct task_struct *prev, struct task_struct *next,
				   unsigned int prev_state)
{
	unsigned int cpu = raw_smp_processor_id();

	if (next->policy == SCHED_FIFO && next != cpu_rq(cpu)->stop)
		per_cpu(rt_task_arrival_time, cpu) = rq_clock_task(this_rq());
	else
		per_cpu(rt_task_arrival_time, cpu) = 0;
}
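/*
 * scheduler_tick vendor hook: if the SCHED_FIFO task noted above has been
 * running for longer than sysctl_sched_long_running_rt_task_ms, report it
 * and trigger a BUG().
 */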
static void long_running_rt_task_notifier(void *unused, struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	unsigned int cpu = raw_smp_processor_id();

	if (!sysctl_sched_long_running_rt_task_ms)
		return;

	if (!per_cpu(rt_task_arrival_time, cpu))
		return;

	if (per_cpu(rt_task_arrival_time, cpu) && curr->policy != SCHED_FIFO) {
		/*
		 * It is possible that the scheduling policy of the current
		 * task gets changed after the arrival time stamp was noted
		 * during sched_switch of an RT task. To avoid such false
		 * positives, reset the arrival time stamp.
		 */
		per_cpu(rt_task_arrival_time, cpu) = 0;
		return;
	}

	/*
	 * Since we are called from the main tick, the rq clock task must have
	 * been updated very recently. Use it directly, instead of
	 * update_rq_clock_task(), to avoid warnings.
	 */
	if (rq->clock_task - per_cpu(rt_task_arrival_time, cpu) >
			sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC) {
		printk_deferred("RT task %s (%d) runtime > %u now=%llu task arrival time=%llu runtime=%llu\n",
				curr->comm, curr->pid,
				sysctl_sched_long_running_rt_task_ms * MSEC_TO_NSEC,
				rq->clock_task,
				per_cpu(rt_task_arrival_time, cpu),
				rq->clock_task - per_cpu(rt_task_arrival_time, cpu));
		BUG();
	}
}
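/*
 * sysctl handler for sched_long_running_rt_task_ms: non-zero values below
 * 800 ms are raised to 800 ms, and the tracking hooks above are registered
 * on the first write.
 */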
int sched_long_running_rt_task_ms_handler(struct ctl_table *table, int write,
					  void __user *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret;
	static DEFINE_MUTEX(mutex);

	mutex_lock(&mutex);

	ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);

	if (sysctl_sched_long_running_rt_task_ms > 0 &&
	    sysctl_sched_long_running_rt_task_ms < 800)
		sysctl_sched_long_running_rt_task_ms = 800;

	if (write && !long_running_rt_task_trace_rgstrd) {
		register_trace_sched_switch(rt_task_arrival_marker, NULL);
		register_trace_android_vh_scheduler_tick(long_running_rt_task_notifier, NULL);
		long_running_rt_task_trace_rgstrd = true;
	}

	mutex_unlock(&mutex);
	return ret;
}
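/*
 * Cluster-by-cluster scan of the candidate CPUs in lowest_mask: skip CPUs
 * that are inactive, halted, under high IRQ load, or would be overutilized
 * by this task, then pick the least-loaded remaining CPU, breaking ties in
 * favour of the previous CPU, a shallower idle state and a lower cumulative
 * window demand.
 */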
static void walt_rt_energy_aware_wake_cpu(struct task_struct *task, struct cpumask *lowest_mask,
					  int ret, int *best_cpu)
{
	int cpu;
	unsigned long util, best_cpu_util = ULONG_MAX;
	unsigned long best_cpu_util_cum = ULONG_MAX;
	unsigned long util_cum;
	unsigned long tutil = task_util(task);
	unsigned int best_idle_exit_latency = UINT_MAX;
	unsigned int cpu_idle_exit_latency = UINT_MAX;
	bool boost_on_big = rt_boost_on_big();
	int cluster;
	int order_index = (boost_on_big && num_sched_clusters > 1) ? 1 : 0;
	int end_index = 0;
	bool best_cpu_lt = true;

	if (unlikely(walt_disabled))
		return;

	if (!ret)
		return; /* No targets found */

	rcu_read_lock();

	if (num_sched_clusters > 3 && order_index == 0)
		end_index = 1;

	for (cluster = 0; cluster < num_sched_clusters; cluster++) {
		for_each_cpu_and(cpu, lowest_mask, &cpu_array[order_index][cluster]) {
			bool lt;

			trace_sched_cpu_util(cpu, lowest_mask);

			if (!cpu_active(cpu))
				continue;

			if (cpu_halted(cpu))
				continue;

			if (sched_cpu_high_irqload(cpu))
				continue;

			if (__cpu_overutilized(cpu, tutil))
				continue;

			util = cpu_util(cpu);

			lt = (walt_low_latency_task(cpu_rq(cpu)->curr) ||
			      walt_nr_rtg_high_prio(cpu));

			/*
			 * When the best is suitable and the current is not,
			 * skip it.
			 */
			if (lt && !best_cpu_lt)
				continue;

			/*
			 * Either both are suitable or both unsuitable; load
			 * takes precedence.
			 */
			if (!(best_cpu_lt ^ lt) && (util > best_cpu_util))
				continue;

			/*
			 * If the previous CPU has the same load, keep it as
			 * best_cpu.
			 */
			if (best_cpu_util == util && *best_cpu == task_cpu(task))
				continue;

			/*
			 * If the candidate CPU is the previous CPU, select it.
			 * Otherwise, if its load is the same as best_cpu's and
			 * it is in a shallower C-state, select it. If all of
			 * the above conditions are equal, select the CPU with
			 * the least cumulative window demand.
			 */
			cpu_idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(cpu));
			util_cum = cpu_util_cum(cpu);

			if (cpu != task_cpu(task) && best_cpu_util == util) {
				if (best_idle_exit_latency < cpu_idle_exit_latency)
					continue;

				if (best_idle_exit_latency == cpu_idle_exit_latency &&
				    best_cpu_util_cum < util_cum)
					continue;
			}

			best_idle_exit_latency = cpu_idle_exit_latency;
			best_cpu_util_cum = util_cum;
			best_cpu_util = util;
			*best_cpu = cpu;
			best_cpu_lt = lt;
		}

		if (cluster < end_index) {
			if (*best_cpu == -1 || !available_idle_cpu(*best_cpu))
				continue;
		}

		if (*best_cpu != -1)
			break;
	}

	rcu_read_unlock();
}
#ifdef CONFIG_UCLAMP_TASK
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int min_cap;
	unsigned int max_cap;
	unsigned int cpu_cap;

	min_cap = uclamp_eff_value(p, UCLAMP_MIN);
	max_cap = uclamp_eff_value(p, UCLAMP_MAX);

	cpu_cap = capacity_orig_of(cpu);

	return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool walt_rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif
/*
 * WALT-specific should_honor_rt_sync (see rt.c). This honors the sync flag
 * regardless of whether the current waker is CFS or RT.
 */
static inline bool walt_should_honor_rt_sync(struct rq *rq, struct task_struct *p,
					     bool sync)
{
	return sync &&
	       p->prio <= rq->rt.highest_prio.next &&
	       rq->rt.rt_nr_running <= 2;
}

enum rt_fastpaths {
	NONE = 0,
	NON_WAKEUP,
	SYNC_WAKEUP,
	CLUSTER_PACKING_FASTPATH,
};
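/*
 * android_rvh_select_task_rq_rt hook: WALT's RT wakeup CPU selection.
 * Honors sync wakeups, packs onto a cluster packing CPU when one is
 * available, and otherwise falls back to the energy-aware search above,
 * steering clear of halted CPUs.
 */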
static void walt_select_task_rq_rt(void *unused, struct task_struct *task, int cpu,
				   int sd_flag, int wake_flags, int *new_cpu)
{
	struct task_struct *curr;
	struct rq *rq, *this_cpu_rq;
	bool may_not_preempt;
	bool sync = !!(wake_flags & WF_SYNC);
	int ret, target = -1, this_cpu;
	struct cpumask *lowest_mask = NULL;
	int packing_cpu = -1;
	int fastpath = NONE;
	struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };
	struct walt_task_struct *wts;

	if (unlikely(walt_disabled))
		return;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) {
		fastpath = NON_WAKEUP;
		goto out;
	}

	this_cpu = raw_smp_processor_id();
	this_cpu_rq = cpu_rq(this_cpu);
	wts = (struct walt_task_struct *) task->android_vendor_data1;

	/*
	 * Respect the sync flag as long as the task can run on this CPU.
	 */
	if (sysctl_sched_sync_hint_enable && cpu_active(this_cpu) && !cpu_halted(this_cpu) &&
	    cpumask_test_cpu(this_cpu, task->cpus_ptr) &&
	    cpumask_test_cpu(this_cpu, &wts->reduce_mask) &&
	    walt_should_honor_rt_sync(this_cpu_rq, task, sync)) {
		fastpath = SYNC_WAKEUP;
		*new_cpu = this_cpu;
		goto out;
	}

	*new_cpu = cpu; /* previous CPU as back up */
	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is a softirq task,
	 * it may run without preemption for a time that is
	 * ill-suited for a waiting RT task. Therefore, try to
	 * wake this RT task on another runqueue.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 *
	 * This test is optimistic; if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We take into account the capacity of the CPU to ensure it fits the
	 * requirement of the task - which is only important on heterogeneous
	 * systems like big.LITTLE.
	 */
	may_not_preempt = task_may_not_preempt(curr, cpu);

	lowest_mask = this_cpu_cpumask_var_ptr(walt_local_cpu_mask);

	/*
	 * If we're on an asym system, ensure we consider the different
	 * capacities of the CPUs when searching for the lowest_mask.
	 */
	ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, task,
				  lowest_mask, walt_rt_task_fits_capacity);

	if (cpumask_test_cpu(0, &wts->reduce_mask))
		packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);

	if (packing_cpu >= 0) {
		fastpath = CLUSTER_PACKING_FASTPATH;
		*new_cpu = packing_cpu;
		goto unlock;
	}

	cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
	if (!cpumask_empty(&lowest_mask_reduced))
		walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, &target);
	if (target == -1)
		walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, &target);

	/*
	 * If the cpu is non-preemptible, prefer a remote cpu
	 * even if it's running a higher-prio task.
	 * Otherwise, don't bother moving it if the destination CPU is
	 * not running a lower priority task.
	 */
	if (target != -1 &&
	    (may_not_preempt || task->prio < cpu_rq(target)->rt.highest_prio.curr))
		*new_cpu = target;

	/* if the backup or chosen cpu is halted, pick something else */
	if (cpu_halted(*new_cpu)) {
		cpumask_t non_halted;

		/* choose the lowest-order, unhalted, allowed CPU */
		cpumask_andnot(&non_halted, task->cpus_ptr, cpu_halt_mask);
		target = cpumask_first(&non_halted);
		if (target < nr_cpu_ids)
			*new_cpu = target;
	}

unlock:
	rcu_read_unlock();
out:
	trace_sched_select_task_rt(task, fastpath, *new_cpu, lowest_mask);
}
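/*
 * android_rvh_find_lowest_rq hook: same placement policy as above for the
 * push/pull path, with a cluster packing fastpath and a fallback that
 * strips halted CPUs from lowest_mask when no best CPU was found.
 */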
static void walt_rt_find_lowest_rq(void *unused, struct task_struct *task,
				   struct cpumask *lowest_mask, int ret, int *best_cpu)
{
	int packing_cpu = -1;
	int fastpath = 0;
	struct walt_task_struct *wts;
	struct cpumask lowest_mask_reduced = { CPU_BITS_NONE };

	if (unlikely(walt_disabled))
		return;

	wts = (struct walt_task_struct *) task->android_vendor_data1;
	if (cpumask_test_cpu(0, &wts->reduce_mask))
		packing_cpu = walt_find_and_choose_cluster_packing_cpu(0, task);

	if (packing_cpu >= 0) {
		*best_cpu = packing_cpu;
		fastpath = CLUSTER_PACKING_FASTPATH;
		goto out;
	}

	cpumask_and(&lowest_mask_reduced, lowest_mask, &wts->reduce_mask);
	if (!cpumask_empty(&lowest_mask_reduced))
		walt_rt_energy_aware_wake_cpu(task, &lowest_mask_reduced, ret, best_cpu);
	if (*best_cpu == -1)
		walt_rt_energy_aware_wake_cpu(task, lowest_mask, ret, best_cpu);

	/*
	 * WALT was not able to find a non-halted best cpu. Ensure that
	 * find_lowest_rq doesn't use a halted cpu going forward, but
	 * does a best effort itself to find a good CPU.
	 */
	if (*best_cpu == -1)
		cpumask_andnot(lowest_mask, lowest_mask, cpu_halt_mask);

out:
	trace_sched_rt_find_lowest_rq(task, fastpath, *best_cpu, lowest_mask);
}
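/*
 * Allocate the per-CPU lowest_mask scratch space and register the WALT RT
 * placement hooks.
 */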
void walt_rt_init(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		if (!(zalloc_cpumask_var_node(&per_cpu(walt_local_cpu_mask, i),
					      GFP_KERNEL, cpu_to_node(i)))) {
			pr_err("walt_local_cpu_mask alloc failed for cpu%d\n", i);
			return;
		}
	}

	register_trace_android_rvh_select_task_rq_rt(walt_select_task_rq_rt, NULL);
	register_trace_android_rvh_find_lowest_rq(walt_rt_find_lowest_rq, NULL);
}