sched_avg.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved.
 */

/*
 * Scheduler hook for average runqueue determination
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/math64.h>

#include "walt.h"
#include "trace.h"
#include "hyst_qos.h"

struct user_req us_req;
bool qos_reg;

static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr);
static DEFINE_PER_CPU(u64, nr_max);

static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;

static DEFINE_PER_CPU(atomic64_t, busy_hyst_end_time) = ATOMIC64_INIT(0);
static DEFINE_PER_CPU(u64, hyst_time);
static DEFINE_PER_CPU(u64, coloc_hyst_busy);
static DEFINE_PER_CPU(u64, coloc_hyst_time);
static DEFINE_PER_CPU(u64, util_hyst_time);
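
/*
 * nr_prod_sum/nr_big_prod_sum accumulate the time-weighted nr_running and
 * big-task counts since the last poll, last_time records when they were last
 * folded in, nr caches the current nr_running and nr_max the peak seen in the
 * window; nr_lock serializes the writers against the reader. hyst_time,
 * coloc_hyst_time/coloc_hyst_busy and util_hyst_time are the per-CPU
 * hysteresis parameters programmed by sched_update_hyst_times(), and
 * busy_hyst_end_time is the resulting low-power-mode bias deadline written by
 * update_busy_hyst_end_time().
 */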

#define NR_THRESHOLD_PCT	40
#define MAX_RTGB_TIME	(sysctl_sched_coloc_busy_hyst_max_ms * NSEC_PER_MSEC)

struct sched_avg_stats stats[WALT_NR_CPUS];
unsigned int cstats_util_pct[MAX_CLUSTERS];
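/*
 * stats[] holds the per-CPU averages computed by sched_get_nr_running_avg();
 * cstats_util_pct[] caches the per-cluster utilization percentage refreshed
 * in the same pass and returned by sched_get_cluster_util_pct().
 */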

/**
 * sched_get_cluster_util_pct
 * @return: the percentage of this cluster that was used in the
 *	    previous window.
 *
 * This routine may be called any number of times as needed during
 * a window, but will always return the same result until window
 * rollover.
 */
unsigned int sched_get_cluster_util_pct(struct walt_sched_cluster *cluster)
{
	unsigned int cluster_util_pct = 0;

	if (cluster->id < MAX_CLUSTERS)
		cluster_util_pct = cstats_util_pct[cluster->id];

	return cluster_util_pct;
}

/**
 * sched_get_nr_running_avg
 * @return: Average nr_running, iowait and nr_big_tasks value since last poll.
 *	    Returns the avg * 100 to provide up to two decimal points
 *	    of accuracy.
 *
 * Obtains the average nr_running value since the last poll.
 * This function may not be called concurrently with itself.
 *
 * It is assumed that this function is called at most once per window
 * rollover.
 */
struct sched_avg_stats *sched_get_nr_running_avg(void)
{
	int cpu;
	u64 curr_time = sched_clock();
	u64 period = curr_time - last_get_time;
	u64 tmp_nr, tmp_misfit;
	bool any_hyst_time = false;
	struct walt_sched_cluster *cluster;

	if (unlikely(walt_disabled))
		return NULL;

	if (!period)
		goto done;

	/* read and reset nr_running counts */
	for_each_possible_cpu(cpu) {
		unsigned long flags;
		u64 diff;

		spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
		curr_time = sched_clock();
		diff = curr_time - per_cpu(last_time, cpu);
		BUG_ON((s64)diff < 0);

		tmp_nr = per_cpu(nr_prod_sum, cpu);
		tmp_nr += per_cpu(nr, cpu) * diff;
		tmp_nr = div64_u64((tmp_nr * 100), period);

		tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
		tmp_misfit += walt_big_tasks(cpu) * diff;
		tmp_misfit = div64_u64((tmp_misfit * 100), period);

		/*
		 * Adding NR_THRESHOLD_PCT before dividing by 100 means a
		 * fractional task is counted only if it ran for at least
		 * (100 - NR_THRESHOLD_PCT)% of the last window, to
		 * compensate for any over-estimation being done.
		 */
		stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
								100);
		stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
						NR_THRESHOLD_PCT), 100);
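		/*
		 * For example, with NR_THRESHOLD_PCT == 40, tmp_nr == 250
		 * (2.5 runnable tasks on average) rounds down to
		 * stats[cpu].nr == 2, while tmp_nr == 265 rounds up to 3,
		 * since (265 + 40) / 100 == 3.
		 */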
		stats[cpu].nr_max = per_cpu(nr_max, cpu);
		stats[cpu].nr_scaled = tmp_nr;

		trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
				stats[cpu].nr_misfit, stats[cpu].nr_max,
				stats[cpu].nr_scaled);

		per_cpu(last_time, cpu) = curr_time;
		per_cpu(nr_prod_sum, cpu) = 0;
		per_cpu(nr_big_prod_sum, cpu) = 0;
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
	}

	/* collect cluster load stats */
	for_each_sched_cluster(cluster) {
		unsigned int num_cpus = cpumask_weight(&cluster->cpus);
		unsigned int sum_util_pct = 0;

		/* load is already scaled, see freq_policy_load/prev_runnable_sum */
		for_each_cpu(cpu, &cluster->cpus) {
			struct rq *rq = cpu_rq(cpu);
			struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));

			/*
			 * compute this CPU's utilization as a percentage of
			 * its capacity, and sum it across all CPUs
			 */
			sum_util_pct +=
				(wrq->util * 100) / arch_scale_cpu_capacity(cpu);
		}

		/* calculate the average per-cpu utilization */
		cstats_util_pct[cluster->id] = sum_util_pct / num_cpus;
	}
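	/*
	 * For example, a four-CPU cluster whose per-CPU utilization
	 * percentages sum to 200 ends the window with
	 * cstats_util_pct[cluster->id] == 50.
	 */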

	for_each_possible_cpu(cpu) {
		if (per_cpu(coloc_hyst_time, cpu)) {
			any_hyst_time = true;
			break;
		}
	}
	if (any_hyst_time && get_rtgb_active_time() >= MAX_RTGB_TIME)
		sched_update_hyst_times();

	last_get_time = curr_time;

done:
	return &stats[0];
}
EXPORT_SYMBOL_GPL(sched_get_nr_running_avg);
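
/*
 * Recompute the per-CPU hysteresis parameters from the sysctl knobs: the
 * plain busy hysteresis follows sysctl_sched_busy_hyst_enable_cpus and the
 * current QoS value, the colocation hysteresis applies only while the
 * related-thread-group boost is active (and not in conservative boost or
 * past MAX_RTGB_TIME), and the util hysteresis follows its own enable mask.
 * coloc_hyst_busy is the colocation busy threshold scaled to CPU capacity.
 */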
void sched_update_hyst_times(void)
{
	bool rtgb_active;
	int cpu;
	unsigned long cpu_cap, coloc_busy_pct;

	rtgb_active = is_rtgb_active() && (sched_boost_type != CONSERVATIVE_BOOST)
			&& (get_rtgb_active_time() < MAX_RTGB_TIME);

	for_each_possible_cpu(cpu) {
		cpu_cap = arch_scale_cpu_capacity(cpu);
		coloc_busy_pct = sysctl_sched_coloc_busy_hyst_cpu_busy_pct[cpu];
		per_cpu(hyst_time, cpu) = (BIT(cpu)
			& sysctl_sched_busy_hyst_enable_cpus) ?
			busy_hyst_qos_value : 0;
		per_cpu(coloc_hyst_time, cpu) = ((BIT(cpu)
			& sysctl_sched_coloc_busy_hyst_enable_cpus)
			&& rtgb_active) ?
			sysctl_sched_coloc_busy_hyst_cpu[cpu] : 0;
		per_cpu(coloc_hyst_busy, cpu) = mult_frac(cpu_cap,
			coloc_busy_pct, 100);
		per_cpu(util_hyst_time, cpu) = (BIT(cpu)
			& sysctl_sched_util_busy_hyst_enable_cpus) ?
			sysctl_sched_util_busy_hyst_cpu[cpu] : 0;
	}
}

#define BUSY_NR_RUN		3
#define BUSY_LOAD_FACTOR	10
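
/*
 * Decide whether this CPU should be biased away from deep idle after a
 * (de)queue event. A trigger fires when the runqueue drops from at least
 * BUSY_NR_RUN tasks to fewer, when a dequeue leaves the CPU above
 * 1/BUSY_LOAD_FACTOR of its capacity or above the colocation busy threshold,
 * or when the summed utilization of all CPUs exceeds the per-CPU util
 * hysteresis threshold.
 */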
static inline void update_busy_hyst_end_time(int cpu, int enq,
				unsigned long prev_nr_run, u64 curr_time)
{
	bool nr_run_trigger = false;
	bool load_trigger = false, coloc_load_trigger = false;
	u64 agg_hyst_time, total_util = 0;
	bool util_load_trigger = false;
	int i;
	bool hyst_trigger, coloc_trigger;
	bool dequeue = (enq < 0);

	if (!per_cpu(hyst_time, cpu) && !per_cpu(coloc_hyst_time, cpu) &&
			!per_cpu(util_hyst_time, cpu))
		return;

	if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN)
		nr_run_trigger = true;

	if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) >
			capacity_orig_of(cpu))
		load_trigger = true;

	if (dequeue && cpu_util(cpu) > per_cpu(coloc_hyst_busy, cpu))
		coloc_load_trigger = true;

	if (dequeue) {
		for_each_possible_cpu(i) {
			total_util += cpu_util(i);
			if (total_util >= sysctl_sched_util_busy_hyst_cpu_util[cpu]) {
				util_load_trigger = true;
				break;
			}
		}
	}

	coloc_trigger = nr_run_trigger || coloc_load_trigger;
#if IS_ENABLED(CONFIG_SCHED_CONSERVATIVE_BOOST_LPM_BIAS)
	hyst_trigger = nr_run_trigger || load_trigger || (sched_boost_type == CONSERVATIVE_BOOST);
#else
	hyst_trigger = nr_run_trigger || load_trigger;
#endif
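
	/*
	 * Use the largest hysteresis among the classes whose trigger fired;
	 * a result of zero means no low-power-mode bias is applied.
	 */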
	agg_hyst_time = max(max(hyst_trigger ? per_cpu(hyst_time, cpu) : 0,
			coloc_trigger ? per_cpu(coloc_hyst_time, cpu) : 0),
			util_load_trigger ? per_cpu(util_hyst_time, cpu) : 0);

	if (agg_hyst_time) {
		atomic64_set(&per_cpu(busy_hyst_end_time, cpu),
				curr_time + agg_hyst_time);
		trace_sched_busy_hyst_time(cpu, agg_hyst_time, prev_nr_run,
				cpu_util(cpu), per_cpu(hyst_time, cpu),
				per_cpu(coloc_hyst_time, cpu),
				per_cpu(util_hyst_time, cpu));
	}
}
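
/*
 * sysctl handler for sched_busy_hyst: clamp the table length to the number
 * of possible CPUs and, on a successful write, lazily register the QoS
 * request ("PerfLock"), forward the new value to the QoS layer and refresh
 * the per-CPU hysteresis times.
 */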
int sched_busy_hyst_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	if (table->maxlen > (sizeof(unsigned int) * num_possible_cpus()))
		table->maxlen = sizeof(unsigned int) * num_possible_cpus();

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		if (!qos_reg) {
			hyst_add_request(&us_req, 0, "PerfLock");
			qos_reg = true;
		}
		hyst_update_request(&us_req, PM_QOS_MIN_LIMIT, sysctl_sched_busy_hyst);
		sched_update_hyst_times();
	}

	return ret;
}

/**
 * sched_update_nr_prod
 * @cpu: The CPU whose nr_running accounting is being updated.
 * @enq: enqueue/dequeue/misfit delta applied on this CPU.
 * @return: N/A
 *
 * Update the running average with the latest nr_running value for the CPU.
 */
void sched_update_nr_prod(int cpu, int enq)
{
	u64 diff;
	u64 curr_time;
	unsigned long flags, nr_running;

	spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
	nr_running = per_cpu(nr, cpu);
	curr_time = sched_clock();
	diff = curr_time - per_cpu(last_time, cpu);
	BUG_ON((s64)diff < 0);
	per_cpu(last_time, cpu) = curr_time;
	per_cpu(nr, cpu) = cpu_rq(cpu)->nr_running + enq;

	if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

	/* Don't update hyst time for misfit tasks */
	if (enq)
		update_busy_hyst_end_time(cpu, enq, nr_running, curr_time);
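
	/*
	 * Accumulate the previous nr_running and big-task counts weighted by
	 * the time they were in effect; for example, three runnable tasks
	 * held for 2 ms add 3 * 2000000 ns to nr_prod_sum.
	 */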
	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
	per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
	spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}

/*
 * Returns the CPU utilization % in the last window.
 */
unsigned int sched_get_cpu_util_pct(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	u64 util;
	unsigned long capacity, flags;
	unsigned int busy;
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu);

	raw_spin_lock_irqsave(&rq->__lock, flags);

	capacity = capacity_orig_of(cpu);

	util = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
	util = scale_time_to_util(util);
	raw_spin_unlock_irqrestore(&rq->__lock, flags);

	util = (util >= capacity) ? capacity : util;
	busy = div64_ul((util * 100), capacity);

	return busy;
}
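
/*
 * Report how long CPU low-power-mode entry should be restricted: returns 0
 * with *timeout set (in ns) while the CPU is reserved or still inside its
 * busy-hysteresis window, so the caller should pick the shallowest c-state;
 * returns INT_MAX when there is no restriction, and -EAGAIN if WALT is
 * disabled.
 */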
int sched_lpm_disallowed_time(int cpu, u64 *timeout)
{
	u64 now = sched_clock();
	u64 bias_end_time = atomic64_read(&per_cpu(busy_hyst_end_time, cpu));

	if (unlikely(walt_disabled))
		return -EAGAIN;

	if (unlikely(is_reserved(cpu))) {
		*timeout = 10 * NSEC_PER_MSEC;
		return 0; /* shallowest c-state */
	}

	if (now < bias_end_time) {
		*timeout = bias_end_time - now;
		return 0; /* shallowest c-state */
	}

	return INT_MAX; /* don't care */
}
EXPORT_SYMBOL_GPL(sched_lpm_disallowed_time);