walt_cfs.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/seq_file.h>
#include <trace/hooks/sched.h>
#include <trace/hooks/binder.h>

#include "walt.h"
#include "trace.h"

#include <../../../drivers/android/binder_internal.h>
#include "../../../drivers/android/binder_trace.h"
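
/*
 * Each WALT cluster caches a util -> EM cost lookup table so that the
 * energy estimation below does not have to walk the EM perf-state table
 * on every wakeup.  For a utilization u (0..1023) the requested frequency
 * is roughly fmax * u / scale_cpu, and the recorded cost is that of the
 * lowest perf state whose frequency is at least that value.  For example,
 * with fmax = 2000000 kHz and scale_cpu = 1024, u = 512 maps to about
 * 1000000 kHz and picks the first perf state at or above it.
 */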
static void create_util_to_cost_pd(struct em_perf_domain *pd)
{
	int util, cpu = cpumask_first(to_cpumask(pd->cpus));
	unsigned long fmax;
	unsigned long scale_cpu;
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
	struct walt_sched_cluster *cluster = wrq->cluster;

	fmax = (u64)pd->table[pd->nr_perf_states - 1].frequency;
	scale_cpu = arch_scale_cpu_capacity(cpu);

	for (util = 0; util < 1024; util++) {
		int j;
		int f = (fmax * util) / scale_cpu;
		struct em_perf_state *ps = &pd->table[0];

		for (j = 0; j < pd->nr_perf_states; j++) {
			ps = &pd->table[j];
			if (ps->frequency >= f)
				break;
		}
		cluster->util_to_cost[util] = ps->cost;
	}
}

void create_util_to_cost(void)
{
	struct perf_domain *pd;
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;

	rcu_read_lock();
	pd = rcu_dereference(rd->pd);
	for (; pd; pd = pd->next)
		create_util_to_cost_pd(pd->em_pd);
	rcu_read_unlock();
}
DECLARE_PER_CPU(unsigned long, gov_last_util);

/* Migration margins */
unsigned int sched_capacity_margin_up[WALT_NR_CPUS] = {
	[0 ... WALT_NR_CPUS-1] = 1078 /* ~5% margin */
};
unsigned int sched_capacity_margin_down[WALT_NR_CPUS] = {
	[0 ... WALT_NR_CPUS-1] = 1205 /* ~15% margin */
};

/* Migration margins for topapp */
unsigned int sched_capacity_margin_early_up[WALT_NR_CPUS] = {
	[0 ... WALT_NR_CPUS-1] = 1078 /* ~5% margin */
};
unsigned int sched_capacity_margin_early_down[WALT_NR_CPUS] = {
	[0 ... WALT_NR_CPUS-1] = 1205 /* ~15% margin */
};
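
/*
 * The margins above are fixed-point factors relative to
 * SCHED_CAPACITY_SCALE (1024): 1078/1024 is roughly 1.05 (the ~5%
 * headroom noted for the "up" arrays) and 1024/1205 is roughly 0.85
 * (the ~15% headroom noted for the "down" arrays).  The comparison that
 * applies them, task_fits_capacity(), lives outside this file.
 */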
static inline bool
bias_to_this_cpu(struct task_struct *p, int cpu, int start_cpu)
{
	bool base_test = cpumask_test_cpu(cpu, p->cpus_ptr) &&
			 cpu_active(cpu);
	bool start_cap_test = !check_for_higher_capacity(start_cpu, cpu);

	return base_test && start_cap_test;
}

static inline bool task_demand_fits(struct task_struct *p, int dst_cpu)
{
	if (is_max_possible_cluster_cpu(dst_cpu))
		return true;

	if (!task_in_related_thread_group(p) && p->prio >= 124 &&
	    !is_min_possible_cluster_cpu(dst_cpu) &&
	    !is_max_possible_cluster_cpu(dst_cpu)) {
		/* a non-topapp low prio task fits on gold */
		return true;
	}

	return task_fits_capacity(p, dst_cpu);
}

struct find_best_target_env {
	bool is_rtg;
	int need_idle;
	int fastpath;
	int start_cpu;
	int order_index;
	int end_index;
	bool strict_max;
	int skip_cpu;
	u64 prs[8];
};
/*
 * cpu_util_without: compute cpu utilization without any contributions from *p
 * @cpu: the CPU whose utilization is requested
 * @p: the task whose utilization should be discounted
 *
 * The utilization of a CPU is defined by the utilization of tasks currently
 * enqueued on that CPU as well as tasks which are currently sleeping after an
 * execution on that CPU.
 *
 * This method returns the utilization of the specified CPU by discounting the
 * utilization of the specified task, whenever the task is currently
 * contributing to the CPU utilization.
 */
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
{
	unsigned int util;

	/*
	 * WALT does not decay idle tasks in the same manner
	 * as PELT, so it makes little sense to subtract task
	 * utilization from cpu utilization. Instead just use
	 * cpu_util for this case.
	 */
	if (likely(READ_ONCE(p->__state) == TASK_WAKING))
		return cpu_util(cpu);

	/* Task has no contribution or is new */
	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
		return cpu_util(cpu);

	util = max_t(long, cpu_util(cpu) - task_util(p), 0);

	/*
	 * Utilization (estimated) can exceed the CPU capacity, thus let's
	 * clamp to the maximum CPU capacity to ensure consistency with
	 * the cpu_util call.
	 */
	return min_t(unsigned long, util, capacity_orig_of(cpu));
}
static inline bool walt_task_skip_min_cpu(struct task_struct *p)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;

	return (sched_boost_type != CONSERVATIVE_BOOST) &&
		walt_get_rtg_status(p) && (wts->unfilter ||
		walt_pipeline_low_latency_task(p));
}

static inline bool walt_is_many_wakeup(int sibling_count_hint)
{
	return sibling_count_hint >= sysctl_sched_many_wakeup_threshold;
}

static inline bool walt_target_ok(int target_cpu, int order_index)
{
	return !((order_index != num_sched_clusters - 1) &&
		 (cpumask_weight(&cpu_array[order_index][0]) == 1) &&
		 (target_cpu == cpumask_first(&cpu_array[order_index][0])));
}

#define MIN_UTIL_FOR_ENERGY_EVAL 52
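
/*
 * walt_get_indicies() chooses the row (order_index) of cpu_array[][] at
 * which walt_find_best_target() starts scanning, and how far past that
 * row the scan may go (end_index).  Boost conditions (per-task boost,
 * full-throttle boost, uclamp boost, RTG skip-min) push order_index
 * towards the higher-capacity clusters and usually also clear
 * *energy_eval_needed, in which case the caller skips the energy
 * comparison and simply takes the candidate with the most spare
 * capacity.
 */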
static void walt_get_indicies(struct task_struct *p, int *order_index,
			      int *end_index, int per_task_boost, bool is_uclamp_boosted,
			      bool *energy_eval_needed)
{
	*order_index = 0;
	*end_index = 0;

	if (num_sched_clusters <= 1)
		return;

	if (per_task_boost > TASK_BOOST_ON_MID) {
		*order_index = num_sched_clusters - 1;
		*energy_eval_needed = false;
		return;
	}

	if (is_full_throttle_boost()) {
		*energy_eval_needed = false;
		*order_index = num_sched_clusters - 1;
		*end_index = num_sched_clusters - 2;
		for (; *end_index >= 0; (*end_index)--)
			if (task_demand_fits(p,
				cpumask_first(&cpu_array[*order_index][*end_index])))
				break;
		return;
	}

	if (is_uclamp_boosted || per_task_boost ||
	    task_boost_policy(p) == SCHED_BOOST_ON_BIG ||
	    walt_task_skip_min_cpu(p)) {
		*energy_eval_needed = false;
		*order_index = 1;
		*end_index = max(0, num_sched_clusters - 3);
		if (sysctl_sched_asymcap_boost) {
			(*end_index)++;
			return;
		}
	}

	for (; *order_index < num_sched_clusters - 1; (*order_index)++) {
		if (task_demand_fits(p, cpumask_first(&cpu_array[*order_index][0])))
			break;
	}

	if (*order_index == 0 &&
	    (task_util(p) >= MIN_UTIL_FOR_ENERGY_EVAL) &&
	    !(p->in_iowait && task_in_related_thread_group(p)) &&
	    !walt_get_rtg_status(p) &&
	    !(sched_boost_type == CONSERVATIVE_BOOST && task_sched_boost(p)) &&
	    !sysctl_sched_suppress_region2)
		*end_index = 1;

	if (p->in_iowait && task_in_related_thread_group(p))
		*energy_eval_needed = false;
}
enum fastpaths {
	NONE = 0,
	SYNC_WAKEUP,
	PREV_CPU_FASTPATH,
	CLUSTER_PACKING_FASTPATH,
	PIPELINE_FASTPATH,
};

static inline bool is_complex_sibling_idle(int cpu)
{
	if (cpu_l2_sibling[cpu] != -1)
		return available_idle_cpu(cpu_l2_sibling[cpu]);

	return false;
}

static inline bool walt_should_reject_fbt_cpu(struct walt_rq *wrq, struct task_struct *p,
					      int cpu, int order_index,
					      struct find_best_target_env *fbt_env)
{
	if (!cpu_active(cpu))
		return true;

	if (cpu_halted(cpu))
		return true;

	if (order_index != 0 && cpu_partial_halted(cpu))
		return true;

	/*
	 * This CPU is the target of an active migration that's
	 * yet to complete. Avoid placing another task on it.
	 */
	if (is_reserved(cpu))
		return true;

	if (sched_cpu_high_irqload(cpu))
		return true;

	if (fbt_env->skip_cpu == cpu)
		return true;

	if (wrq->num_mvp_tasks > 0 && per_task_boost(p) != TASK_BOOST_STRICT_MAX)
		return true;

	return false;
}

bool select_prev_cpu_fastpath(int prev_cpu, int start_cpu, int order_index,
			      struct task_struct *p)
{
	struct walt_rq *prev_wrq = &per_cpu(walt_rq, prev_cpu);
	struct walt_rq *start_wrq = &per_cpu(walt_rq, start_cpu);
	bool valid_part_haltable_prev_cpu = false, valid_prev_cpu = false;

	if (!cpu_active(prev_cpu))
		return false;
	if (!available_idle_cpu(prev_cpu))
		return false;
	if (!cpumask_test_cpu(prev_cpu, p->cpus_ptr))
		return false;
	if (cpu_halted(prev_cpu))
		return false;
	if (is_reserved(prev_cpu))
		return false;

	valid_part_haltable_prev_cpu = cpumask_test_cpu(prev_cpu, &part_haltable_cpus) &&
		((order_index == 0 && cpu_partial_halted(prev_cpu)) ||
		 (order_index == 1 && !cpu_partial_halted(prev_cpu)));
	valid_prev_cpu = (prev_wrq->cluster->id == start_wrq->cluster->id);

	if (!(valid_part_haltable_prev_cpu || valid_prev_cpu))
		return false;

	return true;
}

#define DIRE_STRAITS_PREV_NR_LIMIT 10
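
/*
 * Candidate selection order in walt_find_best_target():
 *   1. cluster packing fastpath, if walt_find_and_choose_cluster_packing_cpu()
 *      returns a CPU;
 *   2. prev_cpu fastpath, if prev_cpu is idle, allowed and acceptable per
 *      select_prev_cpu_fastpath();
 *   3. per-cluster scan starting at order_index: prefer idle CPUs with the
 *      shallowest exit latency, otherwise the busy CPU with the largest
 *      spare capacity (spreading RTG high-prio tasks across CPUs);
 *   4. "dire straits" fallbacks: most spare capacity anywhere, then
 *      prev_cpu (if active and lightly loaded), then the CPU with the
 *      fewest runnables.
 */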
static void walt_find_best_target(struct sched_domain *sd,
				  cpumask_t *candidates,
				  struct task_struct *p,
				  struct find_best_target_env *fbt_env)
{
	unsigned long min_task_util = uclamp_task_util(p);
	long target_max_spare_cap = 0;
	unsigned long best_idle_cuml_util = ULONG_MAX;
	unsigned int min_exit_latency = UINT_MAX;
	int i, start_cpu;
	long spare_wake_cap, most_spare_wake_cap = 0;
	int most_spare_cap_cpu = -1;
	int least_nr_cpu = -1;
	unsigned int cpu_rq_runnable_cnt = UINT_MAX;
	int prev_cpu = task_cpu(p);
	int order_index = fbt_env->order_index, end_index = fbt_env->end_index;
	int stop_index = INT_MAX;
	int cluster;
	unsigned int target_nr_rtg_high_prio = UINT_MAX;
	bool rtg_high_prio_task = task_rtg_high_prio(p);
	cpumask_t visit_cpus;
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
	int packing_cpu;
	long most_spare_wake_cap_target_clusters = LONG_MIN;
	int most_spare_cap_target_cluster_cpu = -1;

	/* Find start CPU based on boost value */
	start_cpu = fbt_env->start_cpu;

	/*
	 * For I/O tasks worth a higher capacity placement, stop the search
	 * at the end of the higher capacity cluster(s).
	 */
	if (order_index > 0 && wts->iowaited) {
		stop_index = num_sched_clusters - 2;
		most_spare_wake_cap = LONG_MIN;
	}

	if (fbt_env->strict_max) {
		stop_index = 0;
		most_spare_wake_cap = LONG_MIN;
	}

	/* fast path for packing_cpu */
	packing_cpu = walt_find_and_choose_cluster_packing_cpu(start_cpu, p);
	if (packing_cpu >= 0) {
		fbt_env->fastpath = CLUSTER_PACKING_FASTPATH;
		cpumask_set_cpu(packing_cpu, candidates);
		goto out;
	}

	/* fast path for prev_cpu */
	if (select_prev_cpu_fastpath(prev_cpu, start_cpu, order_index, p)) {
		fbt_env->fastpath = PREV_CPU_FASTPATH;
		cpumask_set_cpu(prev_cpu, candidates);
		goto out;
	}

	for (cluster = 0; cluster < num_sched_clusters; cluster++) {
		int best_idle_cpu_cluster = -1;
		int target_cpu_cluster = -1;
		int this_complex_idle = 0;
		int best_complex_idle = 0;

		target_max_spare_cap = 0;
		min_exit_latency = INT_MAX;
		best_idle_cuml_util = ULONG_MAX;

		cpumask_and(&visit_cpus, p->cpus_ptr,
			    &cpu_array[order_index][cluster]);
		for_each_cpu(i, &visit_cpus) {
			unsigned long capacity_orig = capacity_orig_of(i);
			unsigned long wake_cpu_util, new_cpu_util, new_util_cuml;
			long spare_cap;
			unsigned int idle_exit_latency = UINT_MAX;
			struct walt_rq *wrq = &per_cpu(walt_rq, i);

			trace_sched_cpu_util(i, NULL);

			/* record the prev_runnable_sums as we visit cpus in a cluster */
			fbt_env->prs[i] = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;

			if (walt_should_reject_fbt_cpu(wrq, p, i, order_index, fbt_env))
				continue;

			/*
			 * p's blocked utilization is still accounted for on prev_cpu
			 * so prev_cpu will receive a negative bias due to the double
			 * accounting. However, the blocked utilization may be zero.
			 */
			wake_cpu_util = cpu_util_without(i, p);
			spare_wake_cap = capacity_orig - wake_cpu_util;

			if (spare_wake_cap > most_spare_wake_cap) {
				most_spare_wake_cap = spare_wake_cap;
				most_spare_cap_cpu = i;
			}

			/*
			 * Keep track of the least loaded cpu in the target
			 * clusters, which can be used as a fallback placement
			 * core for a BIG rtg task in case all the cores are
			 * busy, to avoid the prev_cpu fallback mechanism.
			 */
			if ((cluster <= end_index) &&
			    (spare_wake_cap > most_spare_wake_cap_target_clusters)) {
				most_spare_wake_cap_target_clusters = spare_wake_cap;
				most_spare_cap_target_cluster_cpu = i;
			}

			/*
			 * Keep track of runnables for each CPU; if none of the
			 * CPUs have spare capacity, use the CPU with the fewest
			 * runnables.
			 */
			if (cpu_rq(i)->nr_running < cpu_rq_runnable_cnt) {
				cpu_rq_runnable_cnt = cpu_rq(i)->nr_running;
				least_nr_cpu = i;
			}

			/*
			 * Ensure minimum capacity to grant the required boost.
			 * The target CPU can be already at a capacity level higher
			 * than the one required to boost the task.
			 */
			new_cpu_util = wake_cpu_util + min_task_util;
			if (new_cpu_util > capacity_orig)
				continue;

			/*
			 * Find an optimal backup IDLE CPU for non latency
			 * sensitive tasks.
			 *
			 * Looking for:
			 * - favoring shallowest idle states
			 *   i.e. avoid to wakeup deep-idle CPUs
			 *
			 * The following code path is used by non latency
			 * sensitive tasks if IDLE CPUs are available. If at
			 * least one of such CPUs is available it sets the
			 * best_idle_cpu to the most suitable idle CPU to be
			 * selected.
			 *
			 * If idle CPUs are available, favour these CPUs to
			 * improve performance by spreading tasks.
			 * Indeed, the energy_diff() computed by the caller
			 * will take care to ensure the minimization of energy
			 * consumptions without affecting performance.
			 */
			if (available_idle_cpu(i)) {
				idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(i));

				this_complex_idle = is_complex_sibling_idle(i) ? 1 : 0;
				if (this_complex_idle < best_complex_idle)
					continue;

				/*
				 * Prefer the shallowest idle state among CPUs
				 * of the same capacity.
				 */
				if (idle_exit_latency > min_exit_latency)
					continue;

				new_util_cuml = cpu_util_cum(i);
				if (min_exit_latency == idle_exit_latency &&
				    (best_idle_cpu_cluster == prev_cpu ||
				     (i != prev_cpu &&
				      new_util_cuml > best_idle_cuml_util)))
					continue;

				min_exit_latency = idle_exit_latency;
				best_idle_cuml_util = new_util_cuml;
				best_idle_cpu_cluster = i;
				best_complex_idle = this_complex_idle;
				continue;
			}

			/* skip visiting any more busy CPUs if an idle one was found */
			if (best_idle_cpu_cluster != -1)
				continue;

			/*
			 * Compute the maximum possible capacity we expect
			 * to have available on this CPU once the task is
			 * enqueued here.
			 */
			spare_cap = capacity_orig - new_cpu_util;

			/*
			 * Try to spread the rtg high prio tasks so that they
			 * don't preempt each other. This is an optimistic
			 * check assuming rtg high prio can actually preempt
			 * the current running task with the given vruntime
			 * boost.
			 */
			if (rtg_high_prio_task) {
				if (walt_nr_rtg_high_prio(i) > target_nr_rtg_high_prio)
					continue;

				/* Favor CPUs with maximum spare capacity */
				if (walt_nr_rtg_high_prio(i) == target_nr_rtg_high_prio &&
				    spare_cap < target_max_spare_cap)
					continue;
			} else {
				/* Favor CPUs with maximum spare capacity */
				if (spare_cap < target_max_spare_cap)
					continue;
			}

			target_max_spare_cap = spare_cap;
			target_nr_rtg_high_prio = walt_nr_rtg_high_prio(i);
			target_cpu_cluster = i;
		}

		if (best_idle_cpu_cluster != -1)
			cpumask_set_cpu(best_idle_cpu_cluster, candidates);
		else if (target_cpu_cluster != -1)
			cpumask_set_cpu(target_cpu_cluster, candidates);

		if ((cluster >= end_index) && (!cpumask_empty(candidates)) &&
		    walt_target_ok(target_cpu_cluster, order_index))
			break;

		if (most_spare_cap_cpu != -1 && cluster >= stop_index)
			break;
	}

	/*
	 * We have set idle or target as long as they are valid CPUs.
	 * If we find neither, fall back to most_spare_cap. If that is not
	 * available either, fall back to prev_cpu, provided it is active
	 * and has fewer than DIRE_STRAITS_PREV_NR_LIMIT runnables;
	 * otherwise fall back to the CPU with the fewest runnables.
	 */
	if (unlikely(cpumask_empty(candidates))) {
		if (most_spare_cap_cpu != -1)
			cpumask_set_cpu(most_spare_cap_cpu, candidates);
		else if (most_spare_cap_target_cluster_cpu != -1 && (order_index > 0) &&
			 fbt_env->is_rtg)
			cpumask_set_cpu(most_spare_cap_target_cluster_cpu, candidates);
		else if (cpu_active(prev_cpu) &&
			 (cpu_rq(prev_cpu)->nr_running < DIRE_STRAITS_PREV_NR_LIMIT))
			cpumask_set_cpu(prev_cpu, candidates);
		else if (least_nr_cpu != -1)
			cpumask_set_cpu(least_nr_cpu, candidates);
	}

out:
	trace_sched_find_best_target(p, min_task_util, start_cpu, cpumask_bits(candidates)[0],
				     most_spare_cap_cpu, order_index, end_index,
				     fbt_env->skip_cpu, task_on_rq_queued(p), least_nr_cpu,
				     cpu_rq_runnable_cnt, most_spare_cap_target_cluster_cpu);
}
static inline unsigned long
cpu_util_next_walt(int cpu, struct task_struct *p, int dst_cpu)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
	unsigned long util = wrq->walt_stats.cumulative_runnable_avg_scaled;
	bool queued = task_on_rq_queued(p);

	/*
	 * When the task is queued:
	 * (a) The evaluating CPU (cpu) is the task's current CPU. If the
	 *     task is migrating, discount the task contribution from the
	 *     evaluation cpu.
	 * (b) The evaluating CPU (cpu) is the task's current CPU. If the
	 *     task is NOT migrating, nothing to do. The contribution is
	 *     already present on the evaluation CPU.
	 * (c) The evaluating CPU (cpu) is not the task's current CPU, but
	 *     the task is migrating to the evaluating CPU. So add the
	 *     task contribution to it.
	 * (d) The evaluating CPU (cpu) is neither the current CPU nor
	 *     the destination CPU. Don't care.
	 *
	 * When the task is NOT queued, i.e. waking, its contribution is not
	 * present on any CPU.
	 *
	 * (a) If the evaluating CPU is the destination CPU, add the task
	 *     contribution.
	 * (b) The evaluating CPU is not the destination CPU. Don't care.
	 */
	if (unlikely(queued)) {
		if (task_cpu(p) == cpu) {
			if (dst_cpu != cpu)
				util = max_t(long, util - task_util(p), 0);
		} else if (dst_cpu == cpu) {
			util += task_util(p);
		}
	} else if (dst_cpu == cpu) {
		util += task_util(p);
	}

	return min_t(unsigned long, util, capacity_orig_of(cpu));
}
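
/*
 * cpu_util_next_walt_prs() estimates the per-CPU prev_runnable_sum that
 * would result from placing @p on @dst_cpu.  The prs[] snapshot taken in
 * walt_find_best_target() is adjusted the same way the WALT migration
 * fixups would adjust it: for an inter-cluster move the task's per-CPU
 * prev_window contribution is removed from the CPU being evaluated and
 * the full prev_window is added to @dst_cpu; a task with no prev_window
 * contributes its demand to @dst_cpu instead.
 */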
static inline u64
cpu_util_next_walt_prs(int cpu, struct task_struct *p, int dst_cpu, bool prev_dst_same_cluster,
		       u64 *prs)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
	long util = prs[cpu];

	if (wts->prev_window) {
		if (!prev_dst_same_cluster) {
			/* intercluster migration of non rtg task - mimic fixups */
			util -= wts->prev_window_cpu[cpu];
			if (util < 0)
				util = 0;
			if (cpu == dst_cpu)
				util += wts->prev_window;
		}
	} else {
		if (cpu == dst_cpu)
			util += wts->demand;
	}

	return util;
}
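
/*
 * get_util_to_cost() is the lookup side of the table built in
 * create_util_to_cost_pd().  For the first cluster (representative CPU 0),
 * utilizations above sysctl_em_inflate_thres have their cost scaled by
 * sysctl_em_inflate_pct / 100, which makes that cluster look more
 * expensive to the energy comparison at high utilization.
 */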
static inline unsigned long get_util_to_cost(int cpu, unsigned long util)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu);

	if (cpu == 0 && util > sysctl_em_inflate_thres)
		return mult_frac(wrq->cluster->util_to_cost[util], sysctl_em_inflate_pct, 100);
	else
		return wrq->cluster->util_to_cost[util];
}
/**
 * walt_em_cpu_energy() - Estimates the energy consumed by the CPUs of a
 *			  performance domain
 * @pd		: performance domain for which energy has to be estimated
 * @max_util	: highest utilization among CPUs of the domain
 * @sum_util	: sum of the utilization of all CPUs in the domain
 * @output	: optional trace output (cost, max_util, sum_util per domain)
 * @x		: index into @output for this performance domain
 *
 * This function must be used only for CPU devices. There is no validation,
 * i.e. whether the EM is a CPU type and has a cpumask allocated. It is
 * called from the scheduler code quite frequently, which is why no checks
 * are performed.
 *
 * Return: the sum of the energy consumed by the CPUs of the domain assuming
 * a capacity state satisfying the max utilization of the domain.
 */
static inline unsigned long walt_em_cpu_energy(struct em_perf_domain *pd,
				unsigned long max_util, unsigned long sum_util,
				struct compute_energy_output *output, unsigned int x)
{
	unsigned long scale_cpu, cost;
	int cpu;

	if (!sum_util)
		return 0;

	/*
	 * In order to predict the capacity state, map the utilization of the
	 * most utilized CPU of the performance domain to a requested frequency,
	 * like schedutil.
	 */
	cpu = cpumask_first(to_cpumask(pd->cpus));
	scale_cpu = arch_scale_cpu_capacity(cpu);

	max_util = max_util + (max_util >> 2); /* account for TARGET_LOAD, usually 80% */
	max_util = max(max_util,
		       (arch_scale_freq_capacity(cpu) * scale_cpu) >>
			SCHED_CAPACITY_SHIFT);

	/*
	 * The capacity of a CPU in the domain at the performance state (ps)
	 * can be computed as:
	 *
	 *             ps->freq * scale_cpu
	 *   ps->cap = --------------------                          (1)
	 *                 cpu_max_freq
	 *
	 * So, ignoring the costs of idle states (which are not available in
	 * the EM), the energy consumed by this CPU at that performance state
	 * is estimated as:
	 *
	 *             ps->power * cpu_util
	 *   cpu_nrg = --------------------                          (2)
	 *                   ps->cap
	 *
	 * since 'cpu_util / ps->cap' represents its percentage of busy time.
	 *
	 *   NOTE: Although the result of this computation actually is in
	 *         units of power, it can be manipulated as an energy value
	 *         over a scheduling period, since it is assumed to be
	 *         constant during that interval.
	 *
	 * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
	 * of two terms:
	 *
	 *             ps->power * cpu_max_freq   cpu_util
	 *   cpu_nrg = ------------------------ * ---------          (3)
	 *                    ps->freq            scale_cpu
	 *
	 * The first term is static, and is stored in the em_perf_state struct
	 * as 'ps->cost'.
	 *
	 * Since all CPUs of the domain have the same micro-architecture, they
	 * share the same 'ps->cost', and the same CPU capacity. Hence, the
	 * total energy of the domain (which is the simple sum of the energy of
	 * all of its CPUs) can be factorized as:
	 *
	 *            ps->cost * \Sum cpu_util
	 *   pd_nrg = ------------------------                       (4)
	 *                  scale_cpu
	 */
	if (max_util >= 1024)
		max_util = 1023;

	cost = get_util_to_cost(cpu, max_util);
	if (output) {
		output->cost[x] = cost;
		output->max_util[x] = max_util;
		output->sum_util[x] = sum_util;
	}
	return cost * sum_util / scale_cpu;
}
/*
 * walt_pd_compute_energy(): Estimates the energy that @pd would consume if @p was
 * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
 * landscape of @pd's CPUs after the task migration, and uses the Energy Model
 * to compute what would be the energy if we decided to actually migrate that
 * task.
 */
static long
walt_pd_compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd, u64 *prs,
		       struct compute_energy_output *output, unsigned int x)
{
	struct cpumask *pd_mask = perf_domain_span(pd);
	unsigned long max_util = 0, sum_util = 0;
	int cpu;
	unsigned long cpu_util;
	bool prev_dst_same_cluster = false;

	if (same_cluster(task_cpu(p), dst_cpu))
		prev_dst_same_cluster = true;

	/*
	 * The capacity state of CPUs of the current rd can be driven by CPUs
	 * of another rd if they belong to the same pd. So, account for the
	 * utilization of these CPUs too by masking pd with cpu_online_mask
	 * instead of the rd span.
	 *
	 * If an entire pd is outside of the current rd, it will not appear in
	 * its pd list and will not be accounted by compute_energy().
	 */
	for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
		sum_util += cpu_util_next_walt(cpu, p, dst_cpu);
		cpu_util = cpu_util_next_walt_prs(cpu, p, dst_cpu, prev_dst_same_cluster, prs);
		max_util = max(max_util, cpu_util);
	}

	max_util = scale_time_to_util(max_util);

	if (output)
		output->cluster_first_cpu[x] = cpumask_first(pd_mask);

	return walt_em_cpu_energy(pd->em_pd, max_util, sum_util, output, x);
}
static inline long
walt_compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd,
		    cpumask_t *candidates, u64 *prs, struct compute_energy_output *output)
{
	long energy = 0;
	unsigned int x = 0;

	for (; pd; pd = pd->next) {
		struct cpumask *pd_mask = perf_domain_span(pd);

		if (cpumask_intersects(candidates, pd_mask) ||
		    cpumask_test_cpu(task_cpu(p), pd_mask)) {
			energy += walt_pd_compute_energy(p, dst_cpu, pd, prs, output, x);
			x++;
		}
	}

	return energy;
}

static inline int wake_to_idle(struct task_struct *p)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
	struct walt_task_struct *cur_wts =
		(struct walt_task_struct *) current->android_vendor_data1;

	return (cur_wts->wake_up_idle || wts->wake_up_idle);
}
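
/*
 * Tie-break used when two candidates report equal energy: keep the
 * lower-capacity CPU, then prefer a shallow-idle CPU (exit latency <= 1),
 * then any idle CPU, then prev_cpu, and finally the CPU with the lower
 * utilization.
 */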
/* return true if cpu should be chosen over best_energy_cpu */
static inline bool select_cpu_same_energy(int cpu, int best_cpu, int prev_cpu)
{
	bool new_cpu_is_idle = available_idle_cpu(cpu);
	bool best_cpu_is_idle = available_idle_cpu(best_cpu);

	if (check_for_higher_capacity(cpu, best_cpu))
		return false;
	if (check_for_higher_capacity(best_cpu, cpu))
		return true;

	if (best_cpu_is_idle && walt_get_idle_exit_latency(cpu_rq(best_cpu)) <= 1)
		return false;
	if (new_cpu_is_idle && walt_get_idle_exit_latency(cpu_rq(cpu)) <= 1)
		return true;

	if (best_cpu_is_idle && !new_cpu_is_idle)
		return false;
	if (new_cpu_is_idle && !best_cpu_is_idle)
		return true;

	if (best_cpu == prev_cpu)
		return false;
	if (cpu == prev_cpu)
		return true;

	if (best_cpu_is_idle && new_cpu_is_idle)
		return false;

	if (cpu_util(best_cpu) <= cpu_util(cpu))
		return false;

	return true;
}

static inline unsigned int capacity_spare_of(int cpu)
{
	return capacity_orig_of(cpu) - cpu_util(cpu);
}

static DEFINE_PER_CPU(cpumask_t, energy_cpus);
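
/*
 * walt_find_energy_efficient_cpu() is WALT's replacement for the upstream
 * EAS wakeup path (wired up via the select_task_rq_fair vendor hook).  It
 * short-circuits through a few fastpaths (many-wakeup, pipeline CPU, sync
 * wakeup, cluster packing) before asking walt_find_best_target() for
 * candidates.  When an energy evaluation is needed, each candidate is
 * compared against prev_cpu with walt_compute_energy(); prev_cpu is
 * retained unless the best candidate is a shallow-idle CPU, saves more
 * than prev_energy >> 5 (roughly 3%), or prev_cpu sits above the start
 * cluster's capacity.
 */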
int walt_find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
				   int sync, int sibling_count_hint)
{
	unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
	struct root_domain *rd = cpu_rq(cpumask_first(cpu_active_mask))->rd;
	int weight, cpu = smp_processor_id(), best_energy_cpu = prev_cpu;
	struct perf_domain *pd;
	unsigned long cur_energy;
	cpumask_t *candidates;
	bool is_rtg, curr_is_rtg;
	struct find_best_target_env fbt_env;
	bool need_idle = wake_to_idle(p);
	u64 start_t = 0;
	int delta = 0;
	int task_boost = per_task_boost(p);
	bool uclamp_boost = walt_uclamp_boosted(p);
	int start_cpu = 0, order_index, end_index;
	int first_cpu;
	bool energy_eval_needed = true;
	struct compute_energy_output output;
	struct walt_task_struct *wts;
	int pipeline_cpu;

	if (walt_is_many_wakeup(sibling_count_hint) && prev_cpu != cpu &&
	    cpumask_test_cpu(prev_cpu, p->cpus_ptr))
		return prev_cpu;

	if (unlikely(!cpu_array))
		return prev_cpu;

	/* Pre-select a set of candidate CPUs. */
	candidates = this_cpu_ptr(&energy_cpus);
	cpumask_clear(candidates);

	wts = (struct walt_task_struct *) p->android_vendor_data1;
	pipeline_cpu = wts->pipeline_cpu;
	if ((wts->low_latency & WALT_LOW_LATENCY_MASK) &&
	    (pipeline_cpu != -1) &&
	    walt_task_skip_min_cpu(p) &&
	    cpumask_test_cpu(pipeline_cpu, p->cpus_ptr) &&
	    cpu_active(pipeline_cpu) &&
	    !cpu_halted(pipeline_cpu)) {
		if ((p == cpu_rq(pipeline_cpu)->curr) ||
		    !walt_pipeline_low_latency_task(cpu_rq(pipeline_cpu)->curr)) {
			best_energy_cpu = pipeline_cpu;
			fbt_env.fastpath = PIPELINE_FASTPATH;
			goto out;
		}
	}

	walt_get_indicies(p, &order_index, &end_index, task_boost, uclamp_boost,
			  &energy_eval_needed);
	start_cpu = cpumask_first(&cpu_array[order_index][0]);

	is_rtg = task_in_related_thread_group(p);
	curr_is_rtg = task_in_related_thread_group(cpu_rq(cpu)->curr);

	if (trace_sched_task_util_enabled())
		start_t = sched_clock();

	rcu_read_lock();

	need_idle |= uclamp_latency_sensitive(p);

	fbt_env.fastpath = 0;
	fbt_env.need_idle = need_idle;

	if (sync && (need_idle || (is_rtg && curr_is_rtg)))
		sync = 0;

	if (sysctl_sched_sync_hint_enable && sync &&
	    bias_to_this_cpu(p, cpu, start_cpu) && !cpu_halted(cpu)) {
		best_energy_cpu = cpu;
		fbt_env.fastpath = SYNC_WAKEUP;
		goto unlock;
	}

	/* if symmetrical system, default to upstream behavior */
	pd = rcu_dereference(rd->pd);
	if (!pd)
		goto fail;

	fbt_env.is_rtg = is_rtg;
	fbt_env.start_cpu = start_cpu;
	fbt_env.order_index = order_index;
	fbt_env.end_index = end_index;
	fbt_env.strict_max = is_rtg &&
			     (task_boost == TASK_BOOST_STRICT_MAX);
	fbt_env.skip_cpu = walt_is_many_wakeup(sibling_count_hint) ?
			   cpu : -1;

	walt_find_best_target(NULL, candidates, p, &fbt_env);

	/* Bail out if no candidate was found. */
	weight = cpumask_weight(candidates);
	if (!weight)
		goto unlock;

	first_cpu = cpumask_first(candidates);

	if (fbt_env.fastpath == CLUSTER_PACKING_FASTPATH) {
		best_energy_cpu = first_cpu;
		goto unlock;
	}

	if (weight == 1) {
		if (available_idle_cpu(first_cpu) || first_cpu == prev_cpu) {
			best_energy_cpu = first_cpu;
			goto unlock;
		}
	}

	if (need_idle && available_idle_cpu(first_cpu)) {
		best_energy_cpu = first_cpu;
		goto unlock;
	}

	if (!energy_eval_needed) {
		int max_spare_cpu = first_cpu;

		for_each_cpu(cpu, candidates) {
			if (capacity_spare_of(max_spare_cpu) < capacity_spare_of(cpu))
				max_spare_cpu = cpu;
		}
		best_energy_cpu = max_spare_cpu;
		goto unlock;
	}

	if (READ_ONCE(p->__state) == TASK_WAKING)
		delta = task_util(p);

	if (cpumask_test_cpu(prev_cpu, p->cpus_ptr) && !__cpu_overutilized(prev_cpu, delta)) {
		if (trace_sched_compute_energy_enabled()) {
			memset(&output, 0, sizeof(output));
			prev_energy = walt_compute_energy(p, prev_cpu, pd, candidates, fbt_env.prs,
							  &output);
		} else {
			prev_energy = walt_compute_energy(p, prev_cpu, pd, candidates, fbt_env.prs,
							  NULL);
		}
		best_energy = prev_energy;
		trace_sched_compute_energy(p, prev_cpu, prev_energy, 0, 0, 0, &output);
	} else {
		prev_energy = best_energy = ULONG_MAX;
		if (weight == 1) {
			best_energy_cpu = first_cpu;
			goto unlock;
		}
	}

	/* Select the best candidate energy-wise. */
	for_each_cpu(cpu, candidates) {
		if (cpu == prev_cpu)
			continue;

		if (trace_sched_compute_energy_enabled()) {
			memset(&output, 0, sizeof(output));
			cur_energy = walt_compute_energy(p, cpu, pd, candidates, fbt_env.prs,
							 &output);
		} else {
			cur_energy = walt_compute_energy(p, cpu, pd, candidates, fbt_env.prs,
							 NULL);
		}

		if (cur_energy < best_energy) {
			best_energy = cur_energy;
			best_energy_cpu = cpu;
		} else if (cur_energy == best_energy) {
			if (select_cpu_same_energy(cpu, best_energy_cpu,
						   prev_cpu)) {
				best_energy = cur_energy;
				best_energy_cpu = cpu;
			}
		}
		trace_sched_compute_energy(p, cpu, cur_energy,
					   prev_energy, best_energy, best_energy_cpu, &output);
	}

	/*
	 * Pick prev_cpu if the best energy CPU doesn't save at least
	 * prev_energy >> 5 (roughly 3%) of the energy used by prev_cpu.
	 */
	if (!(available_idle_cpu(best_energy_cpu) &&
	      walt_get_idle_exit_latency(cpu_rq(best_energy_cpu)) <= 1) &&
	    (prev_energy != ULONG_MAX) && (best_energy_cpu != prev_cpu) &&
	    ((prev_energy - best_energy) <= prev_energy >> 5) &&
	    !check_for_higher_capacity(prev_cpu, start_cpu))
		best_energy_cpu = prev_cpu;

unlock:
	rcu_read_unlock();

out:
	if (best_energy_cpu < 0 || best_energy_cpu >= WALT_NR_CPUS)
		best_energy_cpu = prev_cpu;

	trace_sched_task_util(p, cpumask_bits(candidates)[0], best_energy_cpu,
			      sync, fbt_env.need_idle, fbt_env.fastpath,
			      start_t, uclamp_boost, start_cpu);

	return best_energy_cpu;

fail:
	rcu_read_unlock();
	return -1;
}
static void
walt_select_task_rq_fair(void *unused, struct task_struct *p, int prev_cpu,
			 int sd_flag, int wake_flags, int *target_cpu)
{
	int sync;
	int sibling_count_hint;

	if (unlikely(walt_disabled))
		return;

	sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
	sibling_count_hint = p->wake_q_count;
	p->wake_q_count = 0;

	*target_cpu = walt_find_energy_efficient_cpu(p, prev_cpu, sync, sibling_count_hint);
}

static void walt_binder_low_latency_set(void *unused, struct task_struct *task,
					bool sync, struct binder_proc *proc)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;

	if (unlikely(walt_disabled))
		return;

	if (task && ((task_in_related_thread_group(current) &&
		      task->group_leader->prio < MAX_RT_PRIO) ||
		     (current->group_leader->prio < MAX_RT_PRIO &&
		      task_in_related_thread_group(task))))
		wts->low_latency |= WALT_LOW_LATENCY_BINDER;
	else
		/*
		 * Clear the low_latency flag if the criterion above is not met.
		 * This handles the case where WALT_LOW_LATENCY_BINDER was set
		 * for a binder thread on behalf of one task and, before WALT
		 * clears the flag on timer expiry, some other task tries to
		 * use the same binder thread.
		 *
		 * The flag only gets cleared here when a binder transaction is
		 * initiated and the condition above to set the flag is not
		 * satisfied.
		 */
		wts->low_latency &= ~WALT_LOW_LATENCY_BINDER;
}
static void binder_set_priority_hook(void *data,
				     struct binder_transaction *bndrtrans, struct task_struct *task)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;
	struct walt_task_struct *current_wts =
		(struct walt_task_struct *) current->android_vendor_data1;

	if (unlikely(walt_disabled))
		return;

	if (bndrtrans && bndrtrans->need_reply && current_wts->boost == TASK_BOOST_STRICT_MAX) {
		bndrtrans->android_vendor_data1 = wts->boost;
		wts->boost = TASK_BOOST_STRICT_MAX;
	}
}

static void binder_restore_priority_hook(void *data,
					 struct binder_transaction *bndrtrans, struct task_struct *task)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;

	if (unlikely(walt_disabled))
		return;

	if (bndrtrans && wts->boost == TASK_BOOST_STRICT_MAX)
		wts->boost = bndrtrans->android_vendor_data1;
}
/*
 * Higher prio MVP can preempt lower prio MVP.
 *
 * However, the lower prio MVP slice will be longer, since we expect them to
 * be the workhorses. For example, binders will have higher MVP prio and can
 * preempt long-running rtg prio tasks, but binders lose their powers within
 * 3 msec whereas rtg prio tasks can run longer than that.
 */
int walt_get_mvp_task_prio(struct task_struct *p)
{
	if (walt_procfs_low_latency_task(p) ||
	    walt_pipeline_low_latency_task(p))
		return WALT_LL_PIPE_MVP;

	if (per_task_boost(p) == TASK_BOOST_STRICT_MAX)
		return WALT_TASK_BOOST_MVP;

	if (walt_binder_low_latency_task(p))
		return WALT_BINDER_MVP;

	if (task_rtg_high_prio(p))
		return WALT_RTG_MVP;

	return WALT_NOT_MVP;
}

static inline unsigned int walt_cfs_mvp_task_limit(struct task_struct *p)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;

	/* Binder MVP tasks are high prio but get only a single slice */
	if (wts->mvp_prio == WALT_BINDER_MVP)
		return WALT_MVP_SLICE;

	return WALT_MVP_LIMIT;
}
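
/*
 * The per-rq MVP list is kept ordered by mvp_prio.  The list is walked
 * until an entry with a smaller mvp_prio value than the new task's is
 * found (or, when at_front is set, an entry whose value is not larger),
 * and the task is inserted in front of it.  at_front is used when the
 * task is already running on this CPU; the non-at_front path queues a
 * task behind entries of equal priority, so equal-priority MVPs share
 * the CPU round-robin as their slices expire.
 */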
static void walt_cfs_insert_mvp_task(struct walt_rq *wrq, struct walt_task_struct *wts,
				     bool at_front)
{
	struct list_head *pos;

	list_for_each(pos, &wrq->mvp_tasks) {
		struct walt_task_struct *tmp_wts = container_of(pos, struct walt_task_struct,
								mvp_list);

		if (at_front) {
			if (wts->mvp_prio >= tmp_wts->mvp_prio)
				break;
		} else {
			if (wts->mvp_prio > tmp_wts->mvp_prio)
				break;
		}
	}

	list_add(&wts->mvp_list, pos->prev);
	wrq->num_mvp_tasks++;
}

void walt_cfs_deactivate_mvp_task(struct rq *rq, struct task_struct *p)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;

	list_del_init(&wts->mvp_list);
	wts->mvp_prio = WALT_NOT_MVP;
	wrq->num_mvp_tasks--;
}
/*
 * MVP task runtime update happens here. Three possibilities:
 *
 * de-activated: The MVP consumed its runtime. Non MVP can preempt.
 * slice expired: MVP slice is expired and other MVP can preempt.
 * slice not expired: This MVP task can continue to run.
 */
#define MAX_MVP_TIME_NS 500000000ULL
#define MVP_THROTTLE_TIME_NS 100000000ULL
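
/*
 * MVP throttling: once MVP tasks have been occupying this rq for more
 * than MAX_MVP_TIME_NS (500 ms, measured from mvp_arrival_time), the rq
 * sets skip_mvp and stops picking MVPs ahead of the CFS choice for
 * MVP_THROTTLE_TIME_NS (100 ms).  mvp_arrival_time is (re)stamped in
 * walt_cfs_replace_next_task_fair() when an MVP task is picked while the
 * rq is not throttled.
 */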
static void walt_cfs_account_mvp_runtime(struct rq *rq, struct task_struct *curr)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts = (struct walt_task_struct *) curr->android_vendor_data1;
	u64 slice;
	unsigned int limit;

	walt_lockdep_assert_rq(rq, NULL);

	/*
	 * RQ clock update happens in tick path in the scheduler.
	 * Since we drop the lock in the scheduler before calling
	 * into vendor hook, it is possible that update flags are
	 * reset by another rq lock and unlock. Do the update here
	 * if required.
	 */
	if (!(rq->clock_update_flags & RQCF_UPDATED))
		update_rq_clock(rq);

	if (wrq->mvp_throttle_time) {
		if ((rq->clock - wrq->mvp_throttle_time) > MVP_THROTTLE_TIME_NS) {
			wrq->skip_mvp = false;
			wrq->mvp_throttle_time = 0;
		}
	} else if (wrq->mvp_arrival_time) {
		if ((rq->clock - wrq->mvp_arrival_time) > MAX_MVP_TIME_NS) {
			wrq->skip_mvp = true;
			wrq->mvp_arrival_time = 0;
			wrq->mvp_throttle_time = rq->clock;
		}
	}

	/*
	 * Continue accounting even in the skip_mvp state if an MVP task is
	 * selected by the scheduler core to run on this CPU.
	 */
	if (curr->se.sum_exec_runtime > wts->sum_exec_snapshot_for_total)
		wts->total_exec = curr->se.sum_exec_runtime - wts->sum_exec_snapshot_for_total;
	else
		wts->total_exec = 0;

	if (curr->se.sum_exec_runtime > wts->sum_exec_snapshot_for_slice)
		slice = curr->se.sum_exec_runtime - wts->sum_exec_snapshot_for_slice;
	else
		slice = 0;

	/* slice is not expired */
	if (slice < WALT_MVP_SLICE)
		return;

	wts->sum_exec_snapshot_for_slice = curr->se.sum_exec_runtime;

	/*
	 * The slice is expired. Check if we have to deactivate the MVP task,
	 * otherwise requeue it in the list so that other MVP tasks get a
	 * chance.
	 */
	limit = walt_cfs_mvp_task_limit(curr);
	if (wts->total_exec > limit) {
		walt_cfs_deactivate_mvp_task(rq, curr);
		trace_walt_cfs_deactivate_mvp_task(curr, wts, limit);
		return;
	}

	if (wrq->num_mvp_tasks == 1)
		return;

	/* slice expired. re-queue the task */
	list_del(&wts->mvp_list);
	wrq->num_mvp_tasks--;
	walt_cfs_insert_mvp_task(wrq, wts, false);
}
void walt_cfs_enqueue_task(struct rq *rq, struct task_struct *p)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
	int mvp_prio = walt_get_mvp_task_prio(p);

	if (mvp_prio == WALT_NOT_MVP)
		return;

	/*
	 * This can happen during migration or enq/deq for a prio/class
	 * change. If the task was once MVP but got demoted, it will not
	 * become MVP again until it goes to sleep.
	 */
	if (wts->total_exec > walt_cfs_mvp_task_limit(p))
		return;

	wts->mvp_prio = mvp_prio;
	walt_cfs_insert_mvp_task(wrq, wts, task_on_cpu(rq, p));

	/*
	 * We inserted the task at the appropriate position. Take the
	 * task runtime snapshot. From now onwards we use this point as a
	 * baseline to enforce the slice and demotion.
	 */
	if (!wts->total_exec) /* queue after sleep */ {
		wts->sum_exec_snapshot_for_total = p->se.sum_exec_runtime;
		wts->sum_exec_snapshot_for_slice = p->se.sum_exec_runtime;
	}
}
void walt_cfs_dequeue_task(struct rq *rq, struct task_struct *p)
{
	struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;

	if (!list_empty(&wts->mvp_list) && wts->mvp_list.next)
		walt_cfs_deactivate_mvp_task(rq, p);

	/*
	 * Reset the exec time during sleep so that it starts
	 * from scratch upon next wakeup. total_exec should
	 * be preserved when task is enq/deq while it is on
	 * runqueue.
	 */
	if (READ_ONCE(p->__state) != TASK_RUNNING)
		wts->total_exec = 0;
}
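
/*
 * Tick-path enforcement: on every scheduler tick the runtime of a running
 * MVP task is accounted, and the CPU is rescheduled when either the rq's
 * skip_mvp state flipped or the current task is no longer at the head of
 * the MVP queue (its slice expired or it was demoted), provided more than
 * one CFS task is runnable.
 */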
void walt_cfs_tick(struct rq *rq)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts = (struct walt_task_struct *) rq->curr->android_vendor_data1;
	bool skip_mvp;

	if (unlikely(walt_disabled))
		return;

	raw_spin_lock(&rq->__lock);

	if (list_empty(&wts->mvp_list) || (wts->mvp_list.next == NULL))
		goto out;

	/* Reschedule if the RQ's skip_mvp state changes */
	skip_mvp = wrq->skip_mvp;
	walt_cfs_account_mvp_runtime(rq, rq->curr);

	/*
	 * If the current task is no longer at the head of the MVP queue (or
	 * the skip_mvp state changed), reschedule to see if we can run any
	 * other task, including MVP tasks.
	 */
	if (((skip_mvp != wrq->skip_mvp) ||
	     (wrq->mvp_tasks.next != &wts->mvp_list)) && rq->cfs.h_nr_running > 1)
		resched_curr(rq);

out:
	raw_spin_unlock(&rq->__lock);
}
/*
 * When preempt = false and nopreempt = false, we leave the preemption
 * decision to CFS.
 */
static void walt_cfs_check_preempt_wakeup(void *unused, struct rq *rq, struct task_struct *p,
					  bool *preempt, bool *nopreempt, int wake_flags,
					  struct sched_entity *se, struct sched_entity *pse,
					  int next_buddy_marked, unsigned int granularity)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts_p = (struct walt_task_struct *) p->android_vendor_data1;
	struct task_struct *c = rq->curr;
	struct walt_task_struct *wts_c = (struct walt_task_struct *) rq->curr->android_vendor_data1;
	bool resched = false, skip_mvp;
	bool p_is_mvp, curr_is_mvp;

	if (unlikely(walt_disabled))
		return;

	p_is_mvp = !list_empty(&wts_p->mvp_list) && wts_p->mvp_list.next;
	curr_is_mvp = !list_empty(&wts_c->mvp_list) && wts_c->mvp_list.next;

	/*
	 * current is not MVP, so preemption decision
	 * is simple.
	 */
	if (!curr_is_mvp) {
		if (p_is_mvp && !wrq->skip_mvp)
			goto preempt;
		return; /* CFS decides preemption */
	}

	/*
	 * current is MVP. Update its runtime before deciding the
	 * preemption.
	 */
	skip_mvp = wrq->skip_mvp;
	walt_cfs_account_mvp_runtime(rq, c);
	resched = (skip_mvp != wrq->skip_mvp) || (wrq->mvp_tasks.next != &wts_c->mvp_list);

	/*
	 * current is no longer eligible to run. It must have been
	 * picked (because of MVP) ahead of other tasks in the CFS
	 * tree, so drive preemption to pick up the next task from
	 * the tree, which also includes picking up the first in
	 * the MVP queue.
	 */
	if (resched)
		goto preempt;

	/* current is the first in the queue, so no preemption */
	*nopreempt = true;
	trace_walt_cfs_mvp_wakeup_nopreempt(c, wts_c, walt_cfs_mvp_task_limit(c));
	return;

preempt:
	*preempt = true;
	trace_walt_cfs_mvp_wakeup_preempt(p, wts_p, walt_cfs_mvp_task_limit(p));
}
#ifdef CONFIG_FAIR_GROUP_SCHED
/* Walk up scheduling entities hierarchy */
#define for_each_sched_entity(se) \
		for (; se; se = se->parent)
#else /* !CONFIG_FAIR_GROUP_SCHED */
#define for_each_sched_entity(se) \
		for (; se; se = NULL)
#endif

extern void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se);
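
/*
 * Pick-next replacement hook: unless the rq is MVP-throttled, the first
 * task on the MVP queue is substituted for CFS's own pick.  In the
 * "simple" pick path the hook also walks the sched_entity hierarchy and
 * calls set_next_entity() itself, mirroring what pick_next_task_fair()
 * would have done for the entity it selected.  The WALT_BUG() checks
 * before and after the swap catch a pick that is already running
 * elsewhere or queued on a different CPU.
 */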
static void walt_cfs_replace_next_task_fair(void *unused, struct rq *rq, struct task_struct **p,
					    struct sched_entity **se, bool *repick, bool simple,
					    struct task_struct *prev)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts;
	struct task_struct *mvp;
	struct cfs_rq *cfs_rq;

	if (unlikely(walt_disabled))
		return;

	if ((*p) && (*p) != prev && ((*p)->on_cpu == 1 || (*p)->on_rq == 0 ||
				     (*p)->on_rq == TASK_ON_RQ_MIGRATING ||
				     task_thread_info(*p)->cpu != cpu_of(rq)))
		WALT_BUG(WALT_BUG_UPSTREAM, *p,
			 "picked %s(%d) on_cpu=%d on_rq=%d p->cpu=%d cpu_of(rq)=%d kthread=%d\n",
			 (*p)->comm, (*p)->pid, (*p)->on_cpu,
			 (*p)->on_rq, task_thread_info(*p)->cpu,
			 cpu_of(rq), ((*p)->flags & PF_KTHREAD));

	/* RQ is in MVP throttled state */
	if (wrq->skip_mvp)
		return;

	if (list_empty(&wrq->mvp_tasks)) {
		wrq->mvp_arrival_time = 0;
		return;
	}

	/* Return the first task from MVP queue */
	wts = list_first_entry(&wrq->mvp_tasks, struct walt_task_struct, mvp_list);
	mvp = wts_to_ts(wts);

	*p = mvp;
	*se = &mvp->se;
	*repick = true;

	/* TODO: check with team if it is fine in case clock is not updated */
	/* Mark arrival of MVP task */
	if (!wrq->mvp_arrival_time)
		wrq->mvp_arrival_time = rq->clock;

	if (simple) {
		for_each_sched_entity((*se)) {
			/*
			 * TODO If CFS_BANDWIDTH is enabled, we might pick
			 * from a throttled cfs_rq
			 */
			cfs_rq = cfs_rq_of(*se);
			set_next_entity(cfs_rq, *se);
		}
	}

	if ((*p) && (*p) != prev && ((*p)->on_cpu == 1 || (*p)->on_rq == 0 ||
				     (*p)->on_rq == TASK_ON_RQ_MIGRATING ||
				     task_thread_info(*p)->cpu != cpu_of(rq)))
		WALT_BUG(WALT_BUG_UPSTREAM, *p,
			 "picked %s(%d) on_cpu=%d on_rq=%d p->cpu=%d cpu_of(rq)=%d kthread=%d\n",
			 (*p)->comm, (*p)->pid, (*p)->on_cpu,
			 (*p)->on_rq, task_thread_info(*p)->cpu,
			 cpu_of(rq), ((*p)->flags & PF_KTHREAD));

	trace_walt_cfs_mvp_pick_next(mvp, wts, walt_cfs_mvp_task_limit(mvp));
}
void walt_cfs_init(void)
{
	register_trace_android_rvh_select_task_rq_fair(walt_select_task_rq_fair, NULL);

	register_trace_android_vh_binder_wakeup_ilocked(walt_binder_low_latency_set, NULL);
	register_trace_android_vh_binder_set_priority(binder_set_priority_hook, NULL);
	register_trace_android_vh_binder_restore_priority(binder_restore_priority_hook, NULL);

	register_trace_android_rvh_check_preempt_wakeup(walt_cfs_check_preempt_wakeup, NULL);
	register_trace_android_rvh_replace_next_task_fair(walt_cfs_replace_next_task_fair, NULL);
}