cpufreq_schedutil.c

// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <[email protected]>
 */

#include <trace/hooks/sched.h>

#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

struct sugov_tunables {
	struct gov_attr_set	attr_set;
	unsigned int		rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy	*policy;

	struct sugov_tunables	*tunables;
	struct list_head	tunables_hook;

	raw_spinlock_t		update_lock;
	u64			last_freq_update_time;
	s64			freq_update_delay_ns;
	unsigned int		next_freq;
	unsigned int		cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work		irq_work;
	struct kthread_work	work;
	struct mutex		work_lock;
	struct kthread_worker	worker;
	struct task_struct	*thread;
	bool			work_in_progress;

	bool			limits_changed;
	bool			need_freq_update;
};

struct sugov_cpu {
	struct update_util_data	update_util;
	struct sugov_policy	*sg_policy;
	unsigned int		cpu;

	bool			iowait_boost_pending;
	unsigned int		iowait_boost;
	u64			last_update;

	unsigned long		util;
	unsigned long		bw_dl;
	unsigned long		max;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long		saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
		sg_policy->need_freq_update = true;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}
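
/*
 * Record @next_freq as the frequency to switch to.  Return false, meaning no
 * switch is needed, when the request matches the previously committed
 * frequency and no limits update is pending.
 */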
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update)
		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
	else if (sg_policy->next_freq == next_freq)
		return false;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}
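
/*
 * Kick the slow path: queue the irq_work that hands the frequency change over
 * to the sugov kthread, unless a request is already in flight.
 */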
static void sugov_deferred_update(struct sugov_policy *sg_policy)
{
	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 * For example, with frequency-invariant utilization, util / max = 0.5 and
 * max_freq = 2 GHz, the raw next_freq is 1.25 * 2 GHz * 0.5 = 1.25 GHz.
 *
 * The lowest driver-supported frequency which is equal to or greater than the
 * raw next_freq (as calculated above) is returned, subject to policy min/max
 * and cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;
	unsigned long next_freq = 0;

	util = map_util_perf(util);
	trace_android_vh_map_util_freq(util, freq, max, &next_freq, policy,
				       &sg_policy->need_freq_update);
	if (next_freq)
		freq = next_freq;
	else
		freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}
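
/*
 * Snapshot this CPU's capacity, deadline bandwidth and aggregate
 * (FREQUENCY_UTIL) utilization for use by the frequency selection code.
 */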
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
	sg_cpu->bw_dl = cpu_bw_dl(rq);
	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
					  FREQUENCY_UTIL, NULL);
}

/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}

/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 *
 * A CPU running a task that has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is decreased by this function each time an
 * increase has not been requested (!iowait_boost_pending).
 *
 * A CPU that appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that frequently wait on IO, while
 * being more conservative with tasks that only do sporadic IO operations.
 */
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
{
	unsigned long boost;

	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
	if (sg_cpu->util < boost)
		sg_cpu->util = boost;
}

#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
		sg_cpu->sg_policy->limits_changed = true;
}
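
/*
 * Common part of the single-CPU update paths: account the IO-wait boost,
 * honour the rate limit, and refresh this CPU's utilization data.  Return
 * true if a new frequency (or performance level) should be computed.
 */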
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
					      u64 time, unsigned int flags)
{
	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
		return false;

	sugov_get_util(sg_cpu);
	sugov_iowait_apply(sg_cpu, time);

	return true;
}

static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int cached_freq = sg_policy->cached_raw_freq;
	unsigned int next_f;

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
	    !sg_policy->need_freq_update) {
		next_f = sg_policy->next_freq;

		/* Restore cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = cached_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, next_f))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}
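
/*
 * Single-CPU update path for drivers providing the adjust_perf() callback:
 * pass utilization hints to the driver directly instead of computing a
 * frequency, falling back to the frequency path when frequency invariance is
 * not available.
 */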
static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	unsigned long prev_util = sg_cpu->util;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	/*
	 * Do not reduce the target performance level if the CPU has not been
	 * idle recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
				   map_util_perf(sg_cpu->util), sg_cpu->max);

	sg_cpu->sg_policy->last_freq_update_time = time;
}
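
/*
 * For a shared policy, evaluate every CPU in the policy and use the one with
 * the highest utilization relative to its capacity to drive the frequency
 * selection.
 */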
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;

		sugov_get_util(j_sg_cpu);
		sugov_iowait_apply(j_sg_cpu, time);
		j_util = j_sg_cpu->util;
		j_max = j_sg_cpu->max;

		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(sg_policy, util, max);
}
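
/*
 * Update handler for shared (multi-CPU) policies, serialized with
 * sg_policy->update_lock because it can run concurrently on any CPU of the
 * policy.
 */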
static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (!sugov_update_next_freq(sg_policy, time, next_f))
			goto unlock;

		if (sg_policy->policy->fast_switch_enabled)
			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
		else
			sugov_deferred_update(sg_policy);
	}
unlock:
	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock shortly to handle the case where
	 * sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to false
	 * here; otherwise we may miss queueing the new update.
	 *
	 * Note: If a work was queued after the update_lock is released,
	 * sugov_work() will just be called again by kthread_work code; and the
	 * request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}
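
/* Runs in interrupt context: defer the actual frequency change to the kthread. */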
static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attrs[] = {
	&rate_limit_us.attr,
	NULL
};
ATTRIBUTE_GROUPS(sugov);

static void sugov_tunables_free(struct kobject *kobj)
{
	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);

	kfree(to_sugov_tunables(attr_set));
}

static struct kobj_type sugov_tunables_ktype = {
	.default_groups = sugov_groups,
	.sysfs_ops = &governor_sysfs_ops,
	.release = &sugov_tunables_free,
};

/********************** cpufreq governor interface *********************/

struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
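
/*
 * Create the per-policy kthread used for the slow switching path and give it
 * SCHED_DEADLINE (SCHED_FLAG_SUGOV) attributes so frequency updates are not
 * delayed by the workload they are issued on behalf of.
 */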
static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size		= sizeof(struct sched_attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_flags	= SCHED_FLAG_SUGOV,
		.sched_nice	= 0,
		.sched_priority	= 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime	= 1000000,
		.sched_deadline	= 10000000,
		.sched_period	= 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	trace_android_vh_set_sugov_sched_attr(&attr);
	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_clear_global_tunables(void)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;
}
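
/*
 * Governor ->init() callback: allocate the policy data and slow-path kthread,
 * then attach either the already existing global tunables or a freshly
 * allocated tunables object exposed through sysfs.
 */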
static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_clear_global_tunables();

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_clear_global_tunables();

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}
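
/*
 * Governor ->start() callback: reset the per-policy and per-CPU state, pick
 * the update handler matching the policy type, and register it with the
 * scheduler's cpufreq update hooks.
 */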
static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = 0;
	sg_policy->work_in_progress = false;
	sg_policy->limits_changed = false;
	sg_policy->cached_raw_freq = 0;

	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
	}

	if (policy_is_shared(policy))
		uu = sugov_update_shared;
	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
		uu = sugov_update_single_perf;
	else
		uu = sugov_update_single_freq;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->limits_changed = true;
}

struct cpufreq_governor schedutil_gov = {
	.name			= "schedutil",
	.owner			= THIS_MODULE,
	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init			= sugov_init,
	.exit			= sugov_exit,
	.start			= sugov_start,
	.stop			= sugov_stop,
	.limits			= sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

cpufreq_governor_init(schedutil_gov);