  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Timer events oriented CPU idle governor
  4. *
  5. * TEO governor:
  6. * Copyright (C) 2018 - 2021 Intel Corporation
  7. * Author: Rafael J. Wysocki <[email protected]>
  8. *
  9. * Util-awareness mechanism:
  10. * Copyright (C) 2022 Arm Ltd.
  11. * Author: Kajetan Puchalski <[email protected]>
  12. */
  13. /**
  14. * DOC: teo-description
  15. *
  16. * The idea of this governor is based on the observation that on many systems
  17. * timer events are two or more orders of magnitude more frequent than any
  18. * other interrupts, so they are likely to be the most significant cause of CPU
  19. * wakeups from idle states. Moreover, information about what happened in the
  20. * (relatively recent) past can be used to estimate whether or not the deepest
  21. * idle state with target residency within the (known) time till the closest
  22. * timer event, referred to as the sleep length, is likely to be suitable for
  23. * the upcoming CPU idle period and, if not, then which of the shallower idle
  24. * states to choose instead of it.
  25. *
  26. * Of course, non-timer wakeup sources are more important in some use cases
  27. * which can be covered by taking a few most recent idle time intervals of the
  28. * CPU into account. However, even in that context it is not necessary to
  29. * consider idle duration values greater than the sleep length, because the
  30. * closest timer will ultimately wake up the CPU anyway unless it is woken up
  31. * earlier.
  32. *
  33. * Thus this governor estimates whether or not the prospective idle duration of
  34. * a CPU is likely to be significantly shorter than the sleep length and selects
  35. * an idle state for it accordingly.
  36. *
  37. * The computations carried out by this governor are based on using bins whose
  38. * boundaries are aligned with the target residency parameter values of the CPU
  39. * idle states provided by the %CPUIdle driver in the ascending order. That is,
  40. * the first bin spans from 0 up to, but not including, the target residency of
  41. * the second idle state (idle state 1), the second bin spans from the target
  42. * residency of idle state 1 up to, but not including, the target residency of
  43. * idle state 2, the third bin spans from the target residency of idle state 2
  44. * up to, but not including, the target residency of idle state 3 and so on.
  45. * The last bin spans from the target residency of the deepest idle state
  46. * supplied by the driver to infinity.
  47. *
  48. * Two metrics called "hits" and "intercepts" are associated with each bin.
  49. * They are updated every time before selecting an idle state for the given CPU
  50. * in accordance with what happened last time.
  51. *
  52. * The "hits" metric reflects the relative frequency of situations in which the
  53. * sleep length and the idle duration measured after CPU wakeup fall into the
  54. * same bin (that is, the CPU appears to wake up "on time" relative to the sleep
  55. * length). In turn, the "intercepts" metric reflects the relative frequency of
  56. * situations in which the measured idle duration is so much shorter than the
  57. * sleep length that the bin it falls into corresponds to an idle state
  58. * shallower than the one whose bin is fallen into by the sleep length (these
  59. * situations are referred to as "intercepts" below).
  60. *
  61. * In addition to the metrics described above, the governor counts recent
  62. * intercepts (that is, intercepts that have occurred during the last
  63. * %NR_RECENT invocations of it for the given CPU) for each bin.
  64. *
  65. * In order to select an idle state for a CPU, the governor takes the following
  66. * steps (modulo the possible latency constraint that must be taken into account
  67. * too):
  68. *
  69. * 1. Find the deepest CPU idle state whose target residency does not exceed
  70. * the current sleep length (the candidate idle state) and compute 3 sums as
  71. * follows:
  72. *
  73. * - The sum of the "hits" and "intercepts" metrics for the candidate state
  74. * and all of the deeper idle states (it represents the cases in which the
  75. * CPU was idle long enough to avoid being intercepted if the sleep length
  76. * had been equal to the current one).
  77. *
  78. * - The sum of the "intercepts" metrics for all of the idle states shallower
  79. * than the candidate one (it represents the cases in which the CPU was not
  80. * idle long enough to avoid being intercepted if the sleep length had been
  81. * equal to the current one).
  82. *
  83. * - The sum of the numbers of recent intercepts for all of the idle states
  84. * shallower than the candidate one.
  85. *
  86. * 2. If the second sum is greater than the first one or the third sum is
  87. * greater than %NR_RECENT / 2, the CPU is likely to wake up early, so look
  88. * for an alternative idle state to select.
  89. *
  90. * - Traverse the idle states shallower than the candidate one in the
  91. * descending order.
  92. *
  93. * - For each of them compute the sum of the "intercepts" metrics and the sum
  94. * of the numbers of recent intercepts over all of the idle states between
  95. * it and the candidate one (including the former and excluding the
  96. * latter).
  97. *
  98. * - If each of these sums that needs to be taken into account (because the
  99. * check related to it has indicated that the CPU is likely to wake up
  100. * early) is greater than a half of the corresponding sum computed in step
  101. * 1 (which means that the target residency of the state in question had
  102. * not exceeded the idle duration in over a half of the relevant cases),
  103. * select the given idle state instead of the candidate one.
  104. *
  105. * 3. By default, select the candidate state.
  106. *
  107. * Util-awareness mechanism:
  108. *
  109. * The idea behind the util-awareness extension is that there are two distinct
  110. * scenarios for the CPU which should result in two different approaches to idle
  111. * state selection - utilized and not utilized.
  112. *
  113. * In this case, 'utilized' means that the average runqueue util of the CPU is
  114. * above a certain threshold.
  115. *
  116. * When the CPU is utilized while going into idle, more likely than not it will
  117. * be woken up to do more work soon and so a shallower idle state should be
  118. * selected to minimise latency and maximise performance. When the CPU is not
  119. * being utilized, the usual metrics-based approach to selecting the deepest
  120. * available idle state should be preferred to take advantage of the power
  121. * saving.
  122. *
  123. * In order to achieve this, the governor uses a utilization threshold.
  124. * The threshold is computed per-CPU as a percentage of the CPU's capacity
  125. * by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%)
  126. * seems to be getting the best results.
  127. *
  128. * Before selecting the next idle state, the governor compares the current CPU
  129. * util to the precomputed util threshold. If it's below, it defaults to the
  130. * TEO metrics mechanism. If it's above, the closest shallower idle state will
 * be selected instead, as long as it is not a polling state.
  132. */
  133. #include <linux/cpuidle.h>
  134. #include <linux/jiffies.h>
  135. #include <linux/kernel.h>
  136. #include <linux/sched.h>
  137. #include <linux/sched/clock.h>
  138. #include <linux/sched/topology.h>
  139. #include <linux/tick.h>
  140. /*
  141. * The number of bits to shift the CPU's capacity by in order to determine
  142. * the utilized threshold.
  143. *
  144. * 6 was chosen based on testing as the number that achieved the best balance
  145. * of power and performance on average.
  146. *
  147. * The resulting threshold is high enough to not be triggered by background
  148. * noise and low enough to react quickly when activity starts to ramp up.
  149. */
  150. #define UTIL_THRESHOLD_SHIFT 6
  151. /*
  152. * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  153. * is used for decreasing metrics on a regular basis.
  154. */
  155. #define PULSE 1024
  156. #define DECAY_SHIFT 3
  157. /*
  158. * Number of the most recent idle duration values to take into consideration for
  159. * the detection of recent early wakeup patterns.
  160. */
  161. #define NR_RECENT 9
/**
 * struct teo_bin - Metrics used by the TEO cpuidle governor.
 * @intercepts: The "intercepts" metric (decaying counter of early wakeups
 *		whose measured idle duration fell into this bin).
 * @hits: The "hits" metric (decaying counter of wakeups for which the sleep
 *	  length and the measured idle duration fell into this bin).
 * @recent: The number of recent "intercepts" (entries of @recent_idx in
 *	    struct teo_cpu currently pointing at this bin).
 */
struct teo_bin {
	unsigned int intercepts;
	unsigned int hits;
	unsigned int recent;
};

/**
 * struct teo_cpu - CPU data used by the TEO cpuidle governor.
 * @time_span_ns: Time between idle state selection and post-wakeup update.
 * @sleep_length_ns: Time till the closest timer event (at the selection time).
 * @state_bins: Idle state data bins for this CPU (one bin per idle state).
 * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
 * @next_recent_idx: Index of the next @recent_idx entry to update.
 * @recent_idx: Indices of bins corresponding to recent "intercepts"
 *		(ring buffer of NR_RECENT entries, -1 meaning "hit").
 * @util_threshold: Threshold above which the CPU is considered utilized
 * @utilized: Whether the last sleep on the CPU happened while utilized
 */
struct teo_cpu {
	s64 time_span_ns;
	s64 sleep_length_ns;
	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
	unsigned int total;
	int next_recent_idx;
	int recent_idx[NR_RECENT];
	unsigned long util_threshold;
	bool utilized;
};

/* Per-CPU governor state, initialized by teo_enable_device(). */
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
  195. unsigned long teo_cpu_get_util_threshold(int cpu)
  196. {
  197. struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, cpu);
  198. return cpu_data->util_threshold;
  199. }
  200. EXPORT_SYMBOL_GPL(teo_cpu_get_util_threshold);
  201. void teo_cpu_set_util_threshold(int cpu, unsigned long util)
  202. {
  203. struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, cpu);
  204. cpu_data->util_threshold = util;
  205. }
  206. EXPORT_SYMBOL_GPL(teo_cpu_set_util_threshold);
  207. /**
  208. * teo_cpu_is_utilized - Check if the CPU's util is above the threshold
  209. * @cpu: Target CPU
  210. * @cpu_data: Governor CPU data for the target CPU
  211. */
  212. #ifdef CONFIG_SMP
  213. static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
  214. {
  215. return sched_cpu_util(cpu) > cpu_data->util_threshold;
  216. }
  217. #else
  218. static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
  219. {
  220. return false;
  221. }
  222. #endif
/**
 * teo_update - Update CPU metrics after wakeup.
 * @drv: cpuidle driver containing state data.
 * @dev: Target CPU.
 *
 * Decay all per-bin metrics, determine which bins the saved sleep length and
 * the just-measured idle duration fall into, and bump either the "hits" or
 * the "intercepts" metric accordingly, keeping the "recent" intercept ring
 * buffer and the grand @total in sync.
 */
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
	int i, idx_timer = 0, idx_duration = 0;
	u64 measured_ns;

	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
		/*
		 * One of the safety nets has triggered or the wakeup was close
		 * enough to the closest timer event expected at the idle state
		 * selection time to be discarded.
		 */
		measured_ns = U64_MAX;
	} else {
		u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;

		/*
		 * The computations below are to determine whether or not the
		 * (saved) time till the next timer event and the measured idle
		 * duration fall into the same "bin", so use last_residency_ns
		 * for that instead of time_span_ns which includes the cpuidle
		 * overhead.
		 */
		measured_ns = dev->last_residency_ns;
		/*
		 * The delay between the wakeup and the first instruction
		 * executed by the CPU is not likely to be worst-case every
		 * time, so take 1/2 of the exit latency as a very rough
		 * approximation of the average of it.
		 */
		if (measured_ns >= lat_ns)
			measured_ns -= lat_ns / 2;
		else
			measured_ns /= 2;
	}

	cpu_data->total = 0;

	/*
	 * Decay the "hits" and "intercepts" metrics for all of the bins and
	 * find the bins that the sleep length and the measured idle duration
	 * fall into.
	 */
	for (i = 0; i < drv->state_count; i++) {
		s64 target_residency_ns = drv->states[i].target_residency_ns;
		struct teo_bin *bin = &cpu_data->state_bins[i];

		bin->hits -= bin->hits >> DECAY_SHIFT;
		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;

		cpu_data->total += bin->hits + bin->intercepts;

		if (target_residency_ns <= cpu_data->sleep_length_ns) {
			idx_timer = i;
			if (target_residency_ns <= measured_ns)
				idx_duration = i;
		}
	}

	/* Advance the "recent" ring buffer, wrapping around at NR_RECENT. */
	i = cpu_data->next_recent_idx++;
	if (cpu_data->next_recent_idx >= NR_RECENT)
		cpu_data->next_recent_idx = 0;

	/* Retire the oldest recorded intercept, if the slot holds one. */
	if (cpu_data->recent_idx[i] >= 0)
		cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;

	/*
	 * If the measured idle duration falls into the same bin as the sleep
	 * length, this is a "hit", so update the "hits" metric for that bin.
	 * Otherwise, update the "intercepts" metric for the bin fallen into by
	 * the measured idle duration.
	 */
	if (idx_timer == idx_duration) {
		cpu_data->state_bins[idx_timer].hits += PULSE;
		cpu_data->recent_idx[i] = -1;
	} else {
		cpu_data->state_bins[idx_duration].intercepts += PULSE;
		cpu_data->state_bins[idx_duration].recent++;
		cpu_data->recent_idx[i] = idx_duration;
	}

	cpu_data->total += PULSE;
}
  300. static bool teo_time_ok(u64 interval_ns)
  301. {
  302. return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
  303. }
  304. static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
  305. {
  306. return (drv->states[idx].target_residency_ns +
  307. drv->states[idx+1].target_residency_ns) / 2;
  308. }
  309. /**
  310. * teo_find_shallower_state - Find shallower idle state matching given duration.
  311. * @drv: cpuidle driver containing state data.
  312. * @dev: Target CPU.
  313. * @state_idx: Index of the capping idle state.
  314. * @duration_ns: Idle duration value to match.
  315. * @no_poll: Don't consider polling states.
  316. */
  317. static int teo_find_shallower_state(struct cpuidle_driver *drv,
  318. struct cpuidle_device *dev, int state_idx,
  319. s64 duration_ns, bool no_poll)
  320. {
  321. int i;
  322. for (i = state_idx - 1; i >= 0; i--) {
  323. if (dev->states_usage[i].disable ||
  324. (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
  325. continue;
  326. state_idx = i;
  327. if (drv->states[i].target_residency_ns <= duration_ns)
  328. break;
  329. }
  330. return state_idx;
  331. }
/**
 * teo_select - Selects the next idle state to enter.
 * @drv: cpuidle driver containing state data.
 * @dev: Target CPU.
 * @stop_tick: Indication on whether or not to stop the scheduler tick.
 *
 * Return: the index of the idle state to enter.  As a side effect, records
 * the selection timestamp and sleep length in the per-CPU data for the
 * subsequent teo_update(), and may clear *@stop_tick.
 */
static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		      bool *stop_tick)
{
	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
	unsigned int idx_intercept_sum = 0;
	unsigned int intercept_sum = 0;
	unsigned int idx_recent_sum = 0;
	unsigned int recent_sum = 0;
	unsigned int idx_hit_sum = 0;
	unsigned int hit_sum = 0;
	int constraint_idx = 0;
	int idx0 = 0, idx = -1;
	bool alt_intercepts, alt_recent;
	ktime_t delta_tick;
	s64 duration_ns;
	int i;

	/* Fold the outcome of the previous idle period into the metrics. */
	if (dev->last_state_idx >= 0) {
		teo_update(drv, dev);
		dev->last_state_idx = -1;
	}

	cpu_data->time_span_ns = local_clock();

	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
	cpu_data->sleep_length_ns = duration_ns;

	/* Check if there is any choice in the first place. */
	if (drv->state_count < 2) {
		idx = 0;
		goto end;
	}
	if (!dev->states_usage[0].disable) {
		idx = 0;
		if (drv->states[1].target_residency_ns > duration_ns)
			goto end;
	}

	cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
	/*
	 * If the CPU is being utilized over the threshold and there are only 2
	 * states to choose from, the metrics need not be considered, so choose
	 * the shallowest non-polling state and exit.
	 */
	if (drv->state_count < 3 && cpu_data->utilized) {
		/* The CPU is utilized, so assume a short idle duration. */
		duration_ns = teo_middle_of_bin(0, drv);
		/*
		 * If state 0 is enabled and it is not a polling one, select it
		 * right away unless the scheduler tick has been stopped, in
		 * which case care needs to be taken to leave the CPU in a deep
		 * enough state in case it is not woken up any time soon after
		 * all. If state 1 is disabled, though, state 0 must be used
		 * anyway.
		 */
		if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
		    teo_time_ok(duration_ns)) || dev->states_usage[1].disable)
			idx = 0;
		else /* Assume that state 1 is not a polling one and use it. */
			idx = 1;

		goto end;
	}

	/*
	 * Find the deepest idle state whose target residency does not exceed
	 * the current sleep length and the deepest idle state not deeper than
	 * the former whose exit latency does not exceed the current latency
	 * constraint. Compute the sums of metrics for early wakeup pattern
	 * detection.
	 */
	for (i = 1; i < drv->state_count; i++) {
		struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
		struct cpuidle_state *s = &drv->states[i];

		/*
		 * Update the sums of idle state metrics for all of the states
		 * shallower than the current one.
		 */
		intercept_sum += prev_bin->intercepts;
		hit_sum += prev_bin->hits;
		recent_sum += prev_bin->recent;

		if (dev->states_usage[i].disable)
			continue;

		if (idx < 0) {
			idx = i; /* first enabled state */
			idx0 = i;
		}

		if (s->target_residency_ns > duration_ns)
			break;

		idx = i;

		if (s->exit_latency_ns <= latency_req)
			constraint_idx = i;

		/* Snapshot the sums for the current candidate state. */
		idx_intercept_sum = intercept_sum;
		idx_hit_sum = hit_sum;
		idx_recent_sum = recent_sum;
	}

	/* Avoid unnecessary overhead. */
	if (idx < 0) {
		idx = 0; /* No states enabled, must use 0. */
		goto end;
	} else if (idx == idx0) {
		goto end;
	}

	/*
	 * If the sum of the intercepts metric for all of the idle states
	 * shallower than the current candidate one (idx) is greater than the
	 * sum of the intercepts and hits metrics for the candidate state and
	 * all of the deeper states, or the sum of the numbers of recent
	 * intercepts over all of the states shallower than the candidate one
	 * is greater than a half of the number of recent events taken into
	 * account, the CPU is likely to wake up early, so find an alternative
	 * idle state to select.
	 */
	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
	alt_recent = idx_recent_sum > NR_RECENT / 2;
	if (alt_recent || alt_intercepts) {
		s64 first_suitable_span_ns = duration_ns;
		int first_suitable_idx = idx;

		/*
		 * Look for the deepest idle state whose target residency had
		 * not exceeded the idle duration in over a half of the relevant
		 * cases (both with respect to intercepts overall and with
		 * respect to the recent intercepts only) in the past.
		 *
		 * Take the possible latency constraint and duration limitation
		 * present if the tick has been stopped already into account.
		 */
		intercept_sum = 0;
		recent_sum = 0;

		for (i = idx - 1; i >= 0; i--) {
			struct teo_bin *bin = &cpu_data->state_bins[i];
			s64 span_ns;

			intercept_sum += bin->intercepts;
			recent_sum += bin->recent;

			span_ns = teo_middle_of_bin(i, drv);

			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
			    (!alt_intercepts ||
			     2 * intercept_sum > idx_intercept_sum)) {
				if (teo_time_ok(span_ns) &&
				    !dev->states_usage[i].disable) {
					idx = i;
					duration_ns = span_ns;
				} else {
					/*
					 * The current state is too shallow or
					 * disabled, so take the first enabled
					 * deeper state with suitable time span.
					 */
					idx = first_suitable_idx;
					duration_ns = first_suitable_span_ns;
				}
				break;
			}

			if (dev->states_usage[i].disable)
				continue;

			if (!teo_time_ok(span_ns)) {
				/*
				 * The current state is too shallow, but if an
				 * alternative candidate state has been found,
				 * it may still turn out to be a better choice.
				 */
				if (first_suitable_idx != idx)
					continue;

				break;
			}

			first_suitable_span_ns = span_ns;
			first_suitable_idx = i;
		}
	}

	/*
	 * If there is a latency constraint, it may be necessary to select an
	 * idle state shallower than the current candidate one.
	 */
	if (idx > constraint_idx)
		idx = constraint_idx;

	/*
	 * If the CPU is being utilized over the threshold, choose a shallower
	 * non-polling state to improve latency, unless the scheduler tick has
	 * been stopped already and the shallower state's target residency is
	 * not sufficiently large.
	 */
	if (cpu_data->utilized) {
		s64 span_ns;

		i = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
		span_ns = teo_middle_of_bin(i, drv);
		if (teo_time_ok(span_ns)) {
			idx = i;
			duration_ns = span_ns;
		}
	}

end:
	/*
	 * Don't stop the tick if the selected state is a polling one or if the
	 * expected idle duration is shorter than the tick period length.
	 */
	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
	    duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
		*stop_tick = false;

		/*
		 * The tick is not going to be stopped, so if the target
		 * residency of the state to be returned is not within the time
		 * till the closest timer including the tick, try to correct
		 * that.
		 */
		if (idx > idx0 &&
		    drv->states[idx].target_residency_ns > delta_tick)
			idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
	}

	return idx;
}
  542. /**
  543. * teo_reflect - Note that governor data for the CPU need to be updated.
  544. * @dev: Target CPU.
  545. * @state: Entered state.
  546. */
  547. static void teo_reflect(struct cpuidle_device *dev, int state)
  548. {
  549. struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
  550. dev->last_state_idx = state;
  551. /*
  552. * If the wakeup was not "natural", but triggered by one of the safety
  553. * nets, assume that the CPU might have been idle for the entire sleep
  554. * length time.
  555. */
  556. if (dev->poll_time_limit ||
  557. (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
  558. dev->poll_time_limit = false;
  559. cpu_data->time_span_ns = cpu_data->sleep_length_ns;
  560. } else {
  561. cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
  562. }
  563. }
  564. /**
  565. * teo_enable_device - Initialize the governor's data for the target CPU.
  566. * @drv: cpuidle driver (not used).
  567. * @dev: Target CPU.
  568. */
  569. static int teo_enable_device(struct cpuidle_driver *drv,
  570. struct cpuidle_device *dev)
  571. {
  572. struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
  573. unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu);
  574. int i;
  575. memset(cpu_data, 0, sizeof(*cpu_data));
  576. cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT;
  577. for (i = 0; i < NR_RECENT; i++)
  578. cpu_data->recent_idx[i] = -1;
  579. return 0;
  580. }
/* Governor descriptor registered with the cpuidle core. */
static struct cpuidle_governor teo_governor = {
	.name =		"teo",
	.rating =	19,
	.enable =	teo_enable_device,
	.select =	teo_select,
	.reflect =	teo_reflect,
};

static int __init teo_governor_init(void)
{
	return cpuidle_register_governor(&teo_governor);
}

postcore_initcall(teo_governor_init);