rt.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  4. * policies)
  5. */
  6. #include <trace/hooks/sched.h>
  7. int sched_rr_timeslice = RR_TIMESLICE;
  8. /* More than 4 hours if BW_SHIFT equals 20. */
  9. static const u64 max_rt_runtime = MAX_BW;
  10. static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
  11. struct rt_bandwidth def_rt_bandwidth;
  12. /*
  13. * period over which we measure -rt task CPU usage in us.
  14. * default: 1s
  15. */
  16. unsigned int sysctl_sched_rt_period = 1000000;
  17. /*
  18. * part of the period that we allow rt tasks to run in us.
  19. * default: 0.95s
  20. */
  21. int sysctl_sched_rt_runtime = 950000;
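/*
 * With the defaults above, RT tasks may run for at most 950 ms out of
 * every 1 s period; the remaining 5% is left for non-RT tasks. Writing
 * -1 to sched_rt_runtime_us disables this throttling (RUNTIME_INF).
 */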
  22. #ifdef CONFIG_SYSCTL
  23. static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
  24. static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
  25. size_t *lenp, loff_t *ppos);
  26. static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
  27. size_t *lenp, loff_t *ppos);
  28. static struct ctl_table sched_rt_sysctls[] = {
  29. {
  30. .procname = "sched_rt_period_us",
  31. .data = &sysctl_sched_rt_period,
  32. .maxlen = sizeof(unsigned int),
  33. .mode = 0644,
  34. .proc_handler = sched_rt_handler,
  35. },
  36. {
  37. .procname = "sched_rt_runtime_us",
  38. .data = &sysctl_sched_rt_runtime,
  39. .maxlen = sizeof(int),
  40. .mode = 0644,
  41. .proc_handler = sched_rt_handler,
  42. },
  43. {
  44. .procname = "sched_rr_timeslice_ms",
  45. .data = &sysctl_sched_rr_timeslice,
  46. .maxlen = sizeof(int),
  47. .mode = 0644,
  48. .proc_handler = sched_rr_handler,
  49. },
  50. {}
  51. };
  52. static int __init sched_rt_sysctl_init(void)
  53. {
  54. register_sysctl_init("kernel", sched_rt_sysctls);
  55. return 0;
  56. }
  57. late_initcall(sched_rt_sysctl_init);
  58. #endif
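/*
 * Bandwidth period timer: fires once per rt_period, lets
 * do_sched_rt_period_timer() refill runtime and unthrottle runqueues for
 * every elapsed period, and re-arms itself until all rt_rqs report idle.
 */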
  59. static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
  60. {
  61. struct rt_bandwidth *rt_b =
  62. container_of(timer, struct rt_bandwidth, rt_period_timer);
  63. int idle = 0;
  64. int overrun;
  65. raw_spin_lock(&rt_b->rt_runtime_lock);
  66. for (;;) {
  67. overrun = hrtimer_forward_now(timer, rt_b->rt_period);
  68. if (!overrun)
  69. break;
  70. raw_spin_unlock(&rt_b->rt_runtime_lock);
  71. idle = do_sched_rt_period_timer(rt_b, overrun);
  72. raw_spin_lock(&rt_b->rt_runtime_lock);
  73. }
  74. if (idle)
  75. rt_b->rt_period_active = 0;
  76. raw_spin_unlock(&rt_b->rt_runtime_lock);
  77. return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  78. }
  79. void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  80. {
  81. rt_b->rt_period = ns_to_ktime(period);
  82. rt_b->rt_runtime = runtime;
  83. raw_spin_lock_init(&rt_b->rt_runtime_lock);
  84. hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
  85. HRTIMER_MODE_REL_HARD);
  86. rt_b->rt_period_timer.function = sched_rt_period_timer;
  87. }
  88. static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
  89. {
  90. raw_spin_lock(&rt_b->rt_runtime_lock);
  91. if (!rt_b->rt_period_active) {
  92. rt_b->rt_period_active = 1;
  93. /*
  94. * SCHED_DEADLINE updates the bandwidth, as a runaway
  95. * RT task with a DL task could hog a CPU. But DL does
  96. * not reset the period. If a deadline task was running
  97. * without an RT task running, it can cause RT tasks to
  98. * throttle when they start up. Kick the timer right away
  99. * to update the period.
  100. */
  101. hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
  102. hrtimer_start_expires(&rt_b->rt_period_timer,
  103. HRTIMER_MODE_ABS_PINNED_HARD);
  104. }
  105. raw_spin_unlock(&rt_b->rt_runtime_lock);
  106. }
  107. static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  108. {
  109. if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
  110. return;
  111. do_start_rt_bandwidth(rt_b);
  112. }
  113. void init_rt_rq(struct rt_rq *rt_rq)
  114. {
  115. struct rt_prio_array *array;
  116. int i;
  117. array = &rt_rq->active;
  118. for (i = 0; i < MAX_RT_PRIO; i++) {
  119. INIT_LIST_HEAD(array->queue + i);
  120. __clear_bit(i, array->bitmap);
  121. }
  122. /* delimiter for bitsearch: */
  123. __set_bit(MAX_RT_PRIO, array->bitmap);
  124. #if defined CONFIG_SMP
  125. rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
  126. rt_rq->highest_prio.next = MAX_RT_PRIO-1;
  127. rt_rq->rt_nr_migratory = 0;
  128. rt_rq->overloaded = 0;
  129. plist_head_init(&rt_rq->pushable_tasks);
  130. #endif /* CONFIG_SMP */
  131. /* We start in dequeued state, because no RT tasks are queued */
  132. rt_rq->rt_queued = 0;
  133. rt_rq->rt_time = 0;
  134. rt_rq->rt_throttled = 0;
  135. rt_rq->rt_runtime = 0;
  136. raw_spin_lock_init(&rt_rq->rt_runtime_lock);
  137. }
  138. #ifdef CONFIG_RT_GROUP_SCHED
  139. static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
  140. {
  141. hrtimer_cancel(&rt_b->rt_period_timer);
  142. }
  143. #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
  144. static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
  145. {
  146. #ifdef CONFIG_SCHED_DEBUG
  147. WARN_ON_ONCE(!rt_entity_is_task(rt_se));
  148. #endif
  149. return container_of(rt_se, struct task_struct, rt);
  150. }
  151. static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
  152. {
  153. return rt_rq->rq;
  154. }
  155. static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
  156. {
  157. return rt_se->rt_rq;
  158. }
  159. static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
  160. {
  161. struct rt_rq *rt_rq = rt_se->rt_rq;
  162. return rt_rq->rq;
  163. }
  164. void unregister_rt_sched_group(struct task_group *tg)
  165. {
  166. if (tg->rt_se)
  167. destroy_rt_bandwidth(&tg->rt_bandwidth);
  168. }
  169. void free_rt_sched_group(struct task_group *tg)
  170. {
  171. int i;
  172. for_each_possible_cpu(i) {
  173. if (tg->rt_rq)
  174. kfree(tg->rt_rq[i]);
  175. if (tg->rt_se)
  176. kfree(tg->rt_se[i]);
  177. }
  178. kfree(tg->rt_rq);
  179. kfree(tg->rt_se);
  180. }
  181. void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
  182. struct sched_rt_entity *rt_se, int cpu,
  183. struct sched_rt_entity *parent)
  184. {
  185. struct rq *rq = cpu_rq(cpu);
  186. rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
  187. rt_rq->rt_nr_boosted = 0;
  188. rt_rq->rq = rq;
  189. rt_rq->tg = tg;
  190. tg->rt_rq[cpu] = rt_rq;
  191. tg->rt_se[cpu] = rt_se;
  192. if (!rt_se)
  193. return;
  194. if (!parent)
  195. rt_se->rt_rq = &rq->rt;
  196. else
  197. rt_se->rt_rq = parent->my_q;
  198. rt_se->my_q = rt_rq;
  199. rt_se->parent = parent;
  200. INIT_LIST_HEAD(&rt_se->run_list);
  201. }
  202. int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  203. {
  204. struct rt_rq *rt_rq;
  205. struct sched_rt_entity *rt_se;
  206. int i;
  207. tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
  208. if (!tg->rt_rq)
  209. goto err;
  210. tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
  211. if (!tg->rt_se)
  212. goto err;
  213. init_rt_bandwidth(&tg->rt_bandwidth,
  214. ktime_to_ns(def_rt_bandwidth.rt_period), 0);
  215. for_each_possible_cpu(i) {
  216. rt_rq = kzalloc_node(sizeof(struct rt_rq),
  217. GFP_KERNEL, cpu_to_node(i));
  218. if (!rt_rq)
  219. goto err;
  220. rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
  221. GFP_KERNEL, cpu_to_node(i));
  222. if (!rt_se)
  223. goto err_free_rq;
  224. init_rt_rq(rt_rq);
  225. rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
  226. init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
  227. }
  228. return 1;
  229. err_free_rq:
  230. kfree(rt_rq);
  231. err:
  232. return 0;
  233. }
  234. #else /* CONFIG_RT_GROUP_SCHED */
  235. #define rt_entity_is_task(rt_se) (1)
  236. static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
  237. {
  238. return container_of(rt_se, struct task_struct, rt);
  239. }
  240. static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
  241. {
  242. return container_of(rt_rq, struct rq, rt);
  243. }
  244. static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
  245. {
  246. struct task_struct *p = rt_task_of(rt_se);
  247. return task_rq(p);
  248. }
  249. static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
  250. {
  251. struct rq *rq = rq_of_rt_se(rt_se);
  252. return &rq->rt;
  253. }
  254. void unregister_rt_sched_group(struct task_group *tg) { }
  255. void free_rt_sched_group(struct task_group *tg) { }
  256. int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  257. {
  258. return 1;
  259. }
  260. #endif /* CONFIG_RT_GROUP_SCHED */
  261. #ifdef CONFIG_SMP
  262. static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
  263. {
  264. /* Try to pull RT tasks here if we lower this rq's prio */
  265. return rq->online && rq->rt.highest_prio.curr > prev->prio;
  266. }
  267. static inline int rt_overloaded(struct rq *rq)
  268. {
  269. return atomic_read(&rq->rd->rto_count);
  270. }
  271. static inline void rt_set_overload(struct rq *rq)
  272. {
  273. if (!rq->online)
  274. return;
  275. cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
  276. /*
  277. * Make sure the mask is visible before we set
  278. * the overload count. That is checked to determine
  279. * if we should look at the mask. It would be a shame
  280. * if we looked at the mask, but the mask was not
  281. * updated yet.
  282. *
  283. * Matched by the barrier in pull_rt_task().
  284. */
  285. smp_wmb();
  286. atomic_inc(&rq->rd->rto_count);
  287. }
  288. static inline void rt_clear_overload(struct rq *rq)
  289. {
  290. if (!rq->online)
  291. return;
  292. /* the order here really doesn't matter */
  293. atomic_dec(&rq->rd->rto_count);
  294. cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
  295. }
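/*
 * A runqueue is "RT overloaded" when it has more than one RT task and at
 * least one of them can migrate. The state lives in the root domain
 * (rto_mask / rto_count) and is what pull_rt_task() on other CPUs checks.
 */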
  296. static void update_rt_migration(struct rt_rq *rt_rq)
  297. {
  298. if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
  299. if (!rt_rq->overloaded) {
  300. rt_set_overload(rq_of_rt_rq(rt_rq));
  301. rt_rq->overloaded = 1;
  302. }
  303. } else if (rt_rq->overloaded) {
  304. rt_clear_overload(rq_of_rt_rq(rt_rq));
  305. rt_rq->overloaded = 0;
  306. }
  307. }
  308. static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  309. {
  310. struct task_struct *p;
  311. if (!rt_entity_is_task(rt_se))
  312. return;
  313. p = rt_task_of(rt_se);
  314. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  315. rt_rq->rt_nr_total++;
  316. if (p->nr_cpus_allowed > 1)
  317. rt_rq->rt_nr_migratory++;
  318. update_rt_migration(rt_rq);
  319. }
  320. static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  321. {
  322. struct task_struct *p;
  323. if (!rt_entity_is_task(rt_se))
  324. return;
  325. p = rt_task_of(rt_se);
  326. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  327. rt_rq->rt_nr_total--;
  328. if (p->nr_cpus_allowed > 1)
  329. rt_rq->rt_nr_migratory--;
  330. update_rt_migration(rt_rq);
  331. }
  332. static inline int has_pushable_tasks(struct rq *rq)
  333. {
  334. return !plist_head_empty(&rq->rt.pushable_tasks);
  335. }
  336. static DEFINE_PER_CPU(struct balance_callback, rt_push_head);
  337. static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);
  338. static void push_rt_tasks(struct rq *);
  339. static void pull_rt_task(struct rq *);
  340. static inline void rt_queue_push_tasks(struct rq *rq)
  341. {
  342. if (!has_pushable_tasks(rq))
  343. return;
  344. queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
  345. }
  346. static inline void rt_queue_pull_task(struct rq *rq)
  347. {
  348. queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
  349. }
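/*
 * Pushable tasks are runnable RT tasks that are not currently running and
 * may run on more than one CPU. They sit in a priority-ordered plist per
 * runqueue; highest_prio.next mirrors the head of that list.
 */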
  350. static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
  351. {
  352. plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
  353. plist_node_init(&p->pushable_tasks, p->prio);
  354. plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
  355. /* Update the highest prio pushable task */
  356. if (p->prio < rq->rt.highest_prio.next)
  357. rq->rt.highest_prio.next = p->prio;
  358. }
  359. static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
  360. {
  361. plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
  362. /* Update the new highest prio pushable task */
  363. if (has_pushable_tasks(rq)) {
  364. p = plist_first_entry(&rq->rt.pushable_tasks,
  365. struct task_struct, pushable_tasks);
  366. rq->rt.highest_prio.next = p->prio;
  367. } else {
  368. rq->rt.highest_prio.next = MAX_RT_PRIO-1;
  369. }
  370. }
  371. #else
  372. static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
  373. {
  374. }
  375. static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
  376. {
  377. }
  378. static inline
  379. void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  380. {
  381. }
  382. static inline
  383. void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  384. {
  385. }
  386. static inline void rt_queue_push_tasks(struct rq *rq)
  387. {
  388. }
  389. #endif /* CONFIG_SMP */
  390. static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
  391. static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
  392. static inline int on_rt_rq(struct sched_rt_entity *rt_se)
  393. {
  394. return rt_se->on_rq;
  395. }
  396. #ifdef CONFIG_UCLAMP_TASK
  397. /*
  398. * Verify the fitness of task @p to run on @cpu taking into account the uclamp
  399. * settings.
  400. *
  401. * This check is only important for heterogeneous systems where uclamp_min value
  402. * is higher than the capacity of a @cpu. For non-heterogeneous systems this
  403. * function will always return true.
  404. *
  405. * The function will return true if the capacity of the @cpu is >= the
  406. * uclamp_min and false otherwise.
  407. *
  408. * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
  409. * > uclamp_max.
  410. */
  411. static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
  412. {
  413. unsigned int min_cap;
  414. unsigned int max_cap;
  415. unsigned int cpu_cap;
  416. /* Only heterogeneous systems can benefit from this check */
  417. if (!sched_asym_cpucap_active())
  418. return true;
  419. min_cap = uclamp_eff_value(p, UCLAMP_MIN);
  420. max_cap = uclamp_eff_value(p, UCLAMP_MAX);
  421. cpu_cap = capacity_orig_of(cpu);
  422. return cpu_cap >= min(min_cap, max_cap);
  423. }
  424. #else
  425. static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
  426. {
  427. return true;
  428. }
  429. #endif
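/*
 * With RT group scheduling, runtime and period come from the task group's
 * rt_bandwidth; without it, every rt_rq falls back to the global
 * def_rt_bandwidth defaults.
 */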
  430. #ifdef CONFIG_RT_GROUP_SCHED
  431. static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
  432. {
  433. if (!rt_rq->tg)
  434. return RUNTIME_INF;
  435. return rt_rq->rt_runtime;
  436. }
  437. static inline u64 sched_rt_period(struct rt_rq *rt_rq)
  438. {
  439. return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
  440. }
  441. typedef struct task_group *rt_rq_iter_t;
  442. static inline struct task_group *next_task_group(struct task_group *tg)
  443. {
  444. do {
  445. tg = list_entry_rcu(tg->list.next,
  446. typeof(struct task_group), list);
  447. } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
  448. if (&tg->list == &task_groups)
  449. tg = NULL;
  450. return tg;
  451. }
  452. #define for_each_rt_rq(rt_rq, iter, rq) \
  453. for (iter = container_of(&task_groups, typeof(*iter), list); \
  454. (iter = next_task_group(iter)) && \
  455. (rt_rq = iter->rt_rq[cpu_of(rq)]);)
  456. #define for_each_sched_rt_entity(rt_se) \
  457. for (; rt_se; rt_se = rt_se->parent)
  458. static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
  459. {
  460. return rt_se->my_q;
  461. }
  462. static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
  463. static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
  464. static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  465. {
  466. struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
  467. struct rq *rq = rq_of_rt_rq(rt_rq);
  468. struct sched_rt_entity *rt_se;
  469. int cpu = cpu_of(rq);
  470. rt_se = rt_rq->tg->rt_se[cpu];
  471. if (rt_rq->rt_nr_running) {
  472. if (!rt_se)
  473. enqueue_top_rt_rq(rt_rq);
  474. else if (!on_rt_rq(rt_se))
  475. enqueue_rt_entity(rt_se, 0);
  476. if (rt_rq->highest_prio.curr < curr->prio)
  477. resched_curr(rq);
  478. }
  479. }
  480. static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
  481. {
  482. struct sched_rt_entity *rt_se;
  483. int cpu = cpu_of(rq_of_rt_rq(rt_rq));
  484. rt_se = rt_rq->tg->rt_se[cpu];
  485. if (!rt_se) {
  486. dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
  487. /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
  488. cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
  489. }
  490. else if (on_rt_rq(rt_se))
  491. dequeue_rt_entity(rt_se, 0);
  492. }
  493. static inline int rt_rq_throttled(struct rt_rq *rt_rq)
  494. {
  495. return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
  496. }
  497. static int rt_se_boosted(struct sched_rt_entity *rt_se)
  498. {
  499. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  500. struct task_struct *p;
  501. if (rt_rq)
  502. return !!rt_rq->rt_nr_boosted;
  503. p = rt_task_of(rt_se);
  504. return p->prio != p->normal_prio;
  505. }
  506. #ifdef CONFIG_SMP
  507. static inline const struct cpumask *sched_rt_period_mask(void)
  508. {
  509. return this_rq()->rd->span;
  510. }
  511. #else
  512. static inline const struct cpumask *sched_rt_period_mask(void)
  513. {
  514. return cpu_online_mask;
  515. }
  516. #endif
  517. static inline
  518. struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
  519. {
  520. return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
  521. }
  522. static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
  523. {
  524. return &rt_rq->tg->rt_bandwidth;
  525. }
  526. #else /* !CONFIG_RT_GROUP_SCHED */
  527. static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
  528. {
  529. return rt_rq->rt_runtime;
  530. }
  531. static inline u64 sched_rt_period(struct rt_rq *rt_rq)
  532. {
  533. return ktime_to_ns(def_rt_bandwidth.rt_period);
  534. }
  535. typedef struct rt_rq *rt_rq_iter_t;
  536. #define for_each_rt_rq(rt_rq, iter, rq) \
  537. for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
  538. #define for_each_sched_rt_entity(rt_se) \
  539. for (; rt_se; rt_se = NULL)
  540. static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
  541. {
  542. return NULL;
  543. }
  544. static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  545. {
  546. struct rq *rq = rq_of_rt_rq(rt_rq);
  547. if (!rt_rq->rt_nr_running)
  548. return;
  549. enqueue_top_rt_rq(rt_rq);
  550. resched_curr(rq);
  551. }
  552. static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
  553. {
  554. dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
  555. }
  556. static inline int rt_rq_throttled(struct rt_rq *rt_rq)
  557. {
  558. return rt_rq->rt_throttled;
  559. }
  560. static inline const struct cpumask *sched_rt_period_mask(void)
  561. {
  562. return cpu_online_mask;
  563. }
  564. static inline
  565. struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
  566. {
  567. return &cpu_rq(cpu)->rt;
  568. }
  569. static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
  570. {
  571. return &def_rt_bandwidth;
  572. }
  573. #endif /* CONFIG_RT_GROUP_SCHED */
  574. bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
  575. {
  576. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  577. return (hrtimer_active(&rt_b->rt_period_timer) ||
  578. rt_rq->rt_time < rt_b->rt_runtime);
  579. }
  580. #ifdef CONFIG_SMP
  581. /*
  582. * We ran out of runtime, see if we can borrow some from our neighbours.
  583. */
  584. static void do_balance_runtime(struct rt_rq *rt_rq)
  585. {
  586. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  587. struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
  588. int i, weight;
  589. u64 rt_period;
  590. weight = cpumask_weight(rd->span);
  591. raw_spin_lock(&rt_b->rt_runtime_lock);
  592. rt_period = ktime_to_ns(rt_b->rt_period);
  593. for_each_cpu(i, rd->span) {
  594. struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
  595. s64 diff;
  596. if (iter == rt_rq)
  597. continue;
  598. raw_spin_lock(&iter->rt_runtime_lock);
  599. /*
  600. * Either all rqs have inf runtime and there's nothing to steal
  601. * or __disable_runtime() below sets a specific rq to inf to
  602. * indicate it's been disabled and disallow stealing.
  603. */
  604. if (iter->rt_runtime == RUNTIME_INF)
  605. goto next;
  606. /*
  607. * From runqueues with spare time, take 1/n part of their
  608. * spare time, but no more than our period.
  609. */
  610. diff = iter->rt_runtime - iter->rt_time;
  611. if (diff > 0) {
  612. diff = div_u64((u64)diff, weight);
  613. if (rt_rq->rt_runtime + diff > rt_period)
  614. diff = rt_period - rt_rq->rt_runtime;
  615. iter->rt_runtime -= diff;
  616. rt_rq->rt_runtime += diff;
  617. if (rt_rq->rt_runtime == rt_period) {
  618. raw_spin_unlock(&iter->rt_runtime_lock);
  619. break;
  620. }
  621. }
  622. next:
  623. raw_spin_unlock(&iter->rt_runtime_lock);
  624. }
  625. raw_spin_unlock(&rt_b->rt_runtime_lock);
  626. }
  627. /*
  628. * Ensure this RQ takes back all the runtime it lent to its neighbours.
  629. */
  630. static void __disable_runtime(struct rq *rq)
  631. {
  632. struct root_domain *rd = rq->rd;
  633. rt_rq_iter_t iter;
  634. struct rt_rq *rt_rq;
  635. if (unlikely(!scheduler_running))
  636. return;
  637. for_each_rt_rq(rt_rq, iter, rq) {
  638. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  639. s64 want;
  640. int i;
  641. raw_spin_lock(&rt_b->rt_runtime_lock);
  642. raw_spin_lock(&rt_rq->rt_runtime_lock);
  643. /*
  644. * Either we're all inf and nobody needs to borrow, or we're
  645. * already disabled and thus have nothing to do, or we have
  646. * exactly the right amount of runtime to take out.
  647. */
  648. if (rt_rq->rt_runtime == RUNTIME_INF ||
  649. rt_rq->rt_runtime == rt_b->rt_runtime)
  650. goto balanced;
  651. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  652. /*
  653. * Calculate the difference between what we started out with
  654. * and what we currently have; that's the amount of runtime
  655. * we lent and now have to reclaim.
  656. */
  657. want = rt_b->rt_runtime - rt_rq->rt_runtime;
  658. /*
  659. * Greedy reclaim, take back as much as we can.
  660. */
  661. for_each_cpu(i, rd->span) {
  662. struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
  663. s64 diff;
  664. /*
  665. * Can't reclaim from ourselves or disabled runqueues.
  666. */
  667. if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
  668. continue;
  669. raw_spin_lock(&iter->rt_runtime_lock);
  670. if (want > 0) {
  671. diff = min_t(s64, iter->rt_runtime, want);
  672. iter->rt_runtime -= diff;
  673. want -= diff;
  674. } else {
  675. iter->rt_runtime -= want;
  676. want -= want;
  677. }
  678. raw_spin_unlock(&iter->rt_runtime_lock);
  679. if (!want)
  680. break;
  681. }
  682. raw_spin_lock(&rt_rq->rt_runtime_lock);
  683. /*
  684. * We cannot be left wanting - that would mean some runtime
  685. * leaked out of the system.
  686. */
  687. WARN_ON_ONCE(want);
  688. balanced:
  689. /*
  690. * Disable all the borrow logic by pretending we have inf
  691. * runtime - in which case borrowing doesn't make sense.
  692. */
  693. rt_rq->rt_runtime = RUNTIME_INF;
  694. rt_rq->rt_throttled = 0;
  695. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  696. raw_spin_unlock(&rt_b->rt_runtime_lock);
  697. /* Make rt_rq available for pick_next_task() */
  698. sched_rt_rq_enqueue(rt_rq);
  699. }
  700. }
  701. static void __enable_runtime(struct rq *rq)
  702. {
  703. rt_rq_iter_t iter;
  704. struct rt_rq *rt_rq;
  705. if (unlikely(!scheduler_running))
  706. return;
  707. /*
  708. * Reset each runqueue's bandwidth settings
  709. */
  710. for_each_rt_rq(rt_rq, iter, rq) {
  711. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  712. raw_spin_lock(&rt_b->rt_runtime_lock);
  713. raw_spin_lock(&rt_rq->rt_runtime_lock);
  714. rt_rq->rt_runtime = rt_b->rt_runtime;
  715. rt_rq->rt_time = 0;
  716. rt_rq->rt_throttled = 0;
  717. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  718. raw_spin_unlock(&rt_b->rt_runtime_lock);
  719. }
  720. }
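/*
 * Called with rt_rq->rt_runtime_lock held once rt_time has exceeded
 * rt_runtime: if the RT_RUNTIME_SHARE feature is enabled, drop the lock
 * and try to borrow spare runtime from the other CPUs in the root domain.
 */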
  721. static void balance_runtime(struct rt_rq *rt_rq)
  722. {
  723. if (!sched_feat(RT_RUNTIME_SHARE))
  724. return;
  725. if (rt_rq->rt_time > rt_rq->rt_runtime) {
  726. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  727. do_balance_runtime(rt_rq);
  728. raw_spin_lock(&rt_rq->rt_runtime_lock);
  729. }
  730. }
  731. #else /* !CONFIG_SMP */
  732. static inline void balance_runtime(struct rt_rq *rt_rq) {}
  733. #endif /* CONFIG_SMP */
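/*
 * Runs from the bandwidth timer once per rt_period: for every rt_rq in the
 * period mask, subtract up to 'overrun' periods worth of runtime from the
 * accumulated rt_time, unthrottle and re-enqueue runqueues that dropped
 * below their runtime again, and return whether the timer can go idle.
 */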
  734. static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
  735. {
  736. int i, idle = 1, throttled = 0;
  737. const struct cpumask *span;
  738. span = sched_rt_period_mask();
  739. #ifdef CONFIG_RT_GROUP_SCHED
  740. /*
  741. * FIXME: isolated CPUs should really leave the root task group,
  742. * whether they are isolcpus or were isolated via cpusets, lest
  743. * the timer run on a CPU which does not service all runqueues,
  744. * potentially leaving other CPUs indefinitely throttled. If
  745. * isolation is really required, the user will turn the throttle
  746. * off to kill the perturbations it causes anyway. Meanwhile,
  747. * this maintains functionality for boot and/or troubleshooting.
  748. */
  749. if (rt_b == &root_task_group.rt_bandwidth)
  750. span = cpu_online_mask;
  751. #endif
  752. for_each_cpu(i, span) {
  753. int enqueue = 0;
  754. struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
  755. struct rq *rq = rq_of_rt_rq(rt_rq);
  756. struct rq_flags rf;
  757. int skip;
  758. /*
  759. * When span == cpu_online_mask, taking each rq->lock
  760. * can be time-consuming. Try to avoid it when possible.
  761. */
  762. raw_spin_lock(&rt_rq->rt_runtime_lock);
  763. if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
  764. rt_rq->rt_runtime = rt_b->rt_runtime;
  765. skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
  766. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  767. if (skip)
  768. continue;
  769. rq_lock(rq, &rf);
  770. update_rq_clock(rq);
  771. if (rt_rq->rt_time) {
  772. u64 runtime;
  773. raw_spin_lock(&rt_rq->rt_runtime_lock);
  774. if (rt_rq->rt_throttled)
  775. balance_runtime(rt_rq);
  776. runtime = rt_rq->rt_runtime;
  777. rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
  778. if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
  779. rt_rq->rt_throttled = 0;
  780. enqueue = 1;
  781. /*
  782. * When we're idle and a woken (rt) task is
  783. * throttled, check_preempt_curr() will set
  784. * skip_update and the time between the wakeup
  785. * and this unthrottle will get accounted as
  786. * 'runtime'.
  787. */
  788. if (rt_rq->rt_nr_running && rq->curr == rq->idle)
  789. rq_clock_cancel_skipupdate(rq);
  790. }
  791. if (rt_rq->rt_time || rt_rq->rt_nr_running)
  792. idle = 0;
  793. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  794. } else if (rt_rq->rt_nr_running) {
  795. idle = 0;
  796. if (!rt_rq_throttled(rt_rq))
  797. enqueue = 1;
  798. }
  799. if (rt_rq->rt_throttled)
  800. throttled = 1;
  801. if (enqueue)
  802. sched_rt_rq_enqueue(rt_rq);
  803. rq_unlock(rq, &rf);
  804. }
  805. if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
  806. return 1;
  807. return idle;
  808. }
  809. static inline int rt_se_prio(struct sched_rt_entity *rt_se)
  810. {
  811. #ifdef CONFIG_RT_GROUP_SCHED
  812. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  813. if (rt_rq)
  814. return rt_rq->highest_prio.curr;
  815. #endif
  816. return rt_task_of(rt_se)->prio;
  817. }
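/*
 * Check whether @rt_rq has used up its runtime for the current period.
 * If borrowing via balance_runtime() does not help, mark the rt_rq
 * throttled and dequeue it; the non-zero return tells the caller to
 * resched the current task.
 */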
  818. static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
  819. {
  820. u64 runtime = sched_rt_runtime(rt_rq);
  821. if (rt_rq->rt_throttled)
  822. return rt_rq_throttled(rt_rq);
  823. if (runtime >= sched_rt_period(rt_rq))
  824. return 0;
  825. balance_runtime(rt_rq);
  826. runtime = sched_rt_runtime(rt_rq);
  827. if (runtime == RUNTIME_INF)
  828. return 0;
  829. if (rt_rq->rt_time > runtime) {
  830. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  831. /*
  832. * Don't actually throttle groups that have no runtime assigned
  833. * but accrue some time due to boosting.
  834. */
  835. if (likely(rt_b->rt_runtime)) {
  836. rt_rq->rt_throttled = 1;
  837. printk_deferred_once("sched: RT throttling activated\n");
  838. trace_android_vh_dump_throttled_rt_tasks(
  839. raw_smp_processor_id(),
  840. rq_clock(rq_of_rt_rq(rt_rq)),
  841. sched_rt_period(rt_rq),
  842. runtime,
  843. hrtimer_get_expires_ns(&rt_b->rt_period_timer));
  844. } else {
  845. /*
  846. * In case we did anyway, make it go away,
  847. * replenishment is a joke, since it will replenish us
  848. * with exactly 0 ns.
  849. */
  850. rt_rq->rt_time = 0;
  851. }
  852. if (rt_rq_throttled(rt_rq)) {
  853. sched_rt_rq_dequeue(rt_rq);
  854. return 1;
  855. }
  856. }
  857. return 0;
  858. }
  859. /*
  860. * Update the current task's runtime statistics. Skip current tasks that
  861. * are not in our scheduling class.
  862. */
  863. static void update_curr_rt(struct rq *rq)
  864. {
  865. struct task_struct *curr = rq->curr;
  866. struct sched_rt_entity *rt_se = &curr->rt;
  867. u64 delta_exec;
  868. u64 now;
  869. if (curr->sched_class != &rt_sched_class)
  870. return;
  871. now = rq_clock_task(rq);
  872. delta_exec = now - curr->se.exec_start;
  873. if (unlikely((s64)delta_exec <= 0))
  874. return;
  875. schedstat_set(curr->stats.exec_max,
  876. max(curr->stats.exec_max, delta_exec));
  877. trace_sched_stat_runtime(curr, delta_exec, 0);
  878. update_current_exec_runtime(curr, now, delta_exec);
  879. trace_android_vh_sched_stat_runtime_rt(curr, delta_exec);
  880. if (!rt_bandwidth_enabled())
  881. return;
  882. for_each_sched_rt_entity(rt_se) {
  883. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  884. int exceeded;
  885. if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
  886. raw_spin_lock(&rt_rq->rt_runtime_lock);
  887. rt_rq->rt_time += delta_exec;
  888. exceeded = sched_rt_runtime_exceeded(rt_rq);
  889. if (exceeded)
  890. resched_curr(rq);
  891. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  892. if (exceeded)
  893. do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
  894. }
  895. }
  896. }
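/*
 * The top-level rt_rq contributes its rt_nr_running to rq->nr_running only
 * while it is actually runnable (i.e. not throttled). These two helpers
 * add/remove that contribution and track it via rt_rq->rt_queued.
 */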
  897. static void
  898. dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
  899. {
  900. struct rq *rq = rq_of_rt_rq(rt_rq);
  901. BUG_ON(&rq->rt != rt_rq);
  902. if (!rt_rq->rt_queued)
  903. return;
  904. BUG_ON(!rq->nr_running);
  905. sub_nr_running(rq, count);
  906. rt_rq->rt_queued = 0;
  907. }
  908. static void
  909. enqueue_top_rt_rq(struct rt_rq *rt_rq)
  910. {
  911. struct rq *rq = rq_of_rt_rq(rt_rq);
  912. BUG_ON(&rq->rt != rt_rq);
  913. if (rt_rq->rt_queued)
  914. return;
  915. if (rt_rq_throttled(rt_rq))
  916. return;
  917. if (rt_rq->rt_nr_running) {
  918. add_nr_running(rq, rt_rq->rt_nr_running);
  919. rt_rq->rt_queued = 1;
  920. }
  921. /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
  922. cpufreq_update_util(rq, 0);
  923. }
  924. #if defined CONFIG_SMP
  925. static void
  926. inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
  927. {
  928. struct rq *rq = rq_of_rt_rq(rt_rq);
  929. #ifdef CONFIG_RT_GROUP_SCHED
  930. /*
  931. * Change rq's cpupri only if rt_rq is the top queue.
  932. */
  933. if (&rq->rt != rt_rq)
  934. return;
  935. #endif
  936. if (rq->online && prio < prev_prio)
  937. cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
  938. }
  939. static void
  940. dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
  941. {
  942. struct rq *rq = rq_of_rt_rq(rt_rq);
  943. #ifdef CONFIG_RT_GROUP_SCHED
  944. /*
  945. * Change rq's cpupri only if rt_rq is the top queue.
  946. */
  947. if (&rq->rt != rt_rq)
  948. return;
  949. #endif
  950. if (rq->online && rt_rq->highest_prio.curr != prev_prio)
  951. cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
  952. }
  953. #else /* CONFIG_SMP */
  954. static inline
  955. void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
  956. static inline
  957. void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
  958. #endif /* CONFIG_SMP */
  959. #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
  960. static void
  961. inc_rt_prio(struct rt_rq *rt_rq, int prio)
  962. {
  963. int prev_prio = rt_rq->highest_prio.curr;
  964. if (prio < prev_prio)
  965. rt_rq->highest_prio.curr = prio;
  966. inc_rt_prio_smp(rt_rq, prio, prev_prio);
  967. }
  968. static void
  969. dec_rt_prio(struct rt_rq *rt_rq, int prio)
  970. {
  971. int prev_prio = rt_rq->highest_prio.curr;
  972. if (rt_rq->rt_nr_running) {
  973. WARN_ON(prio < prev_prio);
  974. /*
  975. * This may have been our highest task, and therefore
  976. * we may have some recomputation to do
  977. */
  978. if (prio == prev_prio) {
  979. struct rt_prio_array *array = &rt_rq->active;
  980. rt_rq->highest_prio.curr =
  981. sched_find_first_bit(array->bitmap);
  982. }
  983. } else {
  984. rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
  985. }
  986. dec_rt_prio_smp(rt_rq, prio, prev_prio);
  987. }
  988. #else
  989. static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
  990. static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
  991. #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
  992. #ifdef CONFIG_RT_GROUP_SCHED
  993. static void
  994. inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  995. {
  996. if (rt_se_boosted(rt_se))
  997. rt_rq->rt_nr_boosted++;
  998. if (rt_rq->tg)
  999. start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
  1000. }
  1001. static void
  1002. dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  1003. {
  1004. if (rt_se_boosted(rt_se))
  1005. rt_rq->rt_nr_boosted--;
  1006. WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
  1007. }
  1008. #else /* CONFIG_RT_GROUP_SCHED */
  1009. static void
  1010. inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  1011. {
  1012. start_rt_bandwidth(&def_rt_bandwidth);
  1013. }
  1014. static inline
  1015. void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
  1016. #endif /* CONFIG_RT_GROUP_SCHED */
  1017. static inline
  1018. unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
  1019. {
  1020. struct rt_rq *group_rq = group_rt_rq(rt_se);
  1021. if (group_rq)
  1022. return group_rq->rt_nr_running;
  1023. else
  1024. return 1;
  1025. }
  1026. static inline
  1027. unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
  1028. {
  1029. struct rt_rq *group_rq = group_rt_rq(rt_se);
  1030. struct task_struct *tsk;
  1031. if (group_rq)
  1032. return group_rq->rr_nr_running;
  1033. tsk = rt_task_of(rt_se);
  1034. return (tsk->policy == SCHED_RR) ? 1 : 0;
  1035. }
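/*
 * Enqueue/dequeue-time accounting for an rt_se on its rt_rq: nr_running
 * and rr_nr_running counts, highest-priority tracking, migratability and
 * group/bandwidth bookkeeping.
 */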
  1036. static inline
  1037. void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  1038. {
  1039. int prio = rt_se_prio(rt_se);
  1040. WARN_ON(!rt_prio(prio));
  1041. rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
  1042. rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
  1043. inc_rt_prio(rt_rq, prio);
  1044. inc_rt_migration(rt_se, rt_rq);
  1045. inc_rt_group(rt_se, rt_rq);
  1046. }
  1047. static inline
  1048. void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  1049. {
  1050. WARN_ON(!rt_prio(rt_se_prio(rt_se)));
  1051. WARN_ON(!rt_rq->rt_nr_running);
  1052. rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
  1053. rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
  1054. dec_rt_prio(rt_rq, rt_se_prio(rt_se));
  1055. dec_rt_migration(rt_se, rt_rq);
  1056. dec_rt_group(rt_se, rt_rq);
  1057. }
  1058. /*
  1059. * Change rt_se->run_list location unless SAVE && !MOVE
  1060. *
  1061. * assumes ENQUEUE/DEQUEUE flags match
  1062. */
  1063. static inline bool move_entity(unsigned int flags)
  1064. {
  1065. if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
  1066. return false;
  1067. return true;
  1068. }
  1069. static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
  1070. {
  1071. list_del_init(&rt_se->run_list);
  1072. if (list_empty(array->queue + rt_se_prio(rt_se)))
  1073. __clear_bit(rt_se_prio(rt_se), array->bitmap);
  1074. rt_se->on_list = 0;
  1075. }
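/*
 * Schedstat wrappers for RT entities: thin shims around the generic
 * __update_stats_*() helpers that bail out unless schedstats are enabled
 * (and skip group entities, which carry no statistics).
 */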
  1076. static inline struct sched_statistics *
  1077. __schedstats_from_rt_se(struct sched_rt_entity *rt_se)
  1078. {
  1079. #ifdef CONFIG_RT_GROUP_SCHED
  1080. /* schedstats is not supported for rt group. */
  1081. if (!rt_entity_is_task(rt_se))
  1082. return NULL;
  1083. #endif
  1084. return &rt_task_of(rt_se)->stats;
  1085. }
  1086. static inline void
  1087. update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
  1088. {
  1089. struct sched_statistics *stats;
  1090. struct task_struct *p = NULL;
  1091. if (!schedstat_enabled())
  1092. return;
  1093. if (rt_entity_is_task(rt_se))
  1094. p = rt_task_of(rt_se);
  1095. stats = __schedstats_from_rt_se(rt_se);
  1096. if (!stats)
  1097. return;
  1098. __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats);
  1099. }
  1100. static inline void
  1101. update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
  1102. {
  1103. struct sched_statistics *stats;
  1104. struct task_struct *p = NULL;
  1105. if (!schedstat_enabled())
  1106. return;
  1107. if (rt_entity_is_task(rt_se))
  1108. p = rt_task_of(rt_se);
  1109. stats = __schedstats_from_rt_se(rt_se);
  1110. if (!stats)
  1111. return;
  1112. __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats);
  1113. }
  1114. static inline void
  1115. update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
  1116. int flags)
  1117. {
  1118. if (!schedstat_enabled())
  1119. return;
  1120. if (flags & ENQUEUE_WAKEUP)
  1121. update_stats_enqueue_sleeper_rt(rt_rq, rt_se);
  1122. }
  1123. static inline void
  1124. update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
  1125. {
  1126. struct sched_statistics *stats;
  1127. struct task_struct *p = NULL;
  1128. if (!schedstat_enabled())
  1129. return;
  1130. if (rt_entity_is_task(rt_se))
  1131. p = rt_task_of(rt_se);
  1132. stats = __schedstats_from_rt_se(rt_se);
  1133. if (!stats)
  1134. return;
  1135. __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats);
  1136. }
  1137. static inline void
  1138. update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
  1139. int flags)
  1140. {
  1141. struct task_struct *p = NULL;
  1142. if (!schedstat_enabled())
  1143. return;
  1144. if (rt_entity_is_task(rt_se))
  1145. p = rt_task_of(rt_se);
  1146. if ((flags & DEQUEUE_SLEEP) && p) {
  1147. unsigned int state;
  1148. state = READ_ONCE(p->__state);
  1149. if (state & TASK_INTERRUPTIBLE)
  1150. __schedstat_set(p->stats.sleep_start,
  1151. rq_clock(rq_of_rt_rq(rt_rq)));
  1152. if (state & TASK_UNINTERRUPTIBLE)
  1153. __schedstat_set(p->stats.block_start,
  1154. rq_clock(rq_of_rt_rq(rt_rq)));
  1155. }
  1156. }
  1157. static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1158. {
  1159. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  1160. struct rt_prio_array *array = &rt_rq->active;
  1161. struct rt_rq *group_rq = group_rt_rq(rt_se);
  1162. struct list_head *queue = array->queue + rt_se_prio(rt_se);
  1163. /*
  1164. * Don't enqueue the group if it's throttled, or when empty.
  1165. * The latter is a consequence of the former when a child group
  1166. * gets throttled and the current group doesn't have any other
  1167. * active members.
  1168. */
  1169. if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
  1170. if (rt_se->on_list)
  1171. __delist_rt_entity(rt_se, array);
  1172. return;
  1173. }
  1174. if (move_entity(flags)) {
  1175. WARN_ON_ONCE(rt_se->on_list);
  1176. if (flags & ENQUEUE_HEAD)
  1177. list_add(&rt_se->run_list, queue);
  1178. else
  1179. list_add_tail(&rt_se->run_list, queue);
  1180. __set_bit(rt_se_prio(rt_se), array->bitmap);
  1181. rt_se->on_list = 1;
  1182. }
  1183. rt_se->on_rq = 1;
  1184. inc_rt_tasks(rt_se, rt_rq);
  1185. }
  1186. static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1187. {
  1188. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  1189. struct rt_prio_array *array = &rt_rq->active;
  1190. if (move_entity(flags)) {
  1191. WARN_ON_ONCE(!rt_se->on_list);
  1192. __delist_rt_entity(rt_se, array);
  1193. }
  1194. rt_se->on_rq = 0;
  1195. dec_rt_tasks(rt_se, rt_rq);
  1196. }
  1197. /*
  1198. * Because the prio of an upper entry depends on the lower
  1199. * entries, we must remove entries top-down.
  1200. */
  1201. static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
  1202. {
  1203. struct sched_rt_entity *back = NULL;
  1204. unsigned int rt_nr_running;
  1205. for_each_sched_rt_entity(rt_se) {
  1206. rt_se->back = back;
  1207. back = rt_se;
  1208. }
  1209. rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
  1210. for (rt_se = back; rt_se; rt_se = rt_se->back) {
  1211. if (on_rt_rq(rt_se))
  1212. __dequeue_rt_entity(rt_se, flags);
  1213. }
  1214. dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
  1215. }
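/*
 * enqueue_rt_entity()/dequeue_rt_entity() operate on the whole group
 * hierarchy: the stack of entities is dequeued top-down first, then
 * re-enqueued bottom-up (on the dequeue path, only groups that still have
 * runnable children), and the top-level rt_rq is re-accounted in
 * rq->nr_running.
 */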
  1216. static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1217. {
  1218. struct rq *rq = rq_of_rt_se(rt_se);
  1219. update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
  1220. dequeue_rt_stack(rt_se, flags);
  1221. for_each_sched_rt_entity(rt_se)
  1222. __enqueue_rt_entity(rt_se, flags);
  1223. enqueue_top_rt_rq(&rq->rt);
  1224. }
  1225. static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1226. {
  1227. struct rq *rq = rq_of_rt_se(rt_se);
  1228. update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
  1229. dequeue_rt_stack(rt_se, flags);
  1230. for_each_sched_rt_entity(rt_se) {
  1231. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  1232. if (rt_rq && rt_rq->rt_nr_running)
  1233. __enqueue_rt_entity(rt_se, flags);
  1234. }
  1235. enqueue_top_rt_rq(&rq->rt);
  1236. }
  1237. #ifdef CONFIG_SMP
  1238. static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
  1239. bool sync)
  1240. {
  1241. /*
  1242. * If the waker is CFS, then an RT sync wakeup would preempt the waker
  1243. * and force it to run for a likely small time after the RT wakee is
  1244. * done. So, only honor RT sync wakeups from RT wakers.
  1245. */
  1246. return sync && task_has_rt_policy(rq->curr) &&
  1247. p->prio <= rq->rt.highest_prio.next &&
  1248. rq->rt.rt_nr_running <= 2;
  1249. }
  1250. #else
  1251. static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
  1252. bool sync)
  1253. {
  1254. return 0;
  1255. }
  1256. #endif
  1257. /*
  1258. * Adding/removing a task to/from a priority array:
  1259. */
  1260. static void
  1261. enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  1262. {
  1263. struct sched_rt_entity *rt_se = &p->rt;
  1264. bool sync = !!(flags & ENQUEUE_WAKEUP_SYNC);
  1265. if (flags & ENQUEUE_WAKEUP)
  1266. rt_se->timeout = 0;
  1267. check_schedstat_required();
  1268. update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);
  1269. enqueue_rt_entity(rt_se, flags);
  1270. if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
  1271. !should_honor_rt_sync(rq, p, sync))
  1272. enqueue_pushable_task(rq, p);
  1273. }
  1274. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  1275. {
  1276. struct sched_rt_entity *rt_se = &p->rt;
  1277. update_curr_rt(rq);
  1278. dequeue_rt_entity(rt_se, flags);
  1279. dequeue_pushable_task(rq, p);
  1280. }
  1281. /*
  1282. * Put task to the head or the end of the run list without the overhead of
  1283. * dequeue followed by enqueue.
  1284. */
  1285. static void
  1286. requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
  1287. {
  1288. if (on_rt_rq(rt_se)) {
  1289. struct rt_prio_array *array = &rt_rq->active;
  1290. struct list_head *queue = array->queue + rt_se_prio(rt_se);
  1291. if (head)
  1292. list_move(&rt_se->run_list, queue);
  1293. else
  1294. list_move_tail(&rt_se->run_list, queue);
  1295. }
  1296. }
  1297. static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
  1298. {
  1299. struct sched_rt_entity *rt_se = &p->rt;
  1300. struct rt_rq *rt_rq;
  1301. for_each_sched_rt_entity(rt_se) {
  1302. rt_rq = rt_rq_of_se(rt_se);
  1303. requeue_rt_entity(rt_rq, rt_se, head);
  1304. }
  1305. }
  1306. static void yield_task_rt(struct rq *rq)
  1307. {
  1308. requeue_task_rt(rq, rq->curr, 0);
  1309. }
  1310. #ifdef CONFIG_SMP
  1311. static int find_lowest_rq(struct task_struct *task);
  1312. #ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
  1313. /*
  1314. * Return whether the given cpu is currently non-preemptible
  1315. * while handling a potentially long softirq, or if the current
  1316. * task is likely to block preemptions soon because it is a
  1317. * ksoftirq thread that is handling softirqs.
  1318. */
  1319. static bool cpu_busy_with_softirqs(int cpu)
  1320. {
  1321. u32 softirqs = per_cpu(active_softirqs, cpu) |
  1322. __cpu_softirq_pending(cpu);
  1323. return softirqs & LONG_SOFTIRQ_MASK;
  1324. }
  1325. #else
  1326. static bool cpu_busy_with_softirqs(int cpu)
  1327. {
  1328. return false;
  1329. }
  1330. #endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
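/*
 * A CPU "fits" an RT task when it has enough capacity for the task and is
 * not currently tied up with long-running softirq work.
 */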
  1331. static bool rt_task_fits_cpu(struct task_struct *p, int cpu)
  1332. {
  1333. return rt_task_fits_capacity(p, cpu) && !cpu_busy_with_softirqs(cpu);
  1334. }
  1335. static int
  1336. select_task_rq_rt(struct task_struct *p, int cpu, int flags)
  1337. {
  1338. struct task_struct *curr;
  1339. struct rq *rq;
  1340. struct rq *this_cpu_rq;
  1341. bool test;
  1342. int target_cpu = -1;
  1343. bool sync = !!(flags & WF_SYNC);
  1344. int this_cpu;
  1345. trace_android_rvh_select_task_rq_rt(p, cpu, flags & 0xF,
  1346. flags, &target_cpu);
  1347. if (target_cpu >= 0)
  1348. return target_cpu;
  1349. /* For anything but wake ups, just return the task_cpu */
  1350. if (!(flags & (WF_TTWU | WF_FORK)))
  1351. goto out;
  1352. rq = cpu_rq(cpu);
  1353. rcu_read_lock();
  1354. curr = READ_ONCE(rq->curr); /* unlocked access */
  1355. this_cpu = smp_processor_id();
  1356. this_cpu_rq = cpu_rq(this_cpu);
  1357. /*
  1358. * If the current task on @p's runqueue is an RT task, then
  1359. * try to see if we can wake this RT task up on another
  1360. * runqueue. Otherwise simply start this RT task
  1361. * on its current runqueue.
  1362. *
  1363. * We want to avoid overloading runqueues. If the woken
  1364. * task is a higher priority, then it will stay on this CPU
  1365. * and the lower prio task should be moved to another CPU.
  1366. * Even though this will probably make the lower prio task
1367. * lose its cache, we do not want to bounce a higher priority task
  1368. * around just because it gave up its CPU, perhaps for a
  1369. * lock?
  1370. *
  1371. * For equal prio tasks, we just let the scheduler sort it out.
  1372. *
  1373. * Otherwise, just let it ride on the affined RQ and the
  1374. * post-schedule router will push the preempted task away
  1375. *
1376. * This test is optimistic; if we get it wrong the load-balancer
  1377. * will have to sort it out.
  1378. *
  1379. * We use rt_task_fits_cpu() to evaluate if the CPU is busy with
  1380. * potentially long-running softirq work, as well as take into
  1381. * account the capacity of the CPU to ensure it fits the
  1382. * requirement of the task - which is only important on
  1383. * heterogeneous systems like big.LITTLE.
  1384. */
  1385. test = curr &&
  1386. unlikely(rt_task(curr)) &&
  1387. (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
  1388. /*
  1389. * Respect the sync flag as long as the task can run on this CPU.
  1390. */
  1391. if (should_honor_rt_sync(this_cpu_rq, p, sync) &&
  1392. cpumask_test_cpu(this_cpu, p->cpus_ptr)) {
  1393. cpu = this_cpu;
  1394. goto out_unlock;
  1395. }
  1396. if (test || !rt_task_fits_cpu(p, cpu)) {
  1397. int target = find_lowest_rq(p);
  1398. /*
  1399. * Bail out if we were forcing a migration to find a better
  1400. * fitting CPU but our search failed.
  1401. */
  1402. if (!test && target != -1 && !rt_task_fits_cpu(p, target))
  1403. goto out_unlock;
  1404. /*
  1405. * Don't bother moving it if the destination CPU is
  1406. * not running a lower priority task.
  1407. */
  1408. if (target != -1 &&
  1409. p->prio < cpu_rq(target)->rt.highest_prio.curr)
  1410. cpu = target;
  1411. }
  1412. out_unlock:
  1413. rcu_read_unlock();
  1414. out:
  1415. return cpu;
  1416. }
  1417. static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  1418. {
  1419. /*
  1420. * Current can't be migrated, useless to reschedule,
  1421. * let's hope p can move out.
  1422. */
  1423. if (rq->curr->nr_cpus_allowed == 1 ||
  1424. !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
  1425. return;
  1426. /*
  1427. * p is migratable, so let's not schedule it and
  1428. * see if it is pushed or pulled somewhere else.
  1429. */
  1430. if (p->nr_cpus_allowed != 1 &&
  1431. cpupri_find(&rq->rd->cpupri, p, NULL))
  1432. return;
  1433. /*
  1434. * There appear to be other CPUs that can accept
1435. * the current task but none can run 'p', so let's reschedule
  1436. * to try and push the current task away:
  1437. */
  1438. requeue_task_rt(rq, p, 1);
  1439. resched_curr(rq);
  1440. }
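/*
 * Core scheduler balance callback: before the pick, give this rq a chance
 * to pull RT tasks from other CPUs (or let the vendor hook do its own
 * balancing) when the task being put back is not on the RT runqueue and
 * this rq may be left without RT work of comparable priority.
 */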
  1441. static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
  1442. {
  1443. if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
  1444. int done = 0;
  1445. /*
  1446. * This is OK, because current is on_cpu, which avoids it being
  1447. * picked for load-balance and preemption/IRQs are still
  1448. * disabled avoiding further scheduler activity on it and we've
  1449. * not yet started the picking loop.
  1450. */
  1451. rq_unpin_lock(rq, rf);
  1452. trace_android_rvh_sched_balance_rt(rq, p, &done);
  1453. if (!done)
  1454. pull_rt_task(rq);
  1455. rq_repin_lock(rq, rf);
  1456. }
  1457. return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
  1458. }
  1459. #endif /* CONFIG_SMP */
  1460. /*
  1461. * Preempt the current task with a newly woken task if needed:
  1462. */
  1463. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
  1464. {
  1465. if (p->prio < rq->curr->prio) {
  1466. resched_curr(rq);
  1467. return;
  1468. }
  1469. #ifdef CONFIG_SMP
  1470. /*
  1471. * If:
  1472. *
  1473. * - the newly woken task is of equal priority to the current task
  1474. * - the newly woken task is non-migratable while current is migratable
  1475. * - current will be preempted on the next reschedule
  1476. *
  1477. * we should check to see if current can readily move to a different
  1478. * cpu. If so, we will reschedule to allow the push logic to try
  1479. * to move current somewhere else, making room for our non-migratable
  1480. * task.
  1481. */
  1482. if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
  1483. check_preempt_equal_prio(rq, p);
  1484. #endif
  1485. }
  1486. static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
  1487. {
  1488. struct sched_rt_entity *rt_se = &p->rt;
  1489. struct rt_rq *rt_rq = &rq->rt;
  1490. p->se.exec_start = rq_clock_task(rq);
  1491. if (on_rt_rq(&p->rt))
  1492. update_stats_wait_end_rt(rt_rq, rt_se);
  1493. /* The running task is never eligible for pushing */
  1494. dequeue_pushable_task(rq, p);
  1495. if (!first)
  1496. return;
  1497. /*
1498. * If the prev task was RT, put_prev_task() has already updated the
1499. * utilization. We only care about the case where we start to schedule an
1500. * RT task.
  1501. */
  1502. if (rq->curr->sched_class != &rt_sched_class)
  1503. update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
  1504. trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 0);
  1505. rt_queue_push_tasks(rq);
  1506. }
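/*
 * Pick the highest-priority entity on this rt_rq: find the first set bit
 * in the priority bitmap and take the head of that priority queue.
 */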
  1507. static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
  1508. {
  1509. struct rt_prio_array *array = &rt_rq->active;
  1510. struct sched_rt_entity *next = NULL;
  1511. struct list_head *queue;
  1512. int idx;
  1513. idx = sched_find_first_bit(array->bitmap);
  1514. BUG_ON(idx >= MAX_RT_PRIO);
  1515. queue = array->queue + idx;
  1516. if (SCHED_WARN_ON(list_empty(queue)))
  1517. return NULL;
  1518. next = list_entry(queue->next, struct sched_rt_entity, run_list);
  1519. return next;
  1520. }
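/*
 * Walk down the group hierarchy, picking the highest-priority entity at
 * each level, until we reach an entity that represents a task.
 */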
  1521. static struct task_struct *_pick_next_task_rt(struct rq *rq)
  1522. {
  1523. struct sched_rt_entity *rt_se;
  1524. struct rt_rq *rt_rq = &rq->rt;
  1525. do {
  1526. rt_se = pick_next_rt_entity(rt_rq);
  1527. if (unlikely(!rt_se))
  1528. return NULL;
  1529. rt_rq = group_rt_rq(rt_se);
  1530. } while (rt_rq);
  1531. return rt_task_of(rt_se);
  1532. }
  1533. static struct task_struct *pick_task_rt(struct rq *rq)
  1534. {
  1535. struct task_struct *p;
  1536. if (!sched_rt_runnable(rq))
  1537. return NULL;
  1538. p = _pick_next_task_rt(rq);
  1539. return p;
  1540. }
  1541. static struct task_struct *pick_next_task_rt(struct rq *rq)
  1542. {
  1543. struct task_struct *p = pick_task_rt(rq);
  1544. if (p)
  1545. set_next_task_rt(rq, p, true);
  1546. return p;
  1547. }
  1548. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  1549. {
  1550. struct sched_rt_entity *rt_se = &p->rt;
  1551. struct rt_rq *rt_rq = &rq->rt;
  1552. if (on_rt_rq(&p->rt))
  1553. update_stats_wait_start_rt(rt_rq, rt_se);
  1554. update_curr_rt(rq);
  1555. update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
  1556. trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 1);
  1557. /*
  1558. * The previous task needs to be made eligible for pushing
  1559. * if it is still active
  1560. */
  1561. if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
  1562. enqueue_pushable_task(rq, p);
  1563. }
  1564. #ifdef CONFIG_SMP
  1565. /* Only try algorithms three times */
  1566. #define RT_MAX_TRIES 3
  1567. static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
  1568. {
  1569. if (!task_on_cpu(rq, p) &&
  1570. cpumask_test_cpu(cpu, &p->cpus_mask))
  1571. return 1;
  1572. return 0;
  1573. }
  1574. /*
  1575. * Return the highest pushable rq's task, which is suitable to be executed
  1576. * on the CPU, NULL otherwise
  1577. */
  1578. struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
  1579. {
  1580. struct plist_head *head = &rq->rt.pushable_tasks;
  1581. struct task_struct *p;
  1582. if (!has_pushable_tasks(rq))
  1583. return NULL;
  1584. plist_for_each_entry(p, head, pushable_tasks) {
  1585. if (pick_rt_task(rq, p, cpu))
  1586. return p;
  1587. }
  1588. return NULL;
  1589. }
  1590. EXPORT_SYMBOL_GPL(pick_highest_pushable_task);
  1591. static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
  1592. static int find_lowest_rq(struct task_struct *task)
  1593. {
  1594. struct sched_domain *sd;
  1595. struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
  1596. int this_cpu = smp_processor_id();
  1597. int cpu = -1;
  1598. int ret;
  1599. /* Make sure the mask is initialized first */
  1600. if (unlikely(!lowest_mask))
  1601. return -1;
  1602. if (task->nr_cpus_allowed == 1)
  1603. return -1; /* No other targets possible */
  1604. /*
1605. * If we're using the softirq optimization or if we are
1606. * on an asymmetric CPU capacity system, ensure we consider the
1607. * softirq processing or the different capacities of the CPUs when
1608. * searching for the lowest_mask.
  1609. */
  1610. if (IS_ENABLED(CONFIG_RT_SOFTIRQ_AWARE_SCHED) ||
  1611. sched_asym_cpucap_active()) {
  1612. ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
  1613. task, lowest_mask,
  1614. rt_task_fits_cpu);
  1615. } else {
  1616. ret = cpupri_find(&task_rq(task)->rd->cpupri,
  1617. task, lowest_mask);
  1618. }
  1619. trace_android_rvh_find_lowest_rq(task, lowest_mask, ret, &cpu);
  1620. if (cpu >= 0)
  1621. return cpu;
  1622. if (!ret)
  1623. return -1; /* No targets found */
  1624. cpu = task_cpu(task);
  1625. /*
  1626. * At this point we have built a mask of CPUs representing the
  1627. * lowest priority tasks in the system. Now we want to elect
  1628. * the best one based on our affinity and topology.
  1629. *
  1630. * We prioritize the last CPU that the task executed on since
  1631. * it is most likely cache-hot in that location.
  1632. */
  1633. if (cpumask_test_cpu(cpu, lowest_mask))
  1634. return cpu;
  1635. /*
  1636. * Otherwise, we consult the sched_domains span maps to figure
  1637. * out which CPU is logically closest to our hot cache data.
  1638. */
  1639. if (!cpumask_test_cpu(this_cpu, lowest_mask))
  1640. this_cpu = -1; /* Skip this_cpu opt if not among lowest */
  1641. rcu_read_lock();
  1642. for_each_domain(cpu, sd) {
  1643. if (sd->flags & SD_WAKE_AFFINE) {
  1644. int best_cpu;
  1645. /*
  1646. * "this_cpu" is cheaper to preempt than a
  1647. * remote processor.
  1648. */
  1649. if (this_cpu != -1 &&
  1650. cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
  1651. rcu_read_unlock();
  1652. return this_cpu;
  1653. }
  1654. best_cpu = cpumask_any_and_distribute(lowest_mask,
  1655. sched_domain_span(sd));
  1656. if (best_cpu < nr_cpu_ids) {
  1657. rcu_read_unlock();
  1658. return best_cpu;
  1659. }
  1660. }
  1661. }
  1662. rcu_read_unlock();
  1663. /*
  1664. * And finally, if there were no matches within the domains
  1665. * just give the caller *something* to work with from the compatible
  1666. * locations.
  1667. */
  1668. if (this_cpu != -1)
  1669. return this_cpu;
  1670. cpu = cpumask_any_distribute(lowest_mask);
  1671. if (cpu < nr_cpu_ids)
  1672. return cpu;
  1673. return -1;
  1674. }
  1675. /* Will lock the rq it finds */
  1676. static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
  1677. {
  1678. struct rq *lowest_rq = NULL;
  1679. int tries;
  1680. int cpu;
  1681. for (tries = 0; tries < RT_MAX_TRIES; tries++) {
  1682. cpu = find_lowest_rq(task);
  1683. if ((cpu == -1) || (cpu == rq->cpu))
  1684. break;
  1685. lowest_rq = cpu_rq(cpu);
  1686. if (lowest_rq->rt.highest_prio.curr <= task->prio) {
  1687. /*
1688. * Target rq has tasks of equal or higher priority;
  1689. * retrying does not release any lock and is unlikely
  1690. * to yield a different result.
  1691. */
  1692. lowest_rq = NULL;
  1693. break;
  1694. }
  1695. /* if the prio of this runqueue changed, try again */
  1696. if (double_lock_balance(rq, lowest_rq)) {
1697. * We had to unlock the run queue. In
1698. * the meantime, the task could have
1699. * migrated already or had its affinity changed.
  1700. * migrated already or had its affinity changed.
  1701. * Also make sure that it wasn't scheduled on its rq.
  1702. * It is possible the task was scheduled, set
  1703. * "migrate_disabled" and then got preempted, so we must
  1704. * check the task migration disable flag here too.
  1705. */
  1706. if (unlikely(task_rq(task) != rq ||
  1707. !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
  1708. task_on_cpu(rq, task) ||
  1709. !rt_task(task) ||
  1710. is_migration_disabled(task) ||
  1711. !task_on_rq_queued(task))) {
  1712. double_unlock_balance(rq, lowest_rq);
  1713. lowest_rq = NULL;
  1714. break;
  1715. }
  1716. }
  1717. /* If this rq is still suitable use it. */
  1718. if (lowest_rq->rt.highest_prio.curr > task->prio)
  1719. break;
  1720. /* try again */
  1721. double_unlock_balance(rq, lowest_rq);
  1722. lowest_rq = NULL;
  1723. }
  1724. return lowest_rq;
  1725. }
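/*
 * The pushable list is a priority-ordered plist, so its first entry is
 * the highest-priority task that may be pushed off this runqueue.
 */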
  1726. static struct task_struct *pick_next_pushable_task(struct rq *rq)
  1727. {
  1728. struct task_struct *p;
  1729. if (!has_pushable_tasks(rq))
  1730. return NULL;
  1731. p = plist_first_entry(&rq->rt.pushable_tasks,
  1732. struct task_struct, pushable_tasks);
  1733. BUG_ON(rq->cpu != task_cpu(p));
  1734. BUG_ON(task_current(rq, p));
  1735. BUG_ON(p->nr_cpus_allowed <= 1);
  1736. BUG_ON(!task_on_rq_queued(p));
  1737. BUG_ON(!rt_task(p));
  1738. return p;
  1739. }
  1740. /*
  1741. * If the current CPU has more than one RT task, see if the non
  1742. * running task can migrate over to a CPU that is running a task
  1743. * of lesser priority.
  1744. */
  1745. static int push_rt_task(struct rq *rq, bool pull)
  1746. {
  1747. struct task_struct *next_task;
  1748. struct rq *lowest_rq;
  1749. int ret = 0;
  1750. if (!rq->rt.overloaded)
  1751. return 0;
  1752. next_task = pick_next_pushable_task(rq);
  1753. if (!next_task)
  1754. return 0;
  1755. retry:
  1756. /*
1757. * It's possible that next_task slipped in with a
1758. * higher priority than current. If that's the case,
1759. * just reschedule current.
  1760. */
  1761. if (unlikely(next_task->prio < rq->curr->prio)) {
  1762. resched_curr(rq);
  1763. return 0;
  1764. }
  1765. if (is_migration_disabled(next_task)) {
  1766. struct task_struct *push_task = NULL;
  1767. int cpu;
  1768. if (!pull || rq->push_busy)
  1769. return 0;
  1770. /*
  1771. * Invoking find_lowest_rq() on anything but an RT task doesn't
  1772. * make sense. Per the above priority check, curr has to
  1773. * be of higher priority than next_task, so no need to
  1774. * reschedule when bailing out.
  1775. *
  1776. * Note that the stoppers are masqueraded as SCHED_FIFO
  1777. * (cf. sched_set_stop_task()), so we can't rely on rt_task().
  1778. */
  1779. if (rq->curr->sched_class != &rt_sched_class)
  1780. return 0;
  1781. cpu = find_lowest_rq(rq->curr);
  1782. if (cpu == -1 || cpu == rq->cpu)
  1783. return 0;
  1784. /*
1785. * Since we found a CPU with a lower priority than @next_task,
1786. * it should be running. However, we cannot migrate it
1787. * to that other CPU; instead, attempt to push the current
1788. * running task on this CPU away.
  1789. */
  1790. push_task = get_push_task(rq);
  1791. if (push_task) {
  1792. preempt_disable();
  1793. raw_spin_rq_unlock(rq);
  1794. stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
  1795. push_task, &rq->push_work);
  1796. preempt_enable();
  1797. raw_spin_rq_lock(rq);
  1798. }
  1799. return 0;
  1800. }
  1801. if (WARN_ON(next_task == rq->curr))
  1802. return 0;
  1803. /* We might release rq lock */
  1804. get_task_struct(next_task);
  1805. /* find_lock_lowest_rq locks the rq if found */
  1806. lowest_rq = find_lock_lowest_rq(next_task, rq);
  1807. if (!lowest_rq) {
  1808. struct task_struct *task;
  1809. /*
  1810. * find_lock_lowest_rq releases rq->lock
  1811. * so it is possible that next_task has migrated.
  1812. *
  1813. * We need to make sure that the task is still on the same
  1814. * run-queue and is also still the next task eligible for
  1815. * pushing.
  1816. */
  1817. task = pick_next_pushable_task(rq);
  1818. if (task == next_task) {
  1819. /*
  1820. * The task hasn't migrated, and is still the next
  1821. * eligible task, but we failed to find a run-queue
  1822. * to push it to. Do not retry in this case, since
  1823. * other CPUs will pull from us when ready.
  1824. */
  1825. goto out;
  1826. }
  1827. if (!task)
  1828. /* No more tasks, just exit */
  1829. goto out;
  1830. /*
  1831. * Something has shifted, try again.
  1832. */
  1833. put_task_struct(next_task);
  1834. next_task = task;
  1835. goto retry;
  1836. }
  1837. deactivate_task(rq, next_task, 0);
  1838. set_task_cpu(next_task, lowest_rq->cpu);
  1839. activate_task(lowest_rq, next_task, 0);
  1840. resched_curr(lowest_rq);
  1841. ret = 1;
  1842. double_unlock_balance(rq, lowest_rq);
  1843. out:
  1844. put_task_struct(next_task);
  1845. return ret;
  1846. }
  1847. static void push_rt_tasks(struct rq *rq)
  1848. {
1849. /* push_rt_task() will return true if it moved an RT task */
  1850. while (push_rt_task(rq, false))
  1851. ;
  1852. }
  1853. #ifdef HAVE_RT_PUSH_IPI
  1854. /*
  1855. * When a high priority task schedules out from a CPU and a lower priority
1856. * task is scheduled in, a check is made to see if there are any RT tasks
  1857. * on other CPUs that are waiting to run because a higher priority RT task
  1858. * is currently running on its CPU. In this case, the CPU with multiple RT
  1859. * tasks queued on it (overloaded) needs to be notified that a CPU has opened
  1860. * up that may be able to run one of its non-running queued RT tasks.
  1861. *
  1862. * All CPUs with overloaded RT tasks need to be notified as there is currently
  1863. * no way to know which of these CPUs have the highest priority task waiting
1864. * to run. Instead of trying to take a spinlock on each of these CPUs,
1865. * which has been shown to cause large latency when done on machines with
1866. * many CPUs, an IPI is sent to the CPUs to have them push off the
1867. * overloaded RT tasks waiting to run.
  1868. *
1869. * Just sending an IPI to each of the CPUs is also an issue, as on
1870. * machines with a large CPU count this can cause an IPI storm on a CPU,
1871. * especially if it's the only CPU with multiple RT tasks queued and a
1872. * large number of CPUs are scheduling a lower priority task at the same time.
  1873. *
  1874. * Each root domain has its own irq work function that can iterate over
1875. * all CPUs with overloaded RT tasks. Since all such CPUs must be checked
1876. * whenever one or many CPUs are lowering their priority, there's a
1877. * single irq work iterator that will try to
1878. * push off RT tasks that are waiting to run.
  1879. *
  1880. * When a CPU schedules a lower priority task, it will kick off the
  1881. * irq work iterator that will jump to each CPU with overloaded RT tasks.
  1882. * As it only takes the first CPU that schedules a lower priority task
  1883. * to start the process, the rto_start variable is incremented and if
  1884. * the atomic result is one, then that CPU will try to take the rto_lock.
  1885. * This prevents high contention on the lock as the process handles all
  1886. * CPUs scheduling lower priority tasks.
  1887. *
1888. * All CPUs that are scheduling a lower priority task will increment the
1889. * rto_loop_next variable. This will make sure that the irq work iterator
1890. * checks all RT overloaded CPUs whenever a CPU schedules a new lower
1891. * priority task, even if the iterator is in the middle of a scan. Incrementing
1892. * rto_loop_next will cause the iterator to perform another scan.
  1893. *
  1894. */
  1895. static int rto_next_cpu(struct root_domain *rd)
  1896. {
  1897. int next;
  1898. int cpu;
  1899. /*
  1900. * When starting the IPI RT pushing, the rto_cpu is set to -1,
1901. * rto_next_cpu() will simply return the first CPU found in
  1902. * the rto_mask.
  1903. *
1904. * If rto_next_cpu() is called while rto_cpu is a valid CPU, it
  1905. * will return the next CPU found in the rto_mask.
  1906. *
  1907. * If there are no more CPUs left in the rto_mask, then a check is made
  1908. * against rto_loop and rto_loop_next. rto_loop is only updated with
  1909. * the rto_lock held, but any CPU may increment the rto_loop_next
  1910. * without any locking.
  1911. */
  1912. for (;;) {
  1913. /* When rto_cpu is -1 this acts like cpumask_first() */
  1914. cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
1915. /* cpu can be any CPU in rd->rto_mask, possibly a halted one; the vendor hook may update it */
  1916. trace_android_rvh_rto_next_cpu(rd->rto_cpu, rd->rto_mask, &cpu);
  1917. rd->rto_cpu = cpu;
  1918. if (cpu < nr_cpu_ids)
  1919. return cpu;
  1920. rd->rto_cpu = -1;
  1921. /*
  1922. * ACQUIRE ensures we see the @rto_mask changes
  1923. * made prior to the @next value observed.
  1924. *
  1925. * Matches WMB in rt_set_overload().
  1926. */
  1927. next = atomic_read_acquire(&rd->rto_loop_next);
  1928. if (rd->rto_loop == next)
  1929. break;
  1930. rd->rto_loop = next;
  1931. }
  1932. return -1;
  1933. }
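/*
 * rto_start_trylock()/rto_start_unlock() form a simple atomic try-lock
 * around starting the IPI push loop, so only one CPU at a time can kick
 * off a new loop.
 */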
  1934. static inline bool rto_start_trylock(atomic_t *v)
  1935. {
  1936. return !atomic_cmpxchg_acquire(v, 0, 1);
  1937. }
  1938. static inline void rto_start_unlock(atomic_t *v)
  1939. {
  1940. atomic_set_release(v, 0);
  1941. }
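/*
 * Bump rto_loop_next so that an already-running iterator rescans, and if
 * no iterator is active, queue the rto_push_work irq work on the first
 * overloaded CPU found.
 */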
  1942. static void tell_cpu_to_push(struct rq *rq)
  1943. {
  1944. int cpu = -1;
  1945. /* Keep the loop going if the IPI is currently active */
  1946. atomic_inc(&rq->rd->rto_loop_next);
  1947. /* Only one CPU can initiate a loop at a time */
  1948. if (!rto_start_trylock(&rq->rd->rto_loop_start))
  1949. return;
  1950. raw_spin_lock(&rq->rd->rto_lock);
  1951. /*
  1952. * The rto_cpu is updated under the lock, if it has a valid CPU
  1953. * then the IPI is still running and will continue due to the
  1954. * update to loop_next, and nothing needs to be done here.
1955. * Otherwise it is finishing up and an IPI needs to be sent.
  1956. */
  1957. if (rq->rd->rto_cpu < 0)
  1958. cpu = rto_next_cpu(rq->rd);
  1959. raw_spin_unlock(&rq->rd->rto_lock);
  1960. rto_start_unlock(&rq->rd->rto_loop_start);
  1961. if (cpu >= 0) {
  1962. /* Make sure the rd does not get freed while pushing */
  1963. sched_get_rd(rq->rd);
  1964. irq_work_queue_on(&rq->rd->rto_push_work, cpu);
  1965. }
  1966. }
  1967. /* Called from hardirq context */
  1968. void rto_push_irq_work_func(struct irq_work *work)
  1969. {
  1970. struct root_domain *rd =
  1971. container_of(work, struct root_domain, rto_push_work);
  1972. struct rq *rq;
  1973. int cpu;
  1974. rq = this_rq();
  1975. /*
  1976. * We do not need to grab the lock to check for has_pushable_tasks.
  1977. * When it gets updated, a check is made if a push is possible.
  1978. */
  1979. if (has_pushable_tasks(rq)) {
  1980. raw_spin_rq_lock(rq);
  1981. while (push_rt_task(rq, true))
  1982. ;
  1983. raw_spin_rq_unlock(rq);
  1984. }
  1985. raw_spin_lock(&rd->rto_lock);
  1986. /* Pass the IPI to the next rt overloaded queue */
  1987. cpu = rto_next_cpu(rd);
  1988. raw_spin_unlock(&rd->rto_lock);
  1989. if (cpu < 0) {
  1990. sched_put_rd(rd);
  1991. return;
  1992. }
  1993. /* Try the next RT overloaded CPU */
  1994. irq_work_queue_on(&rd->rto_push_work, cpu);
  1995. }
  1996. #endif /* HAVE_RT_PUSH_IPI */
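/*
 * Scan the other overloaded CPUs and pull over at most one task per
 * source rq: the highest-priority pushable task that would preempt what
 * is about to run here. If the candidate is migration-disabled, ask the
 * source CPU to push its current task away instead.
 */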
  1997. static void pull_rt_task(struct rq *this_rq)
  1998. {
  1999. int this_cpu = this_rq->cpu, cpu;
  2000. bool resched = false;
  2001. struct task_struct *p, *push_task;
  2002. struct rq *src_rq;
  2003. int rt_overload_count = rt_overloaded(this_rq);
  2004. if (likely(!rt_overload_count))
  2005. return;
  2006. /*
2007. * Match the barrier from rt_set_overload(); this guarantees that if we
  2008. * see overloaded we must also see the rto_mask bit.
  2009. */
  2010. smp_rmb();
  2011. /* If we are the only overloaded CPU do nothing */
  2012. if (rt_overload_count == 1 &&
  2013. cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
  2014. return;
  2015. #ifdef HAVE_RT_PUSH_IPI
  2016. if (sched_feat(RT_PUSH_IPI)) {
  2017. tell_cpu_to_push(this_rq);
  2018. return;
  2019. }
  2020. #endif
  2021. for_each_cpu(cpu, this_rq->rd->rto_mask) {
  2022. if (this_cpu == cpu)
  2023. continue;
  2024. src_rq = cpu_rq(cpu);
  2025. /*
  2026. * Don't bother taking the src_rq->lock if the next highest
  2027. * task is known to be lower-priority than our current task.
  2028. * This may look racy, but if this value is about to go
  2029. * logically higher, the src_rq will push this task away.
2030. * And if it's going logically lower, we do not care.
  2031. */
  2032. if (src_rq->rt.highest_prio.next >=
  2033. this_rq->rt.highest_prio.curr)
  2034. continue;
  2035. /*
  2036. * We can potentially drop this_rq's lock in
  2037. * double_lock_balance, and another CPU could
  2038. * alter this_rq
  2039. */
  2040. push_task = NULL;
  2041. double_lock_balance(this_rq, src_rq);
  2042. /*
2043. * We can only pull a task that is pushable
2044. * on its rq, and no others.
  2045. */
  2046. p = pick_highest_pushable_task(src_rq, this_cpu);
  2047. /*
  2048. * Do we have an RT task that preempts
  2049. * the to-be-scheduled task?
  2050. */
  2051. if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
  2052. WARN_ON(p == src_rq->curr);
  2053. WARN_ON(!task_on_rq_queued(p));
  2054. /*
  2055. * There's a chance that p is higher in priority
  2056. * than what's currently running on its CPU.
2057. * This is just because p is waking up and hasn't
2058. * had a chance to schedule yet. We only pull
2059. * p if it is lower in priority than the
2060. * current task on the run queue.
  2061. */
  2062. if (p->prio < src_rq->curr->prio)
  2063. goto skip;
  2064. if (is_migration_disabled(p)) {
  2065. push_task = get_push_task(src_rq);
  2066. } else {
  2067. deactivate_task(src_rq, p, 0);
  2068. set_task_cpu(p, this_cpu);
  2069. activate_task(this_rq, p, 0);
  2070. resched = true;
  2071. }
  2072. /*
  2073. * We continue with the search, just in
  2074. * case there's an even higher prio task
  2075. * in another runqueue. (low likelihood
  2076. * but possible)
  2077. */
  2078. }
  2079. skip:
  2080. double_unlock_balance(this_rq, src_rq);
  2081. if (push_task) {
  2082. preempt_disable();
  2083. raw_spin_rq_unlock(this_rq);
  2084. stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
  2085. push_task, &src_rq->push_work);
  2086. preempt_enable();
  2087. raw_spin_rq_lock(this_rq);
  2088. }
  2089. }
  2090. if (resched)
  2091. resched_curr(this_rq);
  2092. }
  2093. /*
  2094. * If we are not running and we are not going to reschedule soon, we should
  2095. * try to push tasks away now
  2096. */
  2097. static void task_woken_rt(struct rq *rq, struct task_struct *p)
  2098. {
  2099. bool need_to_push = !task_on_cpu(rq, p) &&
  2100. !test_tsk_need_resched(rq->curr) &&
  2101. p->nr_cpus_allowed > 1 &&
  2102. (dl_task(rq->curr) || rt_task(rq->curr)) &&
  2103. (rq->curr->nr_cpus_allowed < 2 ||
  2104. rq->curr->prio <= p->prio);
  2105. if (need_to_push)
  2106. push_rt_tasks(rq);
  2107. }
  2108. /* Assumes rq->lock is held */
  2109. static void rq_online_rt(struct rq *rq)
  2110. {
  2111. if (rq->rt.overloaded)
  2112. rt_set_overload(rq);
  2113. __enable_runtime(rq);
  2114. cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
  2115. }
  2116. /* Assumes rq->lock is held */
  2117. static void rq_offline_rt(struct rq *rq)
  2118. {
  2119. if (rq->rt.overloaded)
  2120. rt_clear_overload(rq);
  2121. __disable_runtime(rq);
  2122. cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
  2123. }
  2124. /*
2125. * When switching from the RT queue, we bring ourselves to a position
2126. * where we might want to pull RT tasks from other runqueues.
  2127. */
  2128. static void switched_from_rt(struct rq *rq, struct task_struct *p)
  2129. {
  2130. /*
  2131. * If there are other RT tasks then we will reschedule
  2132. * and the scheduling of the other RT tasks will handle
  2133. * the balancing. But if we are the last RT task
  2134. * we may need to handle the pulling of RT tasks
  2135. * now.
  2136. */
  2137. if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
  2138. return;
  2139. rt_queue_pull_task(rq);
  2140. }
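/* Allocate the per-CPU cpumasks used by find_lowest_rq(). */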
  2141. void __init init_sched_rt_class(void)
  2142. {
  2143. unsigned int i;
  2144. for_each_possible_cpu(i) {
  2145. zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
  2146. GFP_KERNEL, cpu_to_node(i));
  2147. }
  2148. }
  2149. #endif /* CONFIG_SMP */
  2150. /*
  2151. * When switching a task to RT, we may overload the runqueue
  2152. * with RT tasks. In this case we try to push them off to
  2153. * other runqueues.
  2154. */
  2155. static void switched_to_rt(struct rq *rq, struct task_struct *p)
  2156. {
  2157. /*
2158. * If we are running, update the avg_rt tracking, as the running time
2159. * will from now on be accounted there.
  2160. */
  2161. if (task_current(rq, p)) {
  2162. update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
  2163. return;
  2164. }
  2165. /*
  2166. * If we are not running we may need to preempt the current
  2167. * running task. If that current running task is also an RT task
  2168. * then see if we can move to another run queue.
  2169. */
  2170. if (task_on_rq_queued(p)) {
  2171. #ifdef CONFIG_SMP
  2172. if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
  2173. rt_queue_push_tasks(rq);
  2174. #endif /* CONFIG_SMP */
  2175. if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
  2176. resched_curr(rq);
  2177. }
  2178. }
  2179. /*
  2180. * Priority of the task has changed. This may cause
  2181. * us to initiate a push or pull.
  2182. */
  2183. static void
  2184. prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
  2185. {
  2186. if (!task_on_rq_queued(p))
  2187. return;
  2188. if (task_current(rq, p)) {
  2189. #ifdef CONFIG_SMP
  2190. /*
  2191. * If our priority decreases while running, we
  2192. * may need to pull tasks to this runqueue.
  2193. */
  2194. if (oldprio < p->prio)
  2195. rt_queue_pull_task(rq);
  2196. /*
  2197. * If there's a higher priority task waiting to run
  2198. * then reschedule.
  2199. */
  2200. if (p->prio > rq->rt.highest_prio.curr)
  2201. resched_curr(rq);
  2202. #else
  2203. /* For UP simply resched on drop of prio */
  2204. if (oldprio < p->prio)
  2205. resched_curr(rq);
  2206. #endif /* CONFIG_SMP */
  2207. } else {
  2208. /*
2209. * This task is not running, but if it is
2210. * higher in priority than the current running task
2211. * then reschedule.
  2212. */
  2213. if (p->prio < rq->curr->prio)
  2214. resched_curr(rq);
  2215. }
  2216. }
  2217. #ifdef CONFIG_POSIX_TIMERS
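/*
 * RLIMIT_RTTIME enforcement: count how many ticks the task has run
 * without sleeping and, once the soft limit is exceeded, let the POSIX
 * CPU timer code deliver the usual rlimit signals.
 */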
  2218. static void watchdog(struct rq *rq, struct task_struct *p)
  2219. {
  2220. unsigned long soft, hard;
  2221. /* max may change after cur was read, this will be fixed next tick */
  2222. soft = task_rlimit(p, RLIMIT_RTTIME);
  2223. hard = task_rlimit_max(p, RLIMIT_RTTIME);
  2224. if (soft != RLIM_INFINITY) {
  2225. unsigned long next;
  2226. if (p->rt.watchdog_stamp != jiffies) {
  2227. p->rt.timeout++;
  2228. p->rt.watchdog_stamp = jiffies;
  2229. }
  2230. next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
  2231. if (p->rt.timeout > next) {
  2232. posix_cputimers_rt_watchdog(&p->posix_cputimers,
  2233. p->se.sum_exec_runtime);
  2234. }
  2235. }
  2236. }
  2237. #else
  2238. static inline void watchdog(struct rq *rq, struct task_struct *p) { }
  2239. #endif
  2240. /*
  2241. * scheduler tick hitting a task of our scheduling class.
  2242. *
  2243. * NOTE: This function can be called remotely by the tick offload that
  2244. * goes along full dynticks. Therefore no local assumption can be made
  2245. * and everything must be accessed through the @rq and @curr passed in
  2246. * parameters.
  2247. */
  2248. static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
  2249. {
  2250. struct sched_rt_entity *rt_se = &p->rt;
  2251. update_curr_rt(rq);
  2252. update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
  2253. trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 1);
  2254. watchdog(rq, p);
  2255. /*
  2256. * RR tasks need a special form of timeslice management.
  2257. * FIFO tasks have no timeslices.
  2258. */
  2259. if (p->policy != SCHED_RR)
  2260. return;
  2261. if (--p->rt.time_slice)
  2262. return;
  2263. p->rt.time_slice = sched_rr_timeslice;
  2264. /*
  2265. * Requeue to the end of queue if we (and all of our ancestors) are not
  2266. * the only element on the queue
  2267. */
  2268. for_each_sched_rt_entity(rt_se) {
  2269. if (rt_se->run_list.prev != rt_se->run_list.next) {
  2270. requeue_task_rt(rq, p, 0);
  2271. resched_curr(rq);
  2272. return;
  2273. }
  2274. }
  2275. }
  2276. static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
  2277. {
  2278. /*
  2279. * Time slice is 0 for SCHED_FIFO tasks
  2280. */
  2281. if (task->policy == SCHED_RR)
  2282. return sched_rr_timeslice;
  2283. else
  2284. return 0;
  2285. }
  2286. DEFINE_SCHED_CLASS(rt) = {
  2287. .enqueue_task = enqueue_task_rt,
  2288. .dequeue_task = dequeue_task_rt,
  2289. .yield_task = yield_task_rt,
  2290. .check_preempt_curr = check_preempt_curr_rt,
  2291. .pick_next_task = pick_next_task_rt,
  2292. .put_prev_task = put_prev_task_rt,
  2293. .set_next_task = set_next_task_rt,
  2294. #ifdef CONFIG_SMP
  2295. .balance = balance_rt,
  2296. .pick_task = pick_task_rt,
  2297. .select_task_rq = select_task_rq_rt,
  2298. .set_cpus_allowed = set_cpus_allowed_common,
  2299. .rq_online = rq_online_rt,
  2300. .rq_offline = rq_offline_rt,
  2301. .task_woken = task_woken_rt,
  2302. .switched_from = switched_from_rt,
  2303. .find_lock_rq = find_lock_lowest_rq,
  2304. #endif
  2305. .task_tick = task_tick_rt,
  2306. .get_rr_interval = get_rr_interval_rt,
  2307. .prio_changed = prio_changed_rt,
  2308. .switched_to = switched_to_rt,
  2309. .update_curr = update_curr_rt,
  2310. #ifdef CONFIG_UCLAMP_TASK
  2311. .uclamp_enabled = 1,
  2312. #endif
  2313. };
  2314. #ifdef CONFIG_RT_GROUP_SCHED
  2315. /*
  2316. * Ensure that the real time constraints are schedulable.
  2317. */
  2318. static DEFINE_MUTEX(rt_constraints_mutex);
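/* Return non-zero when any task in @tg's cgroup is an RT task. */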
  2319. static inline int tg_has_rt_tasks(struct task_group *tg)
  2320. {
  2321. struct task_struct *task;
  2322. struct css_task_iter it;
  2323. int ret = 0;
  2324. /*
  2325. * Autogroups do not have RT tasks; see autogroup_create().
  2326. */
  2327. if (task_group_is_autogroup(tg))
  2328. return 0;
  2329. css_task_iter_start(&tg->css, 0, &it);
  2330. while (!ret && (task = css_task_iter_next(&it)))
  2331. ret |= rt_task(task);
  2332. css_task_iter_end(&it);
  2333. return ret;
  2334. }
  2335. struct rt_schedulable_data {
  2336. struct task_group *tg;
  2337. u64 rt_period;
  2338. u64 rt_runtime;
  2339. };
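/*
 * Called for every group in the hierarchy: check that runtime does not
 * exceed the period, that the group stays within the global limit, that
 * runtime is not pulled to zero while the group still has RT tasks, and
 * that the children's combined bandwidth does not exceed the group's own.
 */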
  2340. static int tg_rt_schedulable(struct task_group *tg, void *data)
  2341. {
  2342. struct rt_schedulable_data *d = data;
  2343. struct task_group *child;
  2344. unsigned long total, sum = 0;
  2345. u64 period, runtime;
  2346. period = ktime_to_ns(tg->rt_bandwidth.rt_period);
  2347. runtime = tg->rt_bandwidth.rt_runtime;
  2348. if (tg == d->tg) {
  2349. period = d->rt_period;
  2350. runtime = d->rt_runtime;
  2351. }
  2352. /*
  2353. * Cannot have more runtime than the period.
  2354. */
  2355. if (runtime > period && runtime != RUNTIME_INF)
  2356. return -EINVAL;
  2357. /*
  2358. * Ensure we don't starve existing RT tasks if runtime turns zero.
  2359. */
  2360. if (rt_bandwidth_enabled() && !runtime &&
  2361. tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
  2362. return -EBUSY;
  2363. total = to_ratio(period, runtime);
  2364. /*
  2365. * Nobody can have more than the global setting allows.
  2366. */
  2367. if (total > to_ratio(global_rt_period(), global_rt_runtime()))
  2368. return -EINVAL;
  2369. /*
  2370. * The sum of our children's runtime should not exceed our own.
  2371. */
  2372. list_for_each_entry_rcu(child, &tg->children, siblings) {
  2373. period = ktime_to_ns(child->rt_bandwidth.rt_period);
  2374. runtime = child->rt_bandwidth.rt_runtime;
  2375. if (child == d->tg) {
  2376. period = d->rt_period;
  2377. runtime = d->rt_runtime;
  2378. }
  2379. sum += to_ratio(period, runtime);
  2380. }
  2381. if (sum > total)
  2382. return -EINVAL;
  2383. return 0;
  2384. }
  2385. static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
  2386. {
  2387. int ret;
  2388. struct rt_schedulable_data data = {
  2389. .tg = tg,
  2390. .rt_period = period,
  2391. .rt_runtime = runtime,
  2392. };
  2393. rcu_read_lock();
  2394. ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
  2395. rcu_read_unlock();
  2396. return ret;
  2397. }
  2398. static int tg_set_rt_bandwidth(struct task_group *tg,
  2399. u64 rt_period, u64 rt_runtime)
  2400. {
  2401. int i, err = 0;
  2402. /*
  2403. * Disallowing the root group RT runtime is BAD, it would disallow the
  2404. * kernel creating (and or operating) RT threads.
  2405. */
  2406. if (tg == &root_task_group && rt_runtime == 0)
  2407. return -EINVAL;
  2408. /* No period doesn't make any sense. */
  2409. if (rt_period == 0)
  2410. return -EINVAL;
  2411. /*
2412. * Bound the quota to defend against overflow during the bandwidth shift.
  2413. */
  2414. if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
  2415. return -EINVAL;
  2416. mutex_lock(&rt_constraints_mutex);
  2417. err = __rt_schedulable(tg, rt_period, rt_runtime);
  2418. if (err)
  2419. goto unlock;
  2420. raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
  2421. tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
  2422. tg->rt_bandwidth.rt_runtime = rt_runtime;
  2423. for_each_possible_cpu(i) {
  2424. struct rt_rq *rt_rq = tg->rt_rq[i];
  2425. raw_spin_lock(&rt_rq->rt_runtime_lock);
  2426. rt_rq->rt_runtime = rt_runtime;
  2427. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  2428. }
  2429. raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
  2430. unlock:
  2431. mutex_unlock(&rt_constraints_mutex);
  2432. return err;
  2433. }
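/*
 * These helpers back the cgroup v1 cpu controller's cpu.rt_runtime_us and
 * cpu.rt_period_us files. A negative runtime means RUNTIME_INF, i.e. no
 * group-level limit. For example (the mount point and group name are just
 * illustrative):
 *
 *   # echo 500000 > /sys/fs/cgroup/cpu/rtgroup/cpu.rt_runtime_us
 */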
  2434. int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
  2435. {
  2436. u64 rt_runtime, rt_period;
  2437. rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
  2438. rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
  2439. if (rt_runtime_us < 0)
  2440. rt_runtime = RUNTIME_INF;
  2441. else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
  2442. return -EINVAL;
  2443. return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
  2444. }
  2445. long sched_group_rt_runtime(struct task_group *tg)
  2446. {
  2447. u64 rt_runtime_us;
  2448. if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
  2449. return -1;
  2450. rt_runtime_us = tg->rt_bandwidth.rt_runtime;
  2451. do_div(rt_runtime_us, NSEC_PER_USEC);
  2452. return rt_runtime_us;
  2453. }
  2454. int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
  2455. {
  2456. u64 rt_runtime, rt_period;
  2457. if (rt_period_us > U64_MAX / NSEC_PER_USEC)
  2458. return -EINVAL;
  2459. rt_period = rt_period_us * NSEC_PER_USEC;
  2460. rt_runtime = tg->rt_bandwidth.rt_runtime;
  2461. return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
  2462. }
  2463. long sched_group_rt_period(struct task_group *tg)
  2464. {
  2465. u64 rt_period_us;
  2466. rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
  2467. do_div(rt_period_us, NSEC_PER_USEC);
  2468. return rt_period_us;
  2469. }
  2470. #ifdef CONFIG_SYSCTL
  2471. static int sched_rt_global_constraints(void)
  2472. {
  2473. int ret = 0;
  2474. mutex_lock(&rt_constraints_mutex);
  2475. ret = __rt_schedulable(NULL, 0, 0);
  2476. mutex_unlock(&rt_constraints_mutex);
  2477. return ret;
  2478. }
  2479. #endif /* CONFIG_SYSCTL */
  2480. int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
  2481. {
  2482. /* Don't accept realtime tasks when there is no way for them to run */
  2483. if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
  2484. return 0;
  2485. return 1;
  2486. }
  2487. #else /* !CONFIG_RT_GROUP_SCHED */
  2488. #ifdef CONFIG_SYSCTL
  2489. static int sched_rt_global_constraints(void)
  2490. {
  2491. unsigned long flags;
  2492. int i;
  2493. raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
  2494. for_each_possible_cpu(i) {
  2495. struct rt_rq *rt_rq = &cpu_rq(i)->rt;
  2496. raw_spin_lock(&rt_rq->rt_runtime_lock);
  2497. rt_rq->rt_runtime = global_rt_runtime();
  2498. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  2499. }
  2500. raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
  2501. return 0;
  2502. }
  2503. #endif /* CONFIG_SYSCTL */
  2504. #endif /* CONFIG_RT_GROUP_SCHED */
  2505. #ifdef CONFIG_SYSCTL
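/*
 * Sanity-check the global sysctl values: the period must be positive and,
 * unless the runtime is unlimited, it must not exceed the period nor
 * overflow the bandwidth-shift arithmetic.
 */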
  2506. static int sched_rt_global_validate(void)
  2507. {
  2508. if (sysctl_sched_rt_period <= 0)
  2509. return -EINVAL;
  2510. if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
  2511. ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
  2512. ((u64)sysctl_sched_rt_runtime *
  2513. NSEC_PER_USEC > max_rt_runtime)))
  2514. return -EINVAL;
  2515. return 0;
  2516. }
  2517. static void sched_rt_do_global(void)
  2518. {
  2519. unsigned long flags;
  2520. raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
  2521. def_rt_bandwidth.rt_runtime = global_rt_runtime();
  2522. def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
  2523. raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
  2524. }
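/*
 * sysctl handler for sched_rt_period_us / sched_rt_runtime_us. For
 * example, capping RT tasks at 80% of the (default 1s) period might look
 * like:
 *
 *   # echo 800000 > /proc/sys/kernel/sched_rt_runtime_us
 *
 * New values are validated against the RT and deadline bandwidth
 * constraints and rolled back if any check fails.
 */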
  2525. static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
  2526. size_t *lenp, loff_t *ppos)
  2527. {
  2528. int old_period, old_runtime;
  2529. static DEFINE_MUTEX(mutex);
  2530. int ret;
  2531. mutex_lock(&mutex);
  2532. old_period = sysctl_sched_rt_period;
  2533. old_runtime = sysctl_sched_rt_runtime;
  2534. ret = proc_dointvec(table, write, buffer, lenp, ppos);
  2535. if (!ret && write) {
  2536. ret = sched_rt_global_validate();
  2537. if (ret)
  2538. goto undo;
  2539. ret = sched_dl_global_validate();
  2540. if (ret)
  2541. goto undo;
  2542. ret = sched_rt_global_constraints();
  2543. if (ret)
  2544. goto undo;
  2545. sched_rt_do_global();
  2546. sched_dl_do_global();
  2547. }
  2548. if (0) {
  2549. undo:
  2550. sysctl_sched_rt_period = old_period;
  2551. sysctl_sched_rt_runtime = old_runtime;
  2552. }
  2553. mutex_unlock(&mutex);
  2554. return ret;
  2555. }
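/*
 * sysctl handler for sched_rr_timeslice_ms: the value is exposed to
 * userspace in milliseconds but kept in jiffies internally, e.g.:
 *
 *   # echo 25 > /proc/sys/kernel/sched_rr_timeslice_ms
 */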
  2556. static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
  2557. size_t *lenp, loff_t *ppos)
  2558. {
  2559. int ret;
  2560. static DEFINE_MUTEX(mutex);
  2561. mutex_lock(&mutex);
  2562. ret = proc_dointvec(table, write, buffer, lenp, ppos);
  2563. /*
  2564. * Make sure that internally we keep jiffies.
2565. * Also, writing zero (or a negative value) resets the timeslice to the default:
  2566. */
  2567. if (!ret && write) {
  2568. sched_rr_timeslice =
  2569. sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
  2570. msecs_to_jiffies(sysctl_sched_rr_timeslice);
  2571. }
  2572. mutex_unlock(&mutex);
  2573. return ret;
  2574. }
  2575. #endif /* CONFIG_SYSCTL */
  2576. #ifdef CONFIG_SCHED_DEBUG
  2577. void print_rt_stats(struct seq_file *m, int cpu)
  2578. {
  2579. rt_rq_iter_t iter;
  2580. struct rt_rq *rt_rq;
  2581. rcu_read_lock();
  2582. for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
  2583. print_rt_rq(m, cpu, rt_rq);
  2584. rcu_read_unlock();
  2585. }
  2586. #endif /* CONFIG_SCHED_DEBUG */