tree_nocb.h

  1. /* SPDX-License-Identifier: GPL-2.0+ */
  2. /*
  3. * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  4. * Internal non-public definitions that provide either classic
  5. * or preemptible semantics.
  6. *
  7. * Copyright Red Hat, 2009
  8. * Copyright IBM Corporation, 2009
  9. * Copyright SUSE, 2021
  10. *
  11. * Author: Ingo Molnar <[email protected]>
  12. * Paul E. McKenney <[email protected]>
  13. * Frederic Weisbecker <[email protected]>
  14. */
  15. #ifdef CONFIG_RCU_NOCB_CPU
  16. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  17. static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
  18. static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
  19. {
  20. return lockdep_is_held(&rdp->nocb_lock);
  21. }
  22. static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
  23. {
  24. /* Race on early boot between thread creation and assignment */
  25. if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
  26. return true;
  27. if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
  28. if (in_task())
  29. return true;
  30. return false;
  31. }
  32. /*
  33. * Offload callback processing from the set of CPUs specified at boot
  34. * time by rcu_nocb_mask. For the CPUs in the set, there are kthreads
  35. * created that pull the callbacks from the corresponding CPU, wait for
  36. * a grace period to elapse, and invoke the callbacks. These kthreads
  37. * are organized into GP kthreads, which manage incoming callbacks, wait for
  38. * grace periods, and awaken CB kthreads, and the CB kthreads, which only
  39. * invoke callbacks. Each GP kthread invokes its own CBs. The no-CBs CPUs
  40. * do a wake_up() on their GP kthread when they insert a callback into any
  41. * empty list, unless the rcu_nocb_poll boot parameter has been specified,
  42. * in which case each kthread actively polls its CPU. (Which isn't so great
  43. * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
  44. *
  45. * This is intended to be used in conjunction with Frederic Weisbecker's
  46. * adaptive-idle work, which would seriously reduce OS jitter on CPUs
  47. * running CPU-bound user-mode computations.
  48. *
  49. * Offloading of callbacks can also be used as an energy-efficiency
  50. * measure because CPUs with no RCU callbacks queued are more aggressive
  51. * about entering dyntick-idle mode.
  52. */
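/*
 * For example (an illustrative scenario, not stated elsewhere in this
 * file): with the kthread naming used by rcu_spawn_cpu_nocb_kthread()
 * below, offloaded CPU 3 in a preemptible kernel is served by a per-group
 * "rcuog/<leader CPU>" GP kthread and a per-CPU "rcuop/3" CB kthread.
 */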
  53. /*
  54. * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
  55. * If the list is invalid, a warning is emitted and all CPUs are offloaded.
  56. */
  57. static int __init rcu_nocb_setup(char *str)
  58. {
  59. alloc_bootmem_cpumask_var(&rcu_nocb_mask);
  60. if (*str == '=') {
  61. if (cpulist_parse(++str, rcu_nocb_mask)) {
  62. pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
  63. cpumask_setall(rcu_nocb_mask);
  64. }
  65. }
  66. rcu_state.nocb_is_setup = true;
  67. return 1;
  68. }
  69. __setup("rcu_nocbs", rcu_nocb_setup);
  70. static int __init parse_rcu_nocb_poll(char *arg)
  71. {
  72. rcu_nocb_poll = true;
  73. return 0;
  74. }
  75. early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
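/*
 * For example (a hypothetical command line): booting with
 * "rcu_nocbs=1-7 rcu_nocb_poll" offloads callback processing from CPUs
 * 1-7 and makes the rcuog kthreads poll rather than wait for wakeups.
 * Per rcu_nocb_setup() above, an unparsable CPU list falls back to
 * offloading all CPUs.
 */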
  76. /*
  77. * Don't bother bypassing ->cblist if the call_rcu() rate is low.
  78. * After all, the main point of bypassing is to avoid lock contention
  79. * on ->nocb_lock, which only can happen at high call_rcu() rates.
  80. */
  81. static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
  82. module_param(nocb_nobypass_lim_per_jiffy, int, 0);
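/*
 * Worked example: the default above permits 16 * 1000 / HZ direct
 * enqueues per jiffy, roughly 16,000 call_rcu() invocations per second
 * regardless of HZ (16 per jiffy at HZ=1000, 64 per jiffy at HZ=250),
 * before callbacks start going through ->nocb_bypass.
 */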
  83. /*
  84. * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
  85. * lock isn't immediately available, increment ->nocb_lock_contended to
  86. * flag the contention.
  87. */
  88. static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
  89. __acquires(&rdp->nocb_bypass_lock)
  90. {
  91. lockdep_assert_irqs_disabled();
  92. if (raw_spin_trylock(&rdp->nocb_bypass_lock))
  93. return;
  94. atomic_inc(&rdp->nocb_lock_contended);
  95. WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
  96. smp_mb__after_atomic(); /* atomic_inc() before lock. */
  97. raw_spin_lock(&rdp->nocb_bypass_lock);
  98. smp_mb__before_atomic(); /* atomic_dec() after lock. */
  99. atomic_dec(&rdp->nocb_lock_contended);
  100. }
  101. /*
  102. * Spinwait until the specified rcu_data structure's ->nocb_lock is
  103. * not contended. Please note that this is extremely special-purpose,
  104. * relying on the fact that at most two kthreads and one CPU contend for
  105. * this lock, and also that the two kthreads are guaranteed to have frequent
  106. * grace-period-duration time intervals between successive acquisitions
  107. * of the lock. This allows us to use an extremely simple throttling
  108. * mechanism, and further to apply it only to the CPU doing floods of
  109. * call_rcu() invocations. Don't try this at home!
  110. */
  111. static void rcu_nocb_wait_contended(struct rcu_data *rdp)
  112. {
  113. WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
  114. while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
  115. cpu_relax();
  116. }
  117. /*
  118. * Conditionally acquire the specified rcu_data structure's
  119. * ->nocb_bypass_lock.
  120. */
  121. static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
  122. {
  123. lockdep_assert_irqs_disabled();
  124. return raw_spin_trylock(&rdp->nocb_bypass_lock);
  125. }
  126. /*
  127. * Release the specified rcu_data structure's ->nocb_bypass_lock.
  128. */
  129. static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
  130. __releases(&rdp->nocb_bypass_lock)
  131. {
  132. lockdep_assert_irqs_disabled();
  133. raw_spin_unlock(&rdp->nocb_bypass_lock);
  134. }
  135. /*
  136. * Acquire the specified rcu_data structure's ->nocb_lock, but only
  137. * if it corresponds to a no-CBs CPU.
  138. */
  139. static void rcu_nocb_lock(struct rcu_data *rdp)
  140. {
  141. lockdep_assert_irqs_disabled();
  142. if (!rcu_rdp_is_offloaded(rdp))
  143. return;
  144. raw_spin_lock(&rdp->nocb_lock);
  145. }
  146. /*
  147. * Release the specified rcu_data structure's ->nocb_lock, but only
  148. * if it corresponds to a no-CBs CPU.
  149. */
  150. static void rcu_nocb_unlock(struct rcu_data *rdp)
  151. {
  152. if (rcu_rdp_is_offloaded(rdp)) {
  153. lockdep_assert_irqs_disabled();
  154. raw_spin_unlock(&rdp->nocb_lock);
  155. }
  156. }
  157. /*
  158. * Release the specified rcu_data structure's ->nocb_lock and restore
  159. * interrupts, but only if it corresponds to a no-CBs CPU.
  160. */
  161. static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
  162. unsigned long flags)
  163. {
  164. if (rcu_rdp_is_offloaded(rdp)) {
  165. lockdep_assert_irqs_disabled();
  166. raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
  167. } else {
  168. local_irq_restore(flags);
  169. }
  170. }
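/*
 * A minimal usage sketch (hypothetical helper, not part of the upstream
 * file): callers that might run on either an offloaded or a non-offloaded
 * CPU disable interrupts and then take the conditional lock, so the same
 * code works in both modes.
 */
static void __maybe_unused example_cblist_access(struct rcu_data *rdp)
{
	unsigned long flags;

	local_irq_save(flags);
	rcu_nocb_lock(rdp);			/* No-op unless rdp is offloaded. */
	/* ... manipulate rdp->cblist here ... */
	rcu_nocb_unlock_irqrestore(rdp, flags);	/* Plain irq restore otherwise. */
}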
  171. /* Lockdep check that ->cblist may be safely accessed. */
  172. static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
  173. {
  174. lockdep_assert_irqs_disabled();
  175. if (rcu_rdp_is_offloaded(rdp))
  176. lockdep_assert_held(&rdp->nocb_lock);
  177. }
  178. /*
  179. * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  180. * grace period.
  181. */
  182. static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
  183. {
  184. swake_up_all(sq);
  185. }
  186. static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
  187. {
  188. return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
  189. }
  190. static void rcu_init_one_nocb(struct rcu_node *rnp)
  191. {
  192. init_swait_queue_head(&rnp->nocb_gp_wq[0]);
  193. init_swait_queue_head(&rnp->nocb_gp_wq[1]);
  194. }
  195. static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
  196. struct rcu_data *rdp,
  197. bool force, unsigned long flags)
  198. __releases(rdp_gp->nocb_gp_lock)
  199. {
  200. bool needwake = false;
  201. if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
  202. raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
  203. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  204. TPS("AlreadyAwake"));
  205. return false;
  206. }
  207. if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
  208. WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
  209. del_timer(&rdp_gp->nocb_timer);
  210. }
  211. if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
  212. WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
  213. needwake = true;
  214. }
  215. raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
  216. if (needwake) {
  217. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
  218. wake_up_process(rdp_gp->nocb_gp_kthread);
  219. }
  220. return needwake;
  221. }
  222. /*
  223. * Kick the GP kthread for this NOCB group.
  224. */
  225. static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
  226. {
  227. unsigned long flags;
  228. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  229. raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
  230. return __wake_nocb_gp(rdp_gp, rdp, force, flags);
  231. }
  232. /*
  233. * LAZY_FLUSH_JIFFIES decides the maximum amount of time that
  234. * can elapse before lazy callbacks are flushed. Lazy callbacks
  235. * could be flushed much earlier for a number of other reasons;
  236. * however, LAZY_FLUSH_JIFFIES ensures that no lazy callback is
  237. * left unsubmitted to RCU for longer than that many jiffies.
  238. */
  239. #define LAZY_FLUSH_JIFFIES (10 * HZ)
  240. static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
  241. #ifdef CONFIG_RCU_LAZY
  242. // To be called only from test code.
  243. void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
  244. {
  245. jiffies_till_flush = jif;
  246. }
  247. EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
  248. unsigned long rcu_lazy_get_jiffies_till_flush(void)
  249. {
  250. return jiffies_till_flush;
  251. }
  252. EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
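/*
 * A minimal sketch of how test code might use the hooks above (hypothetical
 * function, not part of the upstream file): shorten the lazy-flush timeout,
 * run the test, then restore the previous value.
 */
static void __maybe_unused example_shorten_lazy_flush(void)
{
	unsigned long saved = rcu_lazy_get_jiffies_till_flush();

	rcu_lazy_set_jiffies_till_flush(HZ);	/* Flush lazy CBs after ~1 second. */
	/* ... exercise lazy call_rcu() behavior here ... */
	rcu_lazy_set_jiffies_till_flush(saved);
}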
  253. #endif
  254. /*
  255. * Arrange to wake the GP kthread for this NOCB group at some future
  256. * time when it is safe to do so.
  257. */
  258. static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
  259. const char *reason)
  260. {
  261. unsigned long flags;
  262. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  263. raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
  264. /*
  265. * Bypass wakeup overrides previous deferments. In case of
  266. * callback storms, no need to wake up too early.
  267. */
  268. if (waketype == RCU_NOCB_WAKE_LAZY &&
  269. rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
  270. mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
  271. WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
  272. } else if (waketype == RCU_NOCB_WAKE_BYPASS) {
  273. mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
  274. WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
  275. } else {
  276. if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
  277. mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
  278. if (rdp_gp->nocb_defer_wakeup < waketype)
  279. WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
  280. }
  281. raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
  282. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
  283. }
  284. /*
  285. * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
  286. * However, if there is a callback to be enqueued and if ->nocb_bypass
  287. * proves to be initially empty, just return false because the no-CB GP
  288. * kthread may need to be awakened in this case.
  289. *
  290. * Return true if there was something to be flushed and it succeeded, otherwise
  291. * false.
  292. *
  293. * Note that this function always returns true if rhp is NULL.
  294. */
  295. static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
  296. unsigned long j, bool lazy)
  297. {
  298. struct rcu_cblist rcl;
  299. struct rcu_head *rhp = rhp_in;
  300. WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
  301. rcu_lockdep_assert_cblist_protected(rdp);
  302. lockdep_assert_held(&rdp->nocb_bypass_lock);
  303. if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
  304. raw_spin_unlock(&rdp->nocb_bypass_lock);
  305. return false;
  306. }
  307. /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
  308. if (rhp)
  309. rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
  310. /*
  311. * If the new CB requested was a lazy one, queue it onto the main
  312. * ->cblist so that we can take advantage of the grace-period that will
  313. * happen regardless. But queue it onto the bypass list first so that
  314. * the lazy CB is ordered with the existing CBs in the bypass list.
  315. */
  316. if (lazy && rhp) {
  317. rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
  318. rhp = NULL;
  319. }
  320. rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
  321. WRITE_ONCE(rdp->lazy_len, 0);
  322. rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
  323. WRITE_ONCE(rdp->nocb_bypass_first, j);
  324. rcu_nocb_bypass_unlock(rdp);
  325. return true;
  326. }
  327. /*
  328. * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
  329. * However, if there is a callback to be enqueued and if ->nocb_bypass
  330. * proves to be initially empty, just return false because the no-CB GP
  331. * kthread may need to be awakened in this case.
  332. *
  333. * Note that this function always returns true if rhp is NULL.
  334. */
  335. static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
  336. unsigned long j, bool lazy)
  337. {
  338. if (!rcu_rdp_is_offloaded(rdp))
  339. return true;
  340. rcu_lockdep_assert_cblist_protected(rdp);
  341. rcu_nocb_bypass_lock(rdp);
  342. return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
  343. }
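/*
 * A minimal calling-convention sketch (hypothetical helper, not part of
 * the upstream file): interrupts disabled and, for an offloaded rdp,
 * ->nocb_lock held, which is exactly what
 * rcu_lockdep_assert_cblist_protected() checks. A NULL rhp simply flushes
 * whatever currently sits in the bypass list.
 */
static bool __maybe_unused example_flush_bypass(struct rcu_data *rdp)
{
	unsigned long flags;
	bool flushed;

	local_irq_save(flags);
	rcu_nocb_lock(rdp);
	flushed = rcu_nocb_flush_bypass(rdp, NULL, jiffies, false);
	rcu_nocb_unlock_irqrestore(rdp, flags);
	return flushed;
}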
  344. /*
  345. * If the ->nocb_bypass_lock is immediately available, flush the
  346. * ->nocb_bypass queue into ->cblist.
  347. */
  348. static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
  349. {
  350. rcu_lockdep_assert_cblist_protected(rdp);
  351. if (!rcu_rdp_is_offloaded(rdp) ||
  352. !rcu_nocb_bypass_trylock(rdp))
  353. return;
  354. WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
  355. }
  356. /*
  357. * See whether it is appropriate to use the ->nocb_bypass list in order
  358. * to control contention on ->nocb_lock. A limited number of direct
  359. * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
  360. * is non-empty, further callbacks must be placed into ->nocb_bypass,
  361. * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch
  362. * back to direct use of ->cblist. However, ->nocb_bypass should not be
  363. * used if ->cblist is empty, because otherwise callbacks can be stranded
  364. * on ->nocb_bypass because we cannot count on the current CPU ever again
  365. * invoking call_rcu(). The general rule is that if ->nocb_bypass is
  366. * non-empty, the corresponding no-CBs grace-period kthread must not be
  367. * in an indefinite sleep state.
  368. *
  369. * Finally, it is not permitted to use the bypass during early boot,
  370. * as doing so would confuse the auto-initialization code. Besides
  371. * which, there is no point in worrying about lock contention while
  372. * there is only one CPU in operation.
  373. */
  374. static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
  375. bool *was_alldone, unsigned long flags,
  376. bool lazy)
  377. {
  378. unsigned long c;
  379. unsigned long cur_gp_seq;
  380. unsigned long j = jiffies;
  381. long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
  382. bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
  383. lockdep_assert_irqs_disabled();
  384. // Pure softirq/rcuc based processing: no bypassing, no
  385. // locking.
  386. if (!rcu_rdp_is_offloaded(rdp)) {
  387. *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
  388. return false;
  389. }
  390. // In the process of (de-)offloading: no bypassing, but
  391. // locking.
  392. if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
  393. rcu_nocb_lock(rdp);
  394. *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
  395. return false; /* Not offloaded, no bypassing. */
  396. }
  397. // Don't use ->nocb_bypass during early boot.
  398. if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
  399. rcu_nocb_lock(rdp);
  400. WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
  401. *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
  402. return false;
  403. }
  404. // If we have advanced to a new jiffy, reset counts to allow
  405. // moving back from ->nocb_bypass to ->cblist.
  406. if (j == rdp->nocb_nobypass_last) {
  407. c = rdp->nocb_nobypass_count + 1;
  408. } else {
  409. WRITE_ONCE(rdp->nocb_nobypass_last, j);
  410. c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
  411. if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
  412. nocb_nobypass_lim_per_jiffy))
  413. c = 0;
  414. else if (c > nocb_nobypass_lim_per_jiffy)
  415. c = nocb_nobypass_lim_per_jiffy;
  416. }
  417. WRITE_ONCE(rdp->nocb_nobypass_count, c);
  418. // If there hasn't yet been all that many ->cblist enqueues
  419. // this jiffy, tell the caller to enqueue onto ->cblist. But flush
  420. // ->nocb_bypass first.
  421. // Lazy CBs throttle this back and do immediate bypass queuing.
  422. if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
  423. rcu_nocb_lock(rdp);
  424. *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
  425. if (*was_alldone)
  426. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  427. TPS("FirstQ"));
  428. WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
  429. WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
  430. return false; // Caller must enqueue the callback.
  431. }
  432. // If ->nocb_bypass has been used too long or is too full,
  433. // flush ->nocb_bypass to ->cblist.
  434. if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
  435. (ncbs && bypass_is_lazy &&
  436. (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
  437. ncbs >= qhimark) {
  438. rcu_nocb_lock(rdp);
  439. *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
  440. if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
  441. if (*was_alldone)
  442. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  443. TPS("FirstQ"));
  444. WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
  445. return false; // Caller must enqueue the callback.
  446. }
  447. if (j != rdp->nocb_gp_adv_time &&
  448. rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
  449. rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
  450. rcu_advance_cbs_nowake(rdp->mynode, rdp);
  451. rdp->nocb_gp_adv_time = j;
  452. }
  453. // The flush succeeded and we moved CBs into the regular list.
  454. // Don't wait for the wake up timer as it may be too far ahead.
  455. // Wake up the GP thread now instead, if the cblist was empty.
  456. __call_rcu_nocb_wake(rdp, *was_alldone, flags);
  457. return true; // Callback already enqueued.
  458. }
  459. // We need to use the bypass.
  460. rcu_nocb_wait_contended(rdp);
  461. rcu_nocb_bypass_lock(rdp);
  462. ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
  463. rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
  464. rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
  465. if (lazy)
  466. WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);
  467. if (!ncbs) {
  468. WRITE_ONCE(rdp->nocb_bypass_first, j);
  469. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
  470. }
  471. rcu_nocb_bypass_unlock(rdp);
  472. smp_mb(); /* Order enqueue before wake. */
  473. // A wake up of the grace period kthread or timer adjustment
  474. // needs to be done only if:
  475. // 1. Bypass list was fully empty before (this is the first
  476. // bypass list entry), or:
  477. // 2. Both of these conditions are met:
  478. // a. The bypass list previously had only lazy CBs, and:
  479. // b. The new CB is non-lazy.
  480. if (ncbs && (!bypass_is_lazy || lazy)) {
  481. local_irq_restore(flags);
  482. } else {
  483. // No-CBs GP kthread might be indefinitely asleep, if so, wake.
  484. rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
  485. if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
  486. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  487. TPS("FirstBQwake"));
  488. __call_rcu_nocb_wake(rdp, true, flags);
  489. } else {
  490. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  491. TPS("FirstBQnoWake"));
  492. rcu_nocb_unlock_irqrestore(rdp, flags);
  493. }
  494. }
  495. return true; // Callback already enqueued.
  496. }
  497. /*
  498. * Awaken the no-CBs grace-period kthread if needed, either due to it
  499. * legitimately being asleep or due to overload conditions.
  500. *
  501. * If warranted, also wake up the kthread servicing this CPU's queues.
  502. */
  503. static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
  504. unsigned long flags)
  505. __releases(rdp->nocb_lock)
  506. {
  507. long bypass_len;
  508. unsigned long cur_gp_seq;
  509. unsigned long j;
  510. long lazy_len;
  511. long len;
  512. struct task_struct *t;
  513. // If we are being polled or there is no kthread, just leave.
  514. t = READ_ONCE(rdp->nocb_gp_kthread);
  515. if (rcu_nocb_poll || !t) {
  516. rcu_nocb_unlock_irqrestore(rdp, flags);
  517. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  518. TPS("WakeNotPoll"));
  519. return;
  520. }
  521. // Need to actually do a wakeup.
  522. len = rcu_segcblist_n_cbs(&rdp->cblist);
  523. bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
  524. lazy_len = READ_ONCE(rdp->lazy_len);
  525. if (was_alldone) {
  526. rdp->qlen_last_fqs_check = len;
  527. // Only lazy CBs in bypass list
  528. if (lazy_len && bypass_len == lazy_len) {
  529. rcu_nocb_unlock_irqrestore(rdp, flags);
  530. wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
  531. TPS("WakeLazy"));
  532. } else if (!irqs_disabled_flags(flags)) {
  533. /* ... if queue was empty ... */
  534. rcu_nocb_unlock_irqrestore(rdp, flags);
  535. wake_nocb_gp(rdp, false);
  536. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  537. TPS("WakeEmpty"));
  538. } else {
  539. rcu_nocb_unlock_irqrestore(rdp, flags);
  540. wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
  541. TPS("WakeEmptyIsDeferred"));
  542. }
  543. } else if (len > rdp->qlen_last_fqs_check + qhimark) {
  544. /* ... or if many callbacks queued. */
  545. rdp->qlen_last_fqs_check = len;
  546. j = jiffies;
  547. if (j != rdp->nocb_gp_adv_time &&
  548. rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
  549. rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
  550. rcu_advance_cbs_nowake(rdp->mynode, rdp);
  551. rdp->nocb_gp_adv_time = j;
  552. }
  553. smp_mb(); /* Enqueue before timer_pending(). */
  554. if ((rdp->nocb_cb_sleep ||
  555. !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
  556. !timer_pending(&rdp->nocb_timer)) {
  557. rcu_nocb_unlock_irqrestore(rdp, flags);
  558. wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
  559. TPS("WakeOvfIsDeferred"));
  560. } else {
  561. rcu_nocb_unlock_irqrestore(rdp, flags);
  562. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
  563. }
  564. } else {
  565. rcu_nocb_unlock_irqrestore(rdp, flags);
  566. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
  567. }
  568. }
  569. static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
  570. bool *wake_state)
  571. {
  572. struct rcu_segcblist *cblist = &rdp->cblist;
  573. unsigned long flags;
  574. int ret;
  575. rcu_nocb_lock_irqsave(rdp, flags);
  576. if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) &&
  577. !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
  578. /*
  579. * Offloading. Set our flag and notify the offload worker.
  580. * We will handle this rdp until it gets de-offloaded.
  581. */
  582. rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
  583. if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
  584. *wake_state = true;
  585. ret = 1;
  586. } else if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) &&
  587. rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
  588. /*
  589. * De-offloading. Clear our flag and notify the de-offload worker.
  590. * We will ignore this rdp until it gets re-offloaded.
  591. */
  592. rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
  593. if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
  594. *wake_state = true;
  595. ret = 0;
  596. } else {
  597. WARN_ON_ONCE(1);
  598. ret = -1;
  599. }
  600. rcu_nocb_unlock_irqrestore(rdp, flags);
  601. return ret;
  602. }
  603. static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
  604. {
  605. trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
  606. swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
  607. !READ_ONCE(my_rdp->nocb_gp_sleep));
  608. trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
  609. }
  610. /*
  611. * No-CBs GP kthreads come here to wait for additional callbacks to show up
  612. * or for grace periods to end.
  613. */
  614. static void nocb_gp_wait(struct rcu_data *my_rdp)
  615. {
  616. bool bypass = false;
  617. int __maybe_unused cpu = my_rdp->cpu;
  618. unsigned long cur_gp_seq;
  619. unsigned long flags;
  620. bool gotcbs = false;
  621. unsigned long j = jiffies;
  622. bool lazy = false;
  623. bool needwait_gp = false; // This prevents actual uninitialized use.
  624. bool needwake;
  625. bool needwake_gp;
  626. struct rcu_data *rdp, *rdp_toggling = NULL;
  627. struct rcu_node *rnp;
  628. unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
  629. bool wasempty = false;
  630. /*
  631. * Each pass through the following loop checks for CBs and for the
  632. * nearest grace period (if any) to wait for next. The CB kthreads
  633. * and the global grace-period kthread are awakened if needed.
  634. */
  635. WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
  636. /*
  637. * An rcu_data structure is removed from the list after its
  638. * CPU is de-offloaded and added to the list before that CPU is
  639. * (re-)offloaded. If the following loop happens to be referencing
  640. * that rcu_data structure during the time that the corresponding
  641. * CPU is de-offloaded and then immediately re-offloaded, this
  642. * loop's rdp pointer will be carried to the end of the list by
  643. * the resulting pair of list operations. This can cause the loop
  644. * to skip over some of the rcu_data structures that were supposed
  645. * to have been scanned. Fortunately a new iteration through the
  646. * entire loop is forced after a given CPU's rcu_data structure
  647. * is added to the list, so the skipped-over rcu_data structures
  648. * won't be ignored for long.
  649. */
  650. list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
  651. long bypass_ncbs;
  652. bool flush_bypass = false;
  653. long lazy_ncbs;
  654. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
  655. rcu_nocb_lock_irqsave(rdp, flags);
  656. lockdep_assert_held(&rdp->nocb_lock);
  657. bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
  658. lazy_ncbs = READ_ONCE(rdp->lazy_len);
  659. if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
  660. (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
  661. bypass_ncbs > 2 * qhimark)) {
  662. flush_bypass = true;
  663. } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
  664. (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
  665. bypass_ncbs > 2 * qhimark)) {
  666. flush_bypass = true;
  667. } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
  668. rcu_nocb_unlock_irqrestore(rdp, flags);
  669. continue; /* No callbacks here, try next. */
  670. }
  671. if (flush_bypass) {
  672. // Bypass full or old, so flush it.
  673. (void)rcu_nocb_try_flush_bypass(rdp, j);
  674. bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
  675. lazy_ncbs = READ_ONCE(rdp->lazy_len);
  676. }
  677. if (bypass_ncbs) {
  678. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  679. bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
  680. if (bypass_ncbs == lazy_ncbs)
  681. lazy = true;
  682. else
  683. bypass = true;
  684. }
  685. rnp = rdp->mynode;
  686. // Advance callbacks if helpful and low contention.
  687. needwake_gp = false;
  688. if (!rcu_segcblist_restempty(&rdp->cblist,
  689. RCU_NEXT_READY_TAIL) ||
  690. (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
  691. rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
  692. raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
  693. needwake_gp = rcu_advance_cbs(rnp, rdp);
  694. wasempty = rcu_segcblist_restempty(&rdp->cblist,
  695. RCU_NEXT_READY_TAIL);
  696. raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
  697. }
  698. // Need to wait on some grace period?
  699. WARN_ON_ONCE(wasempty &&
  700. !rcu_segcblist_restempty(&rdp->cblist,
  701. RCU_NEXT_READY_TAIL));
  702. if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
  703. if (!needwait_gp ||
  704. ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
  705. wait_gp_seq = cur_gp_seq;
  706. needwait_gp = true;
  707. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
  708. TPS("NeedWaitGP"));
  709. }
  710. if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
  711. needwake = rdp->nocb_cb_sleep;
  712. WRITE_ONCE(rdp->nocb_cb_sleep, false);
  713. smp_mb(); /* CB invocation -after- GP end. */
  714. } else {
  715. needwake = false;
  716. }
  717. rcu_nocb_unlock_irqrestore(rdp, flags);
  718. if (needwake) {
  719. swake_up_one(&rdp->nocb_cb_wq);
  720. gotcbs = true;
  721. }
  722. if (needwake_gp)
  723. rcu_gp_kthread_wake();
  724. }
  725. my_rdp->nocb_gp_bypass = bypass;
  726. my_rdp->nocb_gp_gp = needwait_gp;
  727. my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
  728. // At least one child with non-empty ->nocb_bypass, so set
  729. // timer in order to avoid stranding its callbacks.
  730. if (!rcu_nocb_poll) {
  731. // If the bypass list has only lazy CBs, add a deferred lazy wake up.
  732. if (lazy && !bypass) {
  733. wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
  734. TPS("WakeLazyIsDeferred"));
  735. // Otherwise add a deferred bypass wake up.
  736. } else if (bypass) {
  737. wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
  738. TPS("WakeBypassIsDeferred"));
  739. }
  740. }
  741. if (rcu_nocb_poll) {
  742. /* Polling, so trace if first poll in the series. */
  743. if (gotcbs)
  744. trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
  745. if (list_empty(&my_rdp->nocb_head_rdp)) {
  746. raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
  747. if (!my_rdp->nocb_toggling_rdp)
  748. WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
  749. raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
  750. /* Wait for any offloading rdp */
  751. nocb_gp_sleep(my_rdp, cpu);
  752. } else {
  753. schedule_timeout_idle(1);
  754. }
  755. } else if (!needwait_gp) {
  756. /* Wait for callbacks to appear. */
  757. nocb_gp_sleep(my_rdp, cpu);
  758. } else {
  759. rnp = my_rdp->mynode;
  760. trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
  761. swait_event_interruptible_exclusive(
  762. rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
  763. rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
  764. !READ_ONCE(my_rdp->nocb_gp_sleep));
  765. trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
  766. }
  767. if (!rcu_nocb_poll) {
  768. raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
  769. // (De-)queue an rdp to/from the group if its nocb state is changing
  770. rdp_toggling = my_rdp->nocb_toggling_rdp;
  771. if (rdp_toggling)
  772. my_rdp->nocb_toggling_rdp = NULL;
  773. if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
  774. WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
  775. del_timer(&my_rdp->nocb_timer);
  776. }
  777. WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
  778. raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
  779. } else {
  780. rdp_toggling = READ_ONCE(my_rdp->nocb_toggling_rdp);
  781. if (rdp_toggling) {
  782. /*
  783. * Paranoid locking to make sure nocb_toggling_rdp is well
  784. * reset *before* we (re)set SEGCBLIST_KTHREAD_GP or we could
  785. * race with another round of nocb toggling for this rdp.
  786. * Nocb locking should already prevent that, but we stick
  787. * to paranoia, especially on this rare path.
  788. */
  789. raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
  790. my_rdp->nocb_toggling_rdp = NULL;
  791. raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
  792. }
  793. }
  794. if (rdp_toggling) {
  795. bool wake_state = false;
  796. int ret;
  797. ret = nocb_gp_toggle_rdp(rdp_toggling, &wake_state);
  798. if (ret == 1)
  799. list_add_tail(&rdp_toggling->nocb_entry_rdp, &my_rdp->nocb_head_rdp);
  800. else if (ret == 0)
  801. list_del(&rdp_toggling->nocb_entry_rdp);
  802. if (wake_state)
  803. swake_up_one(&rdp_toggling->nocb_state_wq);
  804. }
  805. my_rdp->nocb_gp_seq = -1;
  806. WARN_ON(signal_pending(current));
  807. }
  808. /*
  809. * No-CBs grace-period-wait kthread. There is one of these per group
  810. * of CPUs, but it is spawned only after at least one CPU in that group
  811. * has come online since boot. This kthread checks for newly posted
  812. * callbacks from any of the CPUs it is responsible for, waits for a
  813. * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
  814. * that then have callback-invocation work to do.
  815. */
  816. static int rcu_nocb_gp_kthread(void *arg)
  817. {
  818. struct rcu_data *rdp = arg;
  819. for (;;) {
  820. WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
  821. nocb_gp_wait(rdp);
  822. cond_resched_tasks_rcu_qs();
  823. }
  824. return 0;
  825. }
  826. static inline bool nocb_cb_can_run(struct rcu_data *rdp)
  827. {
  828. u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
  829. return rcu_segcblist_test_flags(&rdp->cblist, flags);
  830. }
  831. static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
  832. {
  833. return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
  834. }
  835. /*
  836. * Invoke any ready callbacks from the corresponding no-CBs CPU,
  837. * then, if there are no more, wait for more to appear.
  838. */
  839. static void nocb_cb_wait(struct rcu_data *rdp)
  840. {
  841. struct rcu_segcblist *cblist = &rdp->cblist;
  842. unsigned long cur_gp_seq;
  843. unsigned long flags;
  844. bool needwake_state = false;
  845. bool needwake_gp = false;
  846. bool can_sleep = true;
  847. struct rcu_node *rnp = rdp->mynode;
  848. do {
  849. swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
  850. nocb_cb_wait_cond(rdp));
  851. // VVV Ensure CB invocation follows _sleep test.
  852. if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
  853. WARN_ON(signal_pending(current));
  854. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
  855. }
  856. } while (!nocb_cb_can_run(rdp));
  857. local_irq_save(flags);
  858. rcu_momentary_dyntick_idle();
  859. local_irq_restore(flags);
  860. /*
  861. * Disable BH to provide the expected environment. Also, when
  862. * transitioning to/from NOCB mode, a self-requeuing callback might
  863. * be invoked from softirq. A short grace period could cause both
  864. * instances of this callback to execute concurrently.
  865. */
  866. local_bh_disable();
  867. rcu_do_batch(rdp);
  868. local_bh_enable();
  869. lockdep_assert_irqs_enabled();
  870. rcu_nocb_lock_irqsave(rdp, flags);
  871. if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
  872. rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
  873. raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
  874. needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
  875. raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
  876. }
  877. if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
  878. if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
  879. rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
  880. if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
  881. needwake_state = true;
  882. }
  883. if (rcu_segcblist_ready_cbs(cblist))
  884. can_sleep = false;
  885. } else {
  886. /*
  887. * De-offloading. Clear our flag and notify the de-offload worker.
  888. * We won't touch the callbacks and will keep sleeping until we
  889. * get re-offloaded.
  890. */
  891. WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
  892. rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
  893. if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
  894. needwake_state = true;
  895. }
  896. WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep);
  897. if (rdp->nocb_cb_sleep)
  898. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
  899. rcu_nocb_unlock_irqrestore(rdp, flags);
  900. if (needwake_gp)
  901. rcu_gp_kthread_wake();
  902. if (needwake_state)
  903. swake_up_one(&rdp->nocb_state_wq);
  904. }
  905. /*
  906. * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke
  907. * nocb_cb_wait() to do the dirty work.
  908. */
  909. static int rcu_nocb_cb_kthread(void *arg)
  910. {
  911. struct rcu_data *rdp = arg;
  912. // Each pass through this loop does one callback batch, and,
  913. // if there are no more ready callbacks, waits for them.
  914. for (;;) {
  915. nocb_cb_wait(rdp);
  916. cond_resched_tasks_rcu_qs();
  917. }
  918. return 0;
  919. }
  920. /* Is a deferred wakeup of rcu_nocb_kthread() required? */
  921. static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
  922. {
  923. return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
  924. }
  925. /* Do a deferred wakeup of rcu_nocb_kthread(). */
  926. static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
  927. struct rcu_data *rdp, int level,
  928. unsigned long flags)
  929. __releases(rdp_gp->nocb_gp_lock)
  930. {
  931. int ndw;
  932. int ret;
  933. if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
  934. raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
  935. return false;
  936. }
  937. ndw = rdp_gp->nocb_defer_wakeup;
  938. ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
  939. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
  940. return ret;
  941. }
  942. /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
  943. static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
  944. {
  945. unsigned long flags;
  946. struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
  947. WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
  948. trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
  949. raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
  950. smp_mb__after_spinlock(); /* Timer expire before wakeup. */
  951. do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
  952. }
  953. /*
  954. * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
  955. * This means we do an inexact common-case check. Note that if
  956. * we miss, ->nocb_timer will eventually clean things up.
  957. */
  958. static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
  959. {
  960. unsigned long flags;
  961. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  962. if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
  963. return false;
  964. raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
  965. return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
  966. }
  967. void rcu_nocb_flush_deferred_wakeup(void)
  968. {
  969. do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
  970. }
  971. EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
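/*
 * A minimal sketch (hypothetical caller, not part of the upstream file):
 * code about to let a CPU go idle can flush any deferred rcuog wakeup
 * first, so that the wakeup is not left pending behind a sleeping CPU.
 */
static void __maybe_unused example_before_idle(void)
{
	rcu_nocb_flush_deferred_wakeup();
	/* ... continue toward the idle loop ... */
}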
  972. static int rdp_offload_toggle(struct rcu_data *rdp,
  973. bool offload, unsigned long flags)
  974. __releases(rdp->nocb_lock)
  975. {
  976. struct rcu_segcblist *cblist = &rdp->cblist;
  977. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  978. bool wake_gp = false;
  979. rcu_segcblist_offload(cblist, offload);
  980. if (rdp->nocb_cb_sleep)
  981. rdp->nocb_cb_sleep = false;
  982. rcu_nocb_unlock_irqrestore(rdp, flags);
  983. /*
  984. * Ignore former value of nocb_cb_sleep and force wake up as it could
  985. * have been spuriously set to false already.
  986. */
  987. swake_up_one(&rdp->nocb_cb_wq);
  988. raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
  989. // Queue this rdp for addition to or removal from the list that rcuog iterates on
  990. WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp);
  991. if (rdp_gp->nocb_gp_sleep) {
  992. rdp_gp->nocb_gp_sleep = false;
  993. wake_gp = true;
  994. }
  995. raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
  996. return wake_gp;
  997. }
  998. static long rcu_nocb_rdp_deoffload(void *arg)
  999. {
  1000. struct rcu_data *rdp = arg;
  1001. struct rcu_segcblist *cblist = &rdp->cblist;
  1002. unsigned long flags;
  1003. int wake_gp;
  1004. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  1005. /*
  1006. * rcu_nocb_rdp_deoffload() may be called directly if
  1007. * rcuog/o[p] spawn failed, because at this time the rdp->cpu
  1008. * is not online yet.
  1009. */
  1010. WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu));
  1011. pr_info("De-offloading %d\n", rdp->cpu);
  1012. rcu_nocb_lock_irqsave(rdp, flags);
  1013. /*
  1014. * Flush once and for all now. This suffices because we are
  1015. * running on the target CPU holding ->nocb_lock (thus having
  1016. * interrupts disabled), and because rdp_offload_toggle()
  1017. * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED.
  1018. * Thus future calls to rcu_segcblist_completely_offloaded() will
  1019. * return false, which means that future calls to rcu_nocb_try_bypass()
  1020. * will refuse to put anything into the bypass.
  1021. */
  1022. WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
  1023. /*
  1024. * Start with invoking rcu_core() early. This way if the current thread
  1025. * happens to preempt an ongoing call to rcu_core() in the middle,
  1026. * leaving some work dismissed because rcu_core() still thinks the rdp is
  1027. * completely offloaded, we are guaranteed a nearby future instance of
  1028. * rcu_core() to catch up.
  1029. */
  1030. rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
  1031. invoke_rcu_core();
  1032. wake_gp = rdp_offload_toggle(rdp, false, flags);
  1033. mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
  1034. if (rdp_gp->nocb_gp_kthread) {
  1035. if (wake_gp)
  1036. wake_up_process(rdp_gp->nocb_gp_kthread);
  1037. /*
  1038. * If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB.
  1039. * Just wait for SEGCBLIST_KTHREAD_GP to be cleared by rcuog.
  1040. */
  1041. if (!rdp->nocb_cb_kthread) {
  1042. rcu_nocb_lock_irqsave(rdp, flags);
  1043. rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
  1044. rcu_nocb_unlock_irqrestore(rdp, flags);
  1045. }
  1046. swait_event_exclusive(rdp->nocb_state_wq,
  1047. !rcu_segcblist_test_flags(cblist,
  1048. SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP));
  1049. } else {
  1050. /*
  1051. * No kthread to clear the flags for us or remove the rdp from the nocb list
  1052. * to iterate. Do it here instead. Locking doesn't look strictly necessary
  1053. * but we stick to paranoia in this rare path.
  1054. */
  1055. rcu_nocb_lock_irqsave(rdp, flags);
  1056. rcu_segcblist_clear_flags(&rdp->cblist,
  1057. SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
  1058. rcu_nocb_unlock_irqrestore(rdp, flags);
  1059. list_del(&rdp->nocb_entry_rdp);
  1060. }
  1061. mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
  1062. /*
  1063. * Lock one last time to acquire latest callback updates from kthreads
  1064. * so we can later handle callbacks locally without locking.
  1065. */
  1066. rcu_nocb_lock_irqsave(rdp, flags);
  1067. /*
  1068. * Theoretically we could clear SEGCBLIST_LOCKING after the nocb
  1069. * lock is released but how about being paranoid for once?
  1070. */
  1071. rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING);
  1072. /*
  1073. * Without SEGCBLIST_LOCKING, we can't use
  1074. * rcu_nocb_unlock_irqrestore() anymore.
  1075. */
  1076. raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
  1077. /* Sanity check */
  1078. WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
  1079. return 0;
  1080. }
  1081. int rcu_nocb_cpu_deoffload(int cpu)
  1082. {
  1083. struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
  1084. int ret = 0;
  1085. cpus_read_lock();
  1086. mutex_lock(&rcu_state.barrier_mutex);
  1087. if (rcu_rdp_is_offloaded(rdp)) {
  1088. if (cpu_online(cpu)) {
  1089. ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
  1090. if (!ret)
  1091. cpumask_clear_cpu(cpu, rcu_nocb_mask);
  1092. } else {
  1093. pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n", rdp->cpu);
  1094. ret = -EINVAL;
  1095. }
  1096. }
  1097. mutex_unlock(&rcu_state.barrier_mutex);
  1098. cpus_read_unlock();
  1099. return ret;
  1100. }
  1101. EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
  1102. static long rcu_nocb_rdp_offload(void *arg)
  1103. {
  1104. struct rcu_data *rdp = arg;
  1105. struct rcu_segcblist *cblist = &rdp->cblist;
  1106. unsigned long flags;
  1107. int wake_gp;
  1108. struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
  1109. WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
  1110. /*
  1111. * For now we only support re-offload, i.e., the rdp must have been
  1112. * offloaded on boot first.
  1113. */
  1114. if (!rdp->nocb_gp_rdp)
  1115. return -EINVAL;
  1116. if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
  1117. return -EINVAL;
  1118. pr_info("Offloading %d\n", rdp->cpu);
  1119. /*
  1120. * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
  1121. * is set.
  1122. */
  1123. raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
  1124. /*
  1125. * We didn't take the nocb lock while working on the
  1126. * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
  1127. * Every modification that has been done previously on
  1128. * rdp->cblist must be visible remotely to the nocb kthreads
  1129. * upon wake up after reading the cblist flags.
  1130. *
  1131. * The layout against nocb_lock enforces that ordering:
  1132. *
  1133. * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait()
  1134. * ------------------------- ----------------------------
  1135. * WRITE callbacks rcu_nocb_lock()
  1136. * rcu_nocb_lock() READ flags
  1137. * WRITE flags READ callbacks
  1138. * rcu_nocb_unlock() rcu_nocb_unlock()
  1139. */
  1140. wake_gp = rdp_offload_toggle(rdp, true, flags);
  1141. if (wake_gp)
  1142. wake_up_process(rdp_gp->nocb_gp_kthread);
  1143. swait_event_exclusive(rdp->nocb_state_wq,
  1144. rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
  1145. rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
  1146. /*
  1147. * All kthreads are ready to work, we can finally relieve rcu_core() and
  1148. * enable nocb bypass.
  1149. */
  1150. rcu_nocb_lock_irqsave(rdp, flags);
  1151. rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
  1152. rcu_nocb_unlock_irqrestore(rdp, flags);
  1153. return 0;
  1154. }
  1155. int rcu_nocb_cpu_offload(int cpu)
  1156. {
  1157. struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
  1158. int ret = 0;
  1159. cpus_read_lock();
  1160. mutex_lock(&rcu_state.barrier_mutex);
  1161. if (!rcu_rdp_is_offloaded(rdp)) {
  1162. if (cpu_online(cpu)) {
  1163. ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
  1164. if (!ret)
  1165. cpumask_set_cpu(cpu, rcu_nocb_mask);
  1166. } else {
  1167. pr_info("NOCB: Cannot CB-offload offline CPU %d\n", rdp->cpu);
  1168. ret = -EINVAL;
  1169. }
  1170. }
  1171. mutex_unlock(&rcu_state.barrier_mutex);
  1172. cpus_read_unlock();
  1173. return ret;
  1174. }
  1175. EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
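/*
 * A minimal sketch (hypothetical test helper, not part of the upstream
 * file): toggling a CPU's offloaded state at runtime. De-offloading
 * requires the CPU to be online; re-offloading additionally requires that
 * the CPU was offloaded at boot (see rcu_nocb_rdp_offload()). Both
 * functions return 0 on success.
 */
static int __maybe_unused example_toggle_offload(int cpu)
{
	int ret;

	ret = rcu_nocb_cpu_deoffload(cpu);	/* Back to softirq/rcuc processing. */
	if (ret)
		return ret;
	return rcu_nocb_cpu_offload(cpu);	/* Hand callbacks back to rcuo kthreads. */
}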
  1176. static unsigned long
  1177. lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
  1178. {
  1179. int cpu;
  1180. unsigned long count = 0;
  1181. /* Snapshot count of all CPUs */
  1182. for_each_possible_cpu(cpu) {
  1183. struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
  1184. count += READ_ONCE(rdp->lazy_len);
  1185. }
  1186. return count ? count : SHRINK_EMPTY;
  1187. }
  1188. static unsigned long
  1189. lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
  1190. {
  1191. int cpu;
  1192. unsigned long flags;
  1193. unsigned long count = 0;
  1194. /* Snapshot count of all CPUs */
  1195. for_each_possible_cpu(cpu) {
  1196. struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
  1197. int _count = READ_ONCE(rdp->lazy_len);
  1198. if (_count == 0)
  1199. continue;
  1200. rcu_nocb_lock_irqsave(rdp, flags);
  1201. WRITE_ONCE(rdp->lazy_len, 0);
  1202. rcu_nocb_unlock_irqrestore(rdp, flags);
  1203. wake_nocb_gp(rdp, false);
  1204. sc->nr_to_scan -= _count;
  1205. count += _count;
  1206. if (sc->nr_to_scan <= 0)
  1207. break;
  1208. }
  1209. return count ? count : SHRINK_STOP;
  1210. }
  1211. static struct shrinker lazy_rcu_shrinker = {
  1212. .count_objects = lazy_rcu_shrink_count,
  1213. .scan_objects = lazy_rcu_shrink_scan,
  1214. .batch = 0,
  1215. .seeks = DEFAULT_SEEKS,
  1216. };
void __init rcu_init_nohz(void)
{
	int cpu;
	bool need_rcu_nocb_mask = false;
	bool offload_all = false;
	struct rcu_data *rdp;

#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL)
	if (!rcu_state.nocb_is_setup) {
		need_rcu_nocb_mask = true;
		offload_all = true;
	}
#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */

#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) {
		need_rcu_nocb_mask = true;
		offload_all = false; /* NO_HZ_FULL has its own mask. */
	}
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (need_rcu_nocb_mask) {
		if (!cpumask_available(rcu_nocb_mask)) {
			if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
				pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
				return;
			}
		}
		rcu_state.nocb_is_setup = true;
	}

	if (!rcu_state.nocb_is_setup)
		return;

#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running)
		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (offload_all)
		cpumask_setall(rcu_nocb_mask);

	if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
		pr_err("Failed to register lazy_rcu shrinker!\n");

	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
		pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
			    rcu_nocb_mask);
	}

	if (cpumask_empty(rcu_nocb_mask))
		pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
	else
		pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
			cpumask_pr_args(rcu_nocb_mask));
	if (rcu_nocb_poll)
		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");

	for_each_cpu(cpu, rcu_nocb_mask) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (rcu_segcblist_empty(&rdp->cblist))
			rcu_segcblist_init(&rdp->cblist);
		rcu_segcblist_offload(&rdp->cblist, true);
		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
	}
	rcu_organize_nocb_kthreads();
}
/* Initialize per-rcu_data variables for no-CBs CPUs. */
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
	init_swait_queue_head(&rdp->nocb_cb_wq);
	init_swait_queue_head(&rdp->nocb_gp_wq);
	init_swait_queue_head(&rdp->nocb_state_wq);
	raw_spin_lock_init(&rdp->nocb_lock);
	raw_spin_lock_init(&rdp->nocb_bypass_lock);
	raw_spin_lock_init(&rdp->nocb_gp_lock);
	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
	rcu_cblist_init(&rdp->nocb_bypass);
	WRITE_ONCE(rdp->lazy_len, 0);
	mutex_init(&rdp->nocb_gp_kthread_mutex);
}
/*
 * If the specified CPU is a no-CBs CPU that does not already have its
 * rcuo CB kthread, spawn it.  Additionally, if the rcuo GP kthread
 * for this CPU's group has not yet been created, spawn it as well.
 */
static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_data *rdp_gp;
	struct task_struct *t;
	struct sched_param sp;

	if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup)
		return;

	/* If there already is an rcuo kthread, then nothing to do. */
	if (rdp->nocb_cb_kthread)
		return;

	/* If we didn't spawn the GP kthread first, reorganize! */
	sp.sched_priority = kthread_prio;
	rdp_gp = rdp->nocb_gp_rdp;
	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
	if (!rdp_gp->nocb_gp_kthread) {
		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
				"rcuog/%d", rdp_gp->cpu);
		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
			mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
			goto end;
		}
		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
		if (kthread_prio)
			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	}
	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);

	/* Spawn the kthread for this CPU. */
	t = kthread_run(rcu_nocb_cb_kthread, rdp,
			"rcuo%c/%d", rcu_state.abbr, cpu);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
		goto end;

	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio)
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);

	WRITE_ONCE(rdp->nocb_cb_kthread, t);
	WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
	return;

end:
	mutex_lock(&rcu_state.barrier_mutex);
	if (rcu_rdp_is_offloaded(rdp)) {
		rcu_nocb_rdp_deoffload(rdp);
		cpumask_clear_cpu(cpu, rcu_nocb_mask);
	}
	mutex_unlock(&rcu_state.barrier_mutex);
}
/* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */
static int rcu_nocb_gp_stride = -1;
module_param(rcu_nocb_gp_stride, int, 0444);
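
/*
 * For example, with the default of -1 and nr_cpu_ids == 64, the stride
 * becomes 64 / int_sqrt(64) = 8, so (assuming contiguously numbered CPUs)
 * rcuog/0 handles the CB kthreads for CPUs 0-7, rcuog/8 handles CPUs 8-15,
 * and so on; see rcu_organize_nocb_kthreads() below.
 */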
/*
 * Initialize GP-CB relationships for all no-CBs CPUs.
 */
static void __init rcu_organize_nocb_kthreads(void)
{
	int cpu;
	bool firsttime = true;
	bool gotnocbs = false;
	bool gotnocbscbs = true;
	int ls = rcu_nocb_gp_stride;
	int nl = 0;  /* Next GP kthread. */
	struct rcu_data *rdp;
	struct rcu_data *rdp_gp = NULL;  /* Suppress misguided gcc warn. */

	if (!cpumask_available(rcu_nocb_mask))
		return;
	if (ls == -1) {
		ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
		rcu_nocb_gp_stride = ls;
	}

	/*
	 * Each pass through this loop sets up one rcu_data structure.
	 * Should the corresponding CPU come online in the future, then
	 * we will spawn the needed set of rcu_nocb_kthread() kthreads.
	 */
	for_each_possible_cpu(cpu) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (rdp->cpu >= nl) {
			/* New GP kthread, set up for CBs & next GP. */
			gotnocbs = true;
			nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
			rdp_gp = rdp;
			INIT_LIST_HEAD(&rdp->nocb_head_rdp);
			if (dump_tree) {
				if (!firsttime)
					pr_cont("%s\n", gotnocbscbs
						? "" : " (self only)");
				gotnocbscbs = false;
				firsttime = false;
				pr_alert("%s: No-CB GP kthread CPU %d:",
					 __func__, cpu);
			}
		} else {
			/* Another CB kthread, link to previous GP kthread. */
			gotnocbscbs = true;
			if (dump_tree)
				pr_cont(" %d", cpu);
		}
		rdp->nocb_gp_rdp = rdp_gp;
		if (cpumask_test_cpu(cpu, rcu_nocb_mask))
			list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
	}
	if (gotnocbs && dump_tree)
		pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
}
/*
 * Bind the current task to the offloaded CPUs.  If there are no offloaded
 * CPUs, leave the task unbound.  Splat if the bind attempt fails.
 */
void rcu_bind_current_to_nocb(void)
{
	if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
		WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
}
EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
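
/*
 * Used by the state-dump routines below: show_rcu_should_be_on_cpu()
 * returns "!" if the specified kthread is in the running state yet not
 * actually on a CPU (a hint that it may be starved), and "" otherwise.
 */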
// The ->on_cpu field is available only in CONFIG_SMP=y, so...
#ifdef CONFIG_SMP
static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
{
	return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
}
#else // #ifdef CONFIG_SMP
static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
{
	return "";
}
#endif // #else #ifdef CONFIG_SMP
/*
 * Dump out nocb grace-period kthread state for the specified rcu_data
 * structure.
 */
static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
{
	struct rcu_node *rnp = rdp->mynode;

	pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
		rdp->cpu,
		"kK"[!!rdp->nocb_gp_kthread],
		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
		"dD"[!!rdp->nocb_defer_wakeup],
		"tT"[timer_pending(&rdp->nocb_timer)],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[swait_active(&rdp->nocb_gp_wq)],
		".W"[swait_active(&rnp->nocb_gp_wq[0])],
		".W"[swait_active(&rnp->nocb_gp_wq[1])],
		".B"[!!rdp->nocb_gp_bypass],
		".G"[!!rdp->nocb_gp_gp],
		(long)rdp->nocb_gp_seq,
		rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
		rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
		rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
		show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread));
}
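
/*
 * Rough legend for the show_rcu_nocb_gp_state() output above: an uppercase
 * letter means the corresponding kthread/lock/timer/flag is present, held,
 * pending, or set; a lowercase letter or '.' means it is not.  These are
 * followed by the GP sequence number, the rcu_node grplo:grphi span, the
 * wait-loop count, and the GP kthread's state, CPU, and starvation hint.
 */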
/* Dump out nocb kthread state for the specified rcu_data structure. */
static void show_rcu_nocb_state(struct rcu_data *rdp)
{
	char bufw[20];
	char bufr[20];
	struct rcu_data *nocb_next_rdp;
	struct rcu_segcblist *rsclp = &rdp->cblist;
	bool waslocked;
	bool wassleep;

	if (rdp->nocb_gp_rdp == rdp)
		show_rcu_nocb_gp_state(rdp);

	nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp,
					      &rdp->nocb_entry_rdp,
					      typeof(*rdp),
					      nocb_entry_rdp);

	sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
	sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
	pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
		rdp->cpu, rdp->nocb_gp_rdp->cpu,
		nocb_next_rdp ? nocb_next_rdp->cpu : -1,
		"kK"[!!rdp->nocb_cb_kthread],
		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
		"sS"[!!rdp->nocb_cb_sleep],
		".W"[swait_active(&rdp->nocb_cb_wq)],
		jiffies - rdp->nocb_bypass_first,
		jiffies - rdp->nocb_nobypass_last,
		rdp->nocb_nobypass_count,
		".D"[rcu_segcblist_ready_cbs(rsclp)],
		".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
		".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
		rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
		".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
		rcu_segcblist_n_cbs(&rdp->cblist),
		rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
		rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1,
		show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));

	/* It is OK for GP kthreads to have GP state. */
	if (rdp->nocb_gp_rdp == rdp)
		return;

	waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
	wassleep = swait_active(&rdp->nocb_gp_wq);
	if (!rdp->nocb_gp_sleep && !waslocked && !wassleep)
		return;  /* Nothing untoward. */

	pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
		"lL"[waslocked],
		"dD"[!!rdp->nocb_defer_wakeup],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[wassleep]);
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
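
/*
 * CONFIG_RCU_NOCB_CPU=n: the remaining definitions are no-op stubs so
 * that common RCU code can call them unconditionally.  The locking
 * helpers reduce to plain IRQ-flag handling or nothing at all, and the
 * bypass/offload queries report that nothing is offloaded.
 */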
static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
{
	return 0;
}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
	return false;
}

/* No ->nocb_lock to acquire. */
static void rcu_nocb_lock(struct rcu_data *rdp)
{
}

/* No ->nocb_lock to release. */
static void rcu_nocb_unlock(struct rcu_data *rdp)
{
}

/* No ->nocb_lock to release. */
static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags)
{
	local_irq_restore(flags);
}

/* Lockdep check that ->cblist may be safely accessed. */
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
}

static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return NULL;
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}

static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
{
	return false;
}

static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				  unsigned long j, bool lazy)
{
	return true;
}

static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				bool *was_alldone, unsigned long flags, bool lazy)
{
	return false;
}

static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
				 unsigned long flags)
{
	WARN_ON_ONCE(1);  /* Should be dead code! */
}

static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}

static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
{
	return false;
}

static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	return false;
}

static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
}

static void show_rcu_nocb_state(struct rcu_data *rdp)
{
}

#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */