context_tracking.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high level context boundaries such as kernel,
 * userspace, guest or idle.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in idle, userspace or guest mode.
 *
 * User/guest tracking started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 * RCU extended quiescent state bits imported from kernel/rcu/tree.c
 * where the relevant authorship may be found.
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <trace/events/rcu.h>

DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
        .dynticks_nesting = 1,
        .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
#endif
        .state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
};
EXPORT_SYMBOL_GPL(context_tracking);
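
/*
 * Example (illustrative sketch, not part of the original file): code outside
 * this file can query the state tracked in this per-CPU structure through the
 * ct_state() helper declared in <linux/context_tracking.h>, for instance to
 * assert that a syscall entered from the tracked kernel context:
 *
 *        WARN_ON_ONCE(ct_state() != CONTEXT_KERNEL);
 */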
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
#define TPS(x)  tracepoint_string(x)

/* Record the current task on dyntick-idle entry. */
static __always_inline void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
        WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Record no current task on dyntick-idle exit. */
static __always_inline void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
        WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
static __always_inline void rcu_dynticks_task_trace_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
                current->trc_reader_special.b.need_mb = true;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
static __always_inline void rcu_dynticks_task_trace_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
                current->trc_reader_special.b.need_mb = false;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
/*
 * Record entry into an extended quiescent state. This is only to be
 * called when not already in an extended quiescent state, that is,
 * RCU is watching prior to the call to this function and is no longer
 * watching upon return.
 */
static noinstr void ct_kernel_exit_state(int offset)
{
        int seq;

        /*
         * CPUs seeing atomic_add_return() must see prior RCU read-side
         * critical sections, and we also must force ordering with the
         * next idle sojourn.
         */
        rcu_dynticks_task_trace_enter();        // Before ->dynticks update!
        seq = ct_state_inc(offset);
        // RCU is no longer watching. Better be in extended quiescent state!
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
}

/*
 * Record exit from an extended quiescent state. This is only to be
 * called from an extended quiescent state, that is, RCU is not watching
 * prior to the call to this function and is watching upon return.
 */
static noinstr void ct_kernel_enter_state(int offset)
{
        int seq;

        /*
         * CPUs seeing atomic_add_return() must see prior idle sojourns,
         * and we also must force ordering with the next RCU read-side
         * critical section.
         */
        seq = ct_state_inc(offset);
        // RCU is now watching. Better not be in an extended quiescent state!
        rcu_dynticks_task_trace_exit();         // After ->dynticks update!
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
}
/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
static void noinstr ct_kernel_exit(bool user, int offset)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
        WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
                     ct_dynticks_nesting() == 0);
        if (ct_dynticks_nesting() != 1) {
                // RCU will still be watching, so just do accounting and leave.
                ct->dynticks_nesting--;
                return;
        }

        instrumentation_begin();
        lockdep_assert_irqs_disabled();
        trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
        rcu_preempt_deferred_qs(current);

        // instrumentation for the noinstr ct_kernel_exit_state()
        instrument_atomic_write(&ct->state, sizeof(ct->state));

        instrumentation_end();
        WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
        // RCU is watching here ...
        ct_kernel_exit_state(offset);
        // ... but is no longer watching here.
        rcu_dynticks_task_enter();
}
/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
 * allow for the possibility of usermode upcalls messing up our count of
 * interrupt nesting level during the busy period that is just now starting.
 */
static void noinstr ct_kernel_enter(bool user, int offset)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);
        long oldval;

        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
        oldval = ct_dynticks_nesting();
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
        if (oldval) {
                // RCU was already watching, so just do accounting and leave.
                ct->dynticks_nesting++;
                return;
        }
        rcu_dynticks_task_exit();
        // RCU is not watching here ...
        ct_kernel_enter_state(offset);
        // ... but is watching here.
        instrumentation_begin();

        // instrumentation for the noinstr ct_kernel_enter_state()
        instrument_atomic_write(&ct->state, sizeof(ct->state));

        trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
        WRITE_ONCE(ct->dynticks_nesting, 1);
        WARN_ON_ONCE(ct_dynticks_nmi_nesting());
        WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
        instrumentation_end();
}
/**
 * ct_nmi_exit - inform RCU of exit from NMI context
 *
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 *
 * If you add or remove a call to ct_nmi_exit(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_exit(void)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        instrumentation_begin();
        /*
         * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
         * (We are exiting an NMI handler, so RCU better be paying attention
         * to us!)
         */
        WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
        WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());

        /*
         * If the nesting level is not 1, the CPU wasn't RCU-idle, so
         * leave it in non-RCU-idle state.
         */
        if (ct_dynticks_nmi_nesting() != 1) {
                trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(),
                                  ct_dynticks_nmi_nesting() - 2, ct_dynticks());
                WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
                           ct_dynticks_nmi_nesting() - 2);
                instrumentation_end();
                return;
        }

        /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
        trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
        WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

        // instrumentation for the noinstr ct_kernel_exit_state()
        instrument_atomic_write(&ct->state, sizeof(ct->state));
        instrumentation_end();

        // RCU is watching here ...
        ct_kernel_exit_state(RCU_DYNTICKS_IDX);
        // ... but is no longer watching here.

        if (!in_nmi())
                rcu_dynticks_task_enter();
}
/**
 * ct_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle from RCU's viewpoint, update ct->state and
 * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active. This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int. (You will probably
 * run out of stack space first.)
 *
 * If you add or remove a call to ct_nmi_enter(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_enter(void)
{
        long incby = 2;
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        /* Complain about underflow. */
        WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);

        /*
         * If idle from RCU viewpoint, atomically increment ->dynticks
         * to mark non-idle and increment ->dynticks_nmi_nesting by one.
         * Otherwise, increment ->dynticks_nmi_nesting by two. This means
         * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
         * to be in the outermost NMI handler that interrupted an RCU-idle
         * period (observation due to Andy Lutomirski).
         */
        if (rcu_dynticks_curr_cpu_in_eqs()) {

                if (!in_nmi())
                        rcu_dynticks_task_exit();

                // RCU is not watching here ...
                ct_kernel_enter_state(RCU_DYNTICKS_IDX);
                // ... but is watching here.

                instrumentation_begin();
                // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
                instrument_atomic_read(&ct->state, sizeof(ct->state));
                // instrumentation for the noinstr ct_kernel_enter_state()
                instrument_atomic_write(&ct->state, sizeof(ct->state));

                incby = 1;
        } else if (!in_nmi()) {
                instrumentation_begin();
                rcu_irq_enter_check_tick();
        } else {
                instrumentation_begin();
        }

        trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
                          ct_dynticks_nmi_nesting(),
                          ct_dynticks_nmi_nesting() + incby, ct_dynticks());
        instrumentation_end();
        WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
                   ct_dynticks_nmi_nesting() + incby);
        barrier();
}
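
/*
 * Illustrative trace (a sketch derived from the code above, not part of the
 * original file) of how ->dynticks_nmi_nesting evolves: the outermost NMI
 * that interrupts an RCU-idle CPU adds 1, every other entry adds 2, and each
 * exit subtracts 2 until the final exit sees the value 1 and returns the CPU
 * to the extended quiescent state:
 *
 *        idle, RCU not watching          nesting == 0
 *          NMI #1: ct_nmi_enter()        nesting == 1  (incby == 1, RCU watches)
 *            NMI #2: ct_nmi_enter()      nesting == 3  (incby == 2)
 *            NMI #2: ct_nmi_exit()       nesting == 1  (-= 2)
 *          NMI #1: ct_nmi_exit()         nesting == 0  (RCU stops watching)
 */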
/**
 * ct_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur. (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * If you add or remove a call to ct_idle_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_enter(void)
{
        WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
        ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);

/**
 * ct_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * If you add or remove a call to ct_idle_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_exit(void)
{
        unsigned long flags;

        raw_local_irq_save(flags);
        ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
        raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
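
/*
 * Usage sketch (illustrative only; the real call sites live in the generic
 * idle loop, and cpu_do_idle() here is a hypothetical low-level idle
 * routine): the idle task brackets the actual idling with these two calls,
 * with interrupts disabled, so RCU can ignore this CPU while it sleeps:
 *
 *        raw_local_irq_disable();
 *        ct_idle_enter();        // RCU stops watching this CPU
 *        cpu_do_idle();          // hypothetical arch-specific idle/wait
 *        ct_idle_exit();         // RCU is watching again
 *        raw_local_irq_enable();
 */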
/**
 * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to user mode!
 * This code assumes that the idle loop never does upcalls to user mode.
 * If your architecture's idle loop does do upcalls to user mode (or does
 * anything else that results in unbalanced calls to the irq_enter() and
 * irq_exit() functions), RCU will give you what you deserve, good and hard.
 * But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_enter(void)
{
        lockdep_assert_irqs_disabled();
        ct_nmi_enter();
}

/**
 * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit(). If your
 * architecture's idle loop violates this assumption, RCU will give you what
 * you deserve, good and hard. But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_exit(void)
{
        lockdep_assert_irqs_disabled();
        ct_nmi_exit();
}
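
/*
 * Usage sketch (illustrative, not taken from any particular architecture;
 * arch_handle_irq() is a hypothetical dispatch routine): an irq entry path
 * that may interrupt idle brackets the handler with these calls while
 * interrupts are still disabled:
 *
 *        ct_irq_enter();          // make sure RCU watches, even from idle
 *        arch_handle_irq(regs);   // hypothetical handler dispatch
 *        ct_irq_exit();           // may return the CPU to RCU-idle
 */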
/*
 * Wrapper for ct_irq_enter() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_enter_irqson(void)
{
        unsigned long flags;

        local_irq_save(flags);
        ct_irq_enter();
        local_irq_restore(flags);
}

/*
 * Wrapper for ct_irq_exit() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_exit_irqson(void)
{
        unsigned long flags;

        local_irq_save(flags);
        ct_irq_exit();
        local_irq_restore(flags);
}
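
/*
 * Usage sketch for the wrappers above (illustrative; do_work_needing_rcu()
 * is a hypothetical callback): callers that may run with interrupts enabled
 * use the _irqson variants instead of open-coding the local_irq_save()/
 * local_irq_restore() pair around ct_irq_enter() and ct_irq_exit():
 *
 *        ct_irq_enter_irqson();
 *        do_work_needing_rcu();
 *        ct_irq_exit_irqson();
 */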
#else
static __always_inline void ct_kernel_exit(bool user, int offset) { }
static __always_inline void ct_kernel_enter(bool user, int offset) { }
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

static noinstr bool context_tracking_recursion_enter(void)
{
        int recursion;

        recursion = __this_cpu_inc_return(context_tracking.recursion);
        if (recursion == 1)
                return true;

        WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
        __this_cpu_dec(context_tracking.recursion);

        return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
        __this_cpu_dec(context_tracking.recursion);
}
/**
 * __ct_user_enter - Inform the context tracking that the CPU is going
 *                   to enter user or guest space mode.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed the remaining kernel
 * instructions to execute won't use any RCU read side critical section
 * because this function sets RCU in an extended quiescent state.
 */
void noinstr __ct_user_enter(enum ctx_state state)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        lockdep_assert_irqs_disabled();

        /* Kernel threads aren't supposed to go to userspace */
        WARN_ON_ONCE(!current->mm);

        if (!context_tracking_recursion_enter())
                return;

        if (__ct_state() != state) {
                if (ct->active) {
                        /*
                         * At this stage, only low level arch entry code remains and
                         * then we'll run in userspace. We can assume there won't be
                         * any RCU read-side critical section until the next call to
                         * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
                         * on the tick.
                         */
                        if (state == CONTEXT_USER) {
                                instrumentation_begin();
                                trace_user_enter(0);
                                vtime_user_enter(current);
                                instrumentation_end();
                        }
                        /*
                         * Except when using the generic entry code, we may be past the
                         * last rescheduling opportunity in the entry code. Trigger a
                         * self IPI that will fire and reschedule once we resume in
                         * user/guest mode.
                         */
                        rcu_irq_work_resched();

                        /*
                         * Enter RCU idle mode right before resuming userspace. No use of RCU
                         * is permitted between this call and rcu_eqs_exit(). This way the
                         * CPU doesn't need to maintain the tick for RCU maintenance purposes
                         * when the CPU runs in userspace.
                         */
                        ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);

                        /*
                         * Special case if we only track user <-> kernel transitions for tickless
                         * cputime accounting but we don't support RCU extended quiescent state.
                         * In this case we don't care about any concurrency/ordering.
                         */
                        if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
                                arch_atomic_set(&ct->state, state);
                } else {
                        /*
                         * Even if context tracking is disabled on this CPU, because it's outside
                         * the full dynticks mask for example, we still have to keep track of the
                         * context transitions and states to prevent inconsistency on those of
                         * other CPUs.
                         * If a task triggers an exception in userspace, sleeps on the exception
                         * handler and then migrates to another CPU, that new CPU must know where
                         * the exception returns by the time we call exception_exit().
                         * This information can only be provided by the previous CPU when it called
                         * exception_enter().
                         * OTOH we can spare the calls to vtime and RCU when context_tracking.active
                         * is false because we know that CPU is not tickless.
                         */
                        if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
                                /* Tracking for vtime only, no concurrent RCU EQS accounting */
                                arch_atomic_set(&ct->state, state);
                        } else {
                                /*
                                 * Tracking for vtime and RCU EQS. Make sure we don't race
                                 * with NMIs. OTOH we don't care about ordering here since
                                 * RCU only requires RCU_DYNTICKS_IDX increments to be fully
                                 * ordered.
                                 */
                                arch_atomic_add(state, &ct->state);
                        }
                }
        }
        context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_enter);
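
/*
 * Usage sketch (illustrative; return_to_user() is a hypothetical low-level
 * return path): architectures normally reach __ct_user_enter() through the
 * user_enter_irqoff() helper from <linux/context_tracking.h>, as the very
 * last step before returning to userspace and with interrupts disabled:
 *
 *        user_enter_irqoff();     // ends up in __ct_user_enter(CONTEXT_USER)
 *        return_to_user(regs);    // hypothetical low-level return to userspace
 */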
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
        unsigned long flags;

        /*
         * Some contexts may involve an exception occurring in an irq,
         * leading to that nesting:
         * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
         * helpers are enough to protect RCU uses inside the exception. So
         * just return immediately if we detect we are in an IRQ.
         */
        if (in_interrupt())
                return;

        local_irq_save(flags);
        __ct_user_enter(state);
        local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);

/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *                         archs that didn't manage to check the context tracking
 *                         static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
        user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);
/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *                  exiting user or guest mode and entering the kernel.
 *
 * This function must be called after we entered the kernel from user or
 * guest space, before any use of RCU read side critical sections. This
 * potentially includes any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
        struct context_tracking *ct = this_cpu_ptr(&context_tracking);

        if (!context_tracking_recursion_enter())
                return;

        if (__ct_state() == state) {
                if (ct->active) {
                        /*
                         * Exit RCU idle mode while entering the kernel because it can
                         * run an RCU read side critical section anytime.
                         */
                        ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
                        if (state == CONTEXT_USER) {
                                instrumentation_begin();
                                vtime_user_exit(current);
                                trace_user_exit(0);
                                instrumentation_end();
                        }

                        /*
                         * Special case if we only track user <-> kernel transitions for tickless
                         * cputime accounting but we don't support RCU extended quiescent state.
                         * In this case we don't care about any concurrency/ordering.
                         */
                        if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
                                arch_atomic_set(&ct->state, CONTEXT_KERNEL);

                } else {
                        if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
                                /* Tracking for vtime only, no concurrent RCU EQS accounting */
                                arch_atomic_set(&ct->state, CONTEXT_KERNEL);
                        } else {
                                /*
                                 * Tracking for vtime and RCU EQS. Make sure we don't race
                                 * with NMIs. OTOH we don't care about ordering here since
                                 * RCU only requires RCU_DYNTICKS_IDX increments to be fully
                                 * ordered.
                                 */
                                arch_atomic_sub(state, &ct->state);
                        }
                }
        }
        context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);
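
/*
 * Usage sketch (illustrative; handle_syscall() is a hypothetical dispatch
 * routine): the mirror call on kernel entry goes through user_exit_irqoff(),
 * before any code that might use an RCU read-side critical section runs:
 *
 *        user_exit_irqoff();      // ends up in __ct_user_exit(CONTEXT_USER)
 *        handle_syscall(regs);    // hypothetical syscall dispatch
 */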
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);
        __ct_user_exit(state);
        local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);

/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *                        archs that didn't manage to check the context tracking
 *                        static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code's responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
        user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);
void __init ct_cpu_track_user(int cpu)
{
        static __initdata bool initialized = false;

        if (!per_cpu(context_tracking.active, cpu)) {
                per_cpu(context_tracking.active, cpu) = true;
                static_branch_inc(&context_tracking_key);
        }

        if (initialized)
                return;

#ifdef CONFIG_HAVE_TIF_NOHZ
        /*
         * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
         * This assumes that init is the only task at this early boot stage.
         */
        set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
        WARN_ON_ONCE(!tasklist_empty());

        initialized = true;
}
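
/*
 * Usage sketch (illustrative; nohz_full_mask is a placeholder name): besides
 * the CONFIG_CONTEXT_TRACKING_USER_FORCE path below, boot code that enables
 * full dynticks is expected to call ct_cpu_track_user() once per CPU it
 * wants tracked:
 *
 *        for_each_cpu(cpu, nohz_full_mask)
 *                ct_cpu_track_user(cpu);
 */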
#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
void __init context_tracking_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                ct_cpu_track_user(cpu);
}
#endif

#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */