// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <[email protected]>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

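/*
 * Check whether @newdev qualifies as a per-CPU oneshot wakeup device for
 * @cpu and, if so, install it, replacing a lower rated one.
 */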
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

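/*
 * Update the frequency of the broadcast clock event device.
 */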
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

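/*
 * Set up the broadcast function of a device which depends on the broadcast
 * mechanism. Fall back to a warning stub if no function is available.
 */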
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc, false);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

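/*
 * Called on the target CPU when a broadcast wakeup arrives: run the event
 * handler of the CPU local tick device.
 */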
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc, false);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}
#endif

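/*
 * Shut down the broadcast device on system suspend. It is started again
 * from tick_resume_broadcast().
 */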
void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
						 CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 *   events. This happens in dyntick mode, as the maximum PIT
	 *   delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 *   in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

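/*
 * For a hrtimer based broadcast device the CPU to which the hrtimer is
 * bound must stay out of deep idle while an event is armed. Return -EBUSY
 * for that CPU, 0 otherwise.
 */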
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
						       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

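/*
 * Use the per-CPU oneshot wakeup device, if one is registered, to bring the
 * CPU out of idle instead of the global broadcast device.
 */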
static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

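/*
 * Enter/exit broadcast mode for idle. Prefer the per-CPU wakeup device,
 * fall back to the global broadcast device, and otherwise tell the caller
 * that deep idle is not possible.
 */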
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
					 bool from_periodic)
{
	int cpu = smp_processor_id();
	ktime_t nexttick = 0;

	if (!bc)
		return;

	/*
	 * When the broadcast device was switched to oneshot by the first
	 * CPU handling the NOHZ change, the other CPUs will reach this
	 * code via hrtimer_run_queues() -> tick_check_oneshot_change()
	 * too. Set up the broadcast device only once!
	 */
	if (bc->event_handler == tick_handle_oneshot_broadcast) {
		/*
		 * The CPU which switched from periodic to oneshot mode
		 * set the broadcast oneshot bit for all other CPUs which
		 * are in the general (periodic) broadcast mask to ensure
		 * that CPUs which wait for the periodic broadcast are
		 * woken up.
		 *
		 * Clear the bit for the local CPU as the set bit would
		 * prevent the first tick_broadcast_enter() after this CPU
		 * switched to oneshot state to program the broadcast
		 * device.
		 *
		 * This code can also be reached via tick_broadcast_control(),
		 * but this cannot avoid the tick_broadcast_clear_oneshot()
		 * as that would break the periodic to oneshot transition of
		 * secondary CPUs. But that's harmless as the below only
		 * clears already cleared bits.
		 */
		tick_broadcast_clear_oneshot(cpu);
		return;
	}

	bc->event_handler = tick_handle_oneshot_broadcast;
	bc->next_event = KTIME_MAX;

	/*
	 * When the tick mode is switched from periodic to oneshot it must
	 * be ensured that CPUs which are waiting for periodic broadcast
	 * get their wake-up at the next tick. This is achieved by ORing
	 * tick_broadcast_mask into tick_broadcast_oneshot_mask.
	 *
	 * For other callers, e.g. broadcast device replacement,
	 * tick_broadcast_oneshot_mask must not be touched as this would
	 * set bits for CPUs which are already NOHZ, but not idle. Their
	 * next tick_broadcast_enter() would observe the bit set and fail
	 * to update the expiry time and the broadcast event device.
	 */
	if (from_periodic) {
		cpumask_copy(tmpmask, tick_broadcast_mask);
		/* Remove the local CPU as it is obviously not idle */
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

		/*
		 * Ensure that the oneshot broadcast handler will wake the
		 * CPUs which are still waiting for periodic broadcast.
		 */
		nexttick = tick_get_next_period();
		tick_broadcast_init_next_event(tmpmask, nexttick);

		/*
		 * If the underlying broadcast clock event device is
		 * already in oneshot state, then there is nothing to do.
		 * The device was already armed for the next tick
		 * in tick_handle_broadcast_periodic()
		 */
		if (clockevent_state_oneshot(bc))
			return;
	}

	/*
	 * When switching from periodic to oneshot mode arm the broadcast
	 * device for the next tick.
	 *
	 * If the broadcast device has been replaced in oneshot mode and
	 * the oneshot broadcast mask is not empty, then arm it to expire
	 * immediately in order to reevaluate the next expiring timer.
	 * @nexttick is 0 and therefore in the past which will cause the
	 * clockevent code to force an event.
	 *
	 * For both cases the programming can be avoided when the oneshot
	 * broadcast mask is empty.
	 *
	 * tick_broadcast_set_event() implicitly switches the broadcast
	 * device to oneshot state.
	 */
	if (!cpumask_empty(tick_broadcast_oneshot_mask))
		tick_broadcast_set_event(bc, cpu, nexttick);
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	enum tick_device_mode oldmode;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	oldmode = tick_broadcast_device.mode;
	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

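/*
 * Allocate the cpumasks used by the broadcast code at early boot.
 */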
void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}