smp.c

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * SMP support for ppc.
  4. *
  5. * Written by Cort Dougan ([email protected]) borrowing a great
  6. * deal of code from the sparc and intel versions.
  7. *
  8. * Copyright (C) 1999 Cort Dougan <[email protected]>
  9. *
  10. * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
  11. * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
  12. */
  13. #undef DEBUG
  14. #include <linux/kernel.h>
  15. #include <linux/export.h>
  16. #include <linux/sched/mm.h>
  17. #include <linux/sched/task_stack.h>
  18. #include <linux/sched/topology.h>
  19. #include <linux/smp.h>
  20. #include <linux/interrupt.h>
  21. #include <linux/delay.h>
  22. #include <linux/init.h>
  23. #include <linux/spinlock.h>
  24. #include <linux/cache.h>
  25. #include <linux/err.h>
  26. #include <linux/device.h>
  27. #include <linux/cpu.h>
  28. #include <linux/notifier.h>
  29. #include <linux/topology.h>
  30. #include <linux/profile.h>
  31. #include <linux/processor.h>
  32. #include <linux/random.h>
  33. #include <linux/stackprotector.h>
  34. #include <linux/pgtable.h>
  35. #include <linux/clockchips.h>
  36. #include <linux/kexec.h>
  37. #include <asm/ptrace.h>
  38. #include <linux/atomic.h>
  39. #include <asm/irq.h>
  40. #include <asm/hw_irq.h>
  41. #include <asm/kvm_ppc.h>
  42. #include <asm/dbell.h>
  43. #include <asm/page.h>
  44. #include <asm/smp.h>
  45. #include <asm/time.h>
  46. #include <asm/machdep.h>
  47. #include <asm/cputhreads.h>
  48. #include <asm/cputable.h>
  49. #include <asm/mpic.h>
  50. #include <asm/vdso_datapage.h>
  51. #ifdef CONFIG_PPC64
  52. #include <asm/paca.h>
  53. #endif
  54. #include <asm/vdso.h>
  55. #include <asm/debug.h>
  56. #include <asm/cpu_has_feature.h>
  57. #include <asm/ftrace.h>
  58. #include <asm/kup.h>
  59. #include <asm/fadump.h>
  60. #ifdef DEBUG
  61. #include <asm/udbg.h>
  62. #define DBG(fmt...) udbg_printf(fmt)
  63. #else
  64. #define DBG(fmt...)
  65. #endif
  66. #ifdef CONFIG_HOTPLUG_CPU
  67. /* State of each CPU during hotplug phases */
  68. static DEFINE_PER_CPU(int, cpu_state) = { 0 };
  69. #endif
  70. struct task_struct *secondary_current;
  71. bool has_big_cores;
  72. bool coregroup_enabled;
  73. bool thread_group_shares_l2;
  74. bool thread_group_shares_l3;
  75. DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
  76. DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
  77. DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
  78. DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
  79. static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
  80. EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  81. EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
  82. EXPORT_PER_CPU_SYMBOL(cpu_core_map);
  83. EXPORT_SYMBOL_GPL(has_big_cores);
  84. enum {
  85. #ifdef CONFIG_SCHED_SMT
  86. smt_idx,
  87. #endif
  88. cache_idx,
  89. mc_idx,
  90. die_idx,
  91. };
  92. #define MAX_THREAD_LIST_SIZE 8
  93. #define THREAD_GROUP_SHARE_L1 1
  94. #define THREAD_GROUP_SHARE_L2_L3 2
  95. struct thread_groups {
  96. unsigned int property;
  97. unsigned int nr_groups;
  98. unsigned int threads_per_group;
  99. unsigned int thread_list[MAX_THREAD_LIST_SIZE];
  100. };
  101. /* Maximum number of properties that groups of threads within a core can share */
  102. #define MAX_THREAD_GROUP_PROPERTIES 2
  103. struct thread_groups_list {
  104. unsigned int nr_properties;
  105. struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
  106. };
  107. static struct thread_groups_list tgl[NR_CPUS] __initdata;
  108. /*
  109. * On big-core systems, thread_group_l1_cache_map for each CPU corresponds to
  110. * the set of its siblings that share the L1-cache.
  111. */
  112. DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  113. /*
  114. * On some big-core systems, thread_group_l2_cache_map for each CPU
  115. * corresponds to the set of its siblings within the core that share the
  116. * L2-cache.
  117. */
  118. DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
  119. /*
  120. * On P10, thread_group_l3_cache_map for each CPU is equal to the
  121. * thread_group_l2_cache_map
  122. */
  123. DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
  124. /* SMP operations for this machine */
  125. struct smp_ops_t *smp_ops;
  126. /* Can't be static due to PowerMac hackery */
  127. volatile unsigned int cpu_callin_map[NR_CPUS];
  128. int smt_enabled_at_boot = 1;
  129. /*
  130. * Returns 1 if the specified cpu should be brought up during boot.
  131. * Used to inhibit booting threads if they've been disabled or
  132. * limited on the command line
  133. */
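/*
* For example, with smt_enabled_at_boot == 2 only threads 0 and 1 of each
* core are brought up during boot, and with smt_enabled_at_boot == 0 only
* thread 0 of each core is.
*/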
  134. int smp_generic_cpu_bootable(unsigned int nr)
  135. {
  136. /* Special case - we inhibit secondary thread startup
  137. * during boot if the user requests it.
  138. */
  139. if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
  140. if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
  141. return 0;
  142. if (smt_enabled_at_boot
  143. && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
  144. return 0;
  145. }
  146. return 1;
  147. }
  148. #ifdef CONFIG_PPC64
  149. int smp_generic_kick_cpu(int nr)
  150. {
  151. if (nr < 0 || nr >= nr_cpu_ids)
  152. return -EINVAL;
  153. /*
  154. * The processor is currently spinning, waiting for the
  155. * cpu_start field to become non-zero. After we set cpu_start,
  156. * the processor will continue on to secondary_start.
  157. */
  158. if (!paca_ptrs[nr]->cpu_start) {
  159. paca_ptrs[nr]->cpu_start = 1;
  160. smp_mb();
  161. return 0;
  162. }
  163. #ifdef CONFIG_HOTPLUG_CPU
  164. /*
  165. * Ok it's not there, so it might be soft-unplugged, let's
  166. * try to bring it back
  167. */
  168. generic_set_cpu_up(nr);
  169. smp_wmb();
  170. smp_send_reschedule(nr);
  171. #endif /* CONFIG_HOTPLUG_CPU */
  172. return 0;
  173. }
  174. #endif /* CONFIG_PPC64 */
  175. static irqreturn_t call_function_action(int irq, void *data)
  176. {
  177. generic_smp_call_function_interrupt();
  178. return IRQ_HANDLED;
  179. }
  180. static irqreturn_t reschedule_action(int irq, void *data)
  181. {
  182. scheduler_ipi();
  183. return IRQ_HANDLED;
  184. }
  185. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  186. static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
  187. {
  188. timer_broadcast_interrupt();
  189. return IRQ_HANDLED;
  190. }
  191. #endif
  192. #ifdef CONFIG_NMI_IPI
  193. static irqreturn_t nmi_ipi_action(int irq, void *data)
  194. {
  195. smp_handle_nmi_ipi(get_irq_regs());
  196. return IRQ_HANDLED;
  197. }
  198. #endif
  199. static irq_handler_t smp_ipi_action[] = {
  200. [PPC_MSG_CALL_FUNCTION] = call_function_action,
  201. [PPC_MSG_RESCHEDULE] = reschedule_action,
  202. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  203. [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
  204. #endif
  205. #ifdef CONFIG_NMI_IPI
  206. [PPC_MSG_NMI_IPI] = nmi_ipi_action,
  207. #endif
  208. };
  209. /*
  210. * The NMI IPI is a fallback and not truly non-maskable. It is simpler
  211. * than going through the call function infrastructure, and strongly
  212. * serialized, so it is more appropriate for debugging.
  213. */
  214. const char *smp_ipi_name[] = {
  215. [PPC_MSG_CALL_FUNCTION] = "ipi call function",
  216. [PPC_MSG_RESCHEDULE] = "ipi reschedule",
  217. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  218. [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
  219. #endif
  220. #ifdef CONFIG_NMI_IPI
  221. [PPC_MSG_NMI_IPI] = "nmi ipi",
  222. #endif
  223. };
  224. /* optional function to request ipi, for controllers with >= 4 ipis */
  225. int smp_request_message_ipi(int virq, int msg)
  226. {
  227. int err;
  228. if (msg < 0 || msg > PPC_MSG_NMI_IPI)
  229. return -EINVAL;
  230. #ifndef CONFIG_NMI_IPI
  231. if (msg == PPC_MSG_NMI_IPI)
  232. return 1;
  233. #endif
  234. err = request_irq(virq, smp_ipi_action[msg],
  235. IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
  236. smp_ipi_name[msg], NULL);
  237. WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
  238. virq, smp_ipi_name[msg], err);
  239. return err;
  240. }
  241. #ifdef CONFIG_PPC_SMP_MUXED_IPI
  242. struct cpu_messages {
  243. long messages; /* current messages */
  244. };
  245. static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
  246. void smp_muxed_ipi_set_message(int cpu, int msg)
  247. {
  248. struct cpu_messages *info = &per_cpu(ipi_message, cpu);
  249. char *message = (char *)&info->messages;
  250. /*
  251. * Order previous accesses before accesses in the IPI handler.
  252. */
  253. smp_mb();
  254. message[msg] = 1;
  255. }
  256. void smp_muxed_ipi_message_pass(int cpu, int msg)
  257. {
  258. smp_muxed_ipi_set_message(cpu, msg);
  259. /*
  260. * cause_ipi functions are required to include a full barrier
  261. * before doing whatever causes the IPI.
  262. */
  263. smp_ops->cause_ipi(cpu);
  264. }
  265. #ifdef __BIG_ENDIAN__
  266. #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
  267. #else
  268. #define IPI_MESSAGE(A) (1uL << (8 * (A)))
  269. #endif
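/*
* An illustrative reading of the encoding above, not new behaviour:
* IPI_MESSAGE(msg) selects a single bit in byte 'msg' of info->messages,
* counted from the least-significant byte on little-endian and from the
* most-significant byte on big-endian. Either way it is the same byte that
* smp_muxed_ipi_set_message() writes with message[msg] = 1, so the xchg()
* in smp_ipi_demux_relaxed() sees exactly the messages that were set.
*/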
  270. irqreturn_t smp_ipi_demux(void)
  271. {
  272. mb(); /* order any irq clear */
  273. return smp_ipi_demux_relaxed();
  274. }
  275. /* sync-free variant. Callers should ensure synchronization */
  276. irqreturn_t smp_ipi_demux_relaxed(void)
  277. {
  278. struct cpu_messages *info;
  279. unsigned long all;
  280. info = this_cpu_ptr(&ipi_message);
  281. do {
  282. all = xchg(&info->messages, 0);
  283. #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
  284. /*
  285. * Must check for PPC_MSG_RM_HOST_ACTION messages
  286. * before PPC_MSG_CALL_FUNCTION messages because when
  287. * a VM is destroyed, we call kick_all_cpus_sync()
  288. * to ensure that any pending PPC_MSG_RM_HOST_ACTION
  289. * messages have completed before we free any VCPUs.
  290. */
  291. if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
  292. kvmppc_xics_ipi_action();
  293. #endif
  294. if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
  295. generic_smp_call_function_interrupt();
  296. if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
  297. scheduler_ipi();
  298. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  299. if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
  300. timer_broadcast_interrupt();
  301. #endif
  302. #ifdef CONFIG_NMI_IPI
  303. if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
  304. nmi_ipi_action(0, NULL);
  305. #endif
  306. } while (info->messages);
  307. return IRQ_HANDLED;
  308. }
  309. #endif /* CONFIG_PPC_SMP_MUXED_IPI */
  310. static inline void do_message_pass(int cpu, int msg)
  311. {
  312. if (smp_ops->message_pass)
  313. smp_ops->message_pass(cpu, msg);
  314. #ifdef CONFIG_PPC_SMP_MUXED_IPI
  315. else
  316. smp_muxed_ipi_message_pass(cpu, msg);
  317. #endif
  318. }
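/*
* Platforms either supply smp_ops->message_pass directly, or (with
* CONFIG_PPC_SMP_MUXED_IPI) just a cause_ipi() hook, in which case the
* muxed path above packs the message into a per-cpu byte array first.
*/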
  319. void smp_send_reschedule(int cpu)
  320. {
  321. if (likely(smp_ops))
  322. do_message_pass(cpu, PPC_MSG_RESCHEDULE);
  323. }
  324. EXPORT_SYMBOL_GPL(smp_send_reschedule);
  325. void arch_send_call_function_single_ipi(int cpu)
  326. {
  327. do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
  328. }
  329. void arch_send_call_function_ipi_mask(const struct cpumask *mask)
  330. {
  331. unsigned int cpu;
  332. for_each_cpu(cpu, mask)
  333. do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
  334. }
  335. #ifdef CONFIG_NMI_IPI
  336. /*
  337. * "NMI IPI" system.
  338. *
  339. * NMI IPIs may not be recoverable, so should not be used as ongoing part of
  340. * a running system. They can be used for crash, debug, halt/reboot, etc.
  341. *
  342. * The IPI call waits with interrupts disabled until all targets enter the
  343. * NMI handler, then returns. Subsequent IPIs can be issued before targets
  344. * have returned from their handlers, so there is no guarantee about
  345. * concurrency or re-entrancy.
  346. *
  347. * A new NMI can be issued before all targets exit the handler.
  348. *
  349. * The IPI call may time out without all targets entering the NMI handler.
  350. * In that case, there is some logic to recover (and ignore subsequent
  351. * NMI interrupts that may eventually be raised), but the platform interrupt
  352. * handler may not be able to distinguish this from other exception causes,
  353. * which may cause a crash.
  354. */
  355. static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
  356. static struct cpumask nmi_ipi_pending_mask;
  357. static bool nmi_ipi_busy = false;
  358. static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
  359. noinstr static void nmi_ipi_lock_start(unsigned long *flags)
  360. {
  361. raw_local_irq_save(*flags);
  362. hard_irq_disable();
  363. while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
  364. raw_local_irq_restore(*flags);
  365. spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
  366. raw_local_irq_save(*flags);
  367. hard_irq_disable();
  368. }
  369. }
  370. noinstr static void nmi_ipi_lock(void)
  371. {
  372. while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
  373. spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
  374. }
  375. noinstr static void nmi_ipi_unlock(void)
  376. {
  377. smp_mb();
  378. WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1);
  379. arch_atomic_set(&__nmi_ipi_lock, 0);
  380. }
  381. noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
  382. {
  383. nmi_ipi_unlock();
  384. raw_local_irq_restore(*flags);
  385. }
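/*
* The helpers above are a small hand-rolled spinlock built on
* arch_atomic_cmpxchg(). They are marked noinstr and stick to the raw/arch
* primitives, presumably so they are safe to take from the real NMI path
* (smp_handle_nmi_ipi() below) without triggering instrumentation.
*/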
  386. /*
  387. * Platform NMI handler calls this to ack
  388. */
  389. noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
  390. {
  391. void (*fn)(struct pt_regs *) = NULL;
  392. unsigned long flags;
  393. int me = raw_smp_processor_id();
  394. int ret = 0;
  395. /*
  396. * Unexpected NMIs are possible here because the interrupt may not
  397. * be able to distinguish NMI IPIs from other types of NMIs, or
  398. * because the caller may have timed out.
  399. */
  400. nmi_ipi_lock_start(&flags);
  401. if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
  402. cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
  403. fn = READ_ONCE(nmi_ipi_function);
  404. WARN_ON_ONCE(!fn);
  405. ret = 1;
  406. }
  407. nmi_ipi_unlock_end(&flags);
  408. if (fn)
  409. fn(regs);
  410. return ret;
  411. }
  412. static void do_smp_send_nmi_ipi(int cpu, bool safe)
  413. {
  414. if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
  415. return;
  416. if (cpu >= 0) {
  417. do_message_pass(cpu, PPC_MSG_NMI_IPI);
  418. } else {
  419. int c;
  420. for_each_online_cpu(c) {
  421. if (c == raw_smp_processor_id())
  422. continue;
  423. do_message_pass(c, PPC_MSG_NMI_IPI);
  424. }
  425. }
  426. }
  427. /*
  428. * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
  429. * - fn is the target callback function.
  430. * - delay_us > 0 is the delay before giving up waiting for targets to
  431. * begin executing the handler, == 0 specifies indefinite delay.
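* For example, smp_send_debugger_break() below passes delay_us == 1000000,
* i.e. it waits up to roughly one second (the wait loop does udelay(1) per
* iteration) for the targets to enter the handler before giving up.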
  432. */
  433. static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
  434. u64 delay_us, bool safe)
  435. {
  436. unsigned long flags;
  437. int me = raw_smp_processor_id();
  438. int ret = 1;
  439. BUG_ON(cpu == me);
  440. BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
  441. if (unlikely(!smp_ops))
  442. return 0;
  443. nmi_ipi_lock_start(&flags);
  444. while (nmi_ipi_busy) {
  445. nmi_ipi_unlock_end(&flags);
  446. spin_until_cond(!nmi_ipi_busy);
  447. nmi_ipi_lock_start(&flags);
  448. }
  449. nmi_ipi_busy = true;
  450. nmi_ipi_function = fn;
  451. WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
  452. if (cpu < 0) {
  453. /* ALL_OTHERS */
  454. cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
  455. cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
  456. } else {
  457. cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
  458. }
  459. nmi_ipi_unlock();
  460. /* Interrupts remain hard disabled */
  461. do_smp_send_nmi_ipi(cpu, safe);
  462. nmi_ipi_lock();
  463. /* nmi_ipi_busy is set here, so unlock/lock is okay */
  464. while (!cpumask_empty(&nmi_ipi_pending_mask)) {
  465. nmi_ipi_unlock();
  466. udelay(1);
  467. nmi_ipi_lock();
  468. if (delay_us) {
  469. delay_us--;
  470. if (!delay_us)
  471. break;
  472. }
  473. }
  474. if (!cpumask_empty(&nmi_ipi_pending_mask)) {
  475. /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
  476. ret = 0;
  477. cpumask_clear(&nmi_ipi_pending_mask);
  478. }
  479. nmi_ipi_function = NULL;
  480. nmi_ipi_busy = false;
  481. nmi_ipi_unlock_end(&flags);
  482. return ret;
  483. }
  484. int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
  485. {
  486. return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
  487. }
  488. int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
  489. {
  490. return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
  491. }
  492. #endif /* CONFIG_NMI_IPI */
  493. #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
  494. void tick_broadcast(const struct cpumask *mask)
  495. {
  496. unsigned int cpu;
  497. for_each_cpu(cpu, mask)
  498. do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
  499. }
  500. #endif
  501. #ifdef CONFIG_DEBUGGER
  502. static void debugger_ipi_callback(struct pt_regs *regs)
  503. {
  504. debugger_ipi(regs);
  505. }
  506. void smp_send_debugger_break(void)
  507. {
  508. smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
  509. }
  510. #endif
  511. #ifdef CONFIG_KEXEC_CORE
  512. void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
  513. {
  514. int cpu;
  515. smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
  516. if (kdump_in_progress() && crash_wake_offline) {
  517. for_each_present_cpu(cpu) {
  518. if (cpu_online(cpu))
  519. continue;
  520. /*
  521. * crash_ipi_callback will wait for
  522. * all cpus, including offline CPUs.
  523. * We don't care about nmi_ipi_function.
  524. * Offline cpus will jump straight into
  525. * crash_ipi_callback, we can skip the
  526. * entire NMI dance and waiting for
  527. * cpus to clear pending mask, etc.
  528. */
  529. do_smp_send_nmi_ipi(cpu, false);
  530. }
  531. }
  532. }
  533. #endif
  534. void crash_smp_send_stop(void)
  535. {
  536. static bool stopped = false;
  537. /*
  538. * In case of fadump, register data for all CPUs is captured by f/w
  539. * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
  540. * this rtas call to avoid tricky post processing of those CPUs'
  541. * backtraces.
  542. */
  543. if (should_fadump_crash())
  544. return;
  545. if (stopped)
  546. return;
  547. stopped = true;
  548. #ifdef CONFIG_KEXEC_CORE
  549. if (kexec_crash_image) {
  550. crash_kexec_prepare();
  551. return;
  552. }
  553. #endif
  554. smp_send_stop();
  555. }
  556. #ifdef CONFIG_NMI_IPI
  557. static void nmi_stop_this_cpu(struct pt_regs *regs)
  558. {
  559. /*
  560. * IRQs are already hard disabled by smp_handle_nmi_ipi().
  561. */
  562. set_cpu_online(smp_processor_id(), false);
  563. spin_begin();
  564. while (1)
  565. spin_cpu_relax();
  566. }
  567. void smp_send_stop(void)
  568. {
  569. smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
  570. }
  571. #else /* CONFIG_NMI_IPI */
  572. static void stop_this_cpu(void *dummy)
  573. {
  574. hard_irq_disable();
  575. /*
  576. * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
  577. * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
  578. * to know other CPUs are offline before it breaks locks to flush
  579. * printk buffers, in case we panic()ed while holding the lock.
  580. */
  581. set_cpu_online(smp_processor_id(), false);
  582. spin_begin();
  583. while (1)
  584. spin_cpu_relax();
  585. }
  586. void smp_send_stop(void)
  587. {
  588. static bool stopped = false;
  589. /*
  590. * Prevent waiting on csd lock from a previous smp_send_stop.
  591. * This is racy, but in general callers try to do the right
  592. * thing and only fire off one smp_send_stop (e.g., see
  593. * kernel/panic.c)
  594. */
  595. if (stopped)
  596. return;
  597. stopped = true;
  598. smp_call_function(stop_this_cpu, NULL, 0);
  599. }
  600. #endif /* CONFIG_NMI_IPI */
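/*
* Two flavours of smp_send_stop(): with CONFIG_NMI_IPI the other CPUs are
* stopped via an NMI IPI with a one-second timeout; otherwise an ordinary
* smp_call_function() is used, with the 'stopped' flag preventing a second
* caller from waiting on the csd lock of an earlier stop.
*/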
  601. static struct task_struct *current_set[NR_CPUS];
  602. static void smp_store_cpu_info(int id)
  603. {
  604. per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
  605. #ifdef CONFIG_PPC_E500
  606. per_cpu(next_tlbcam_idx, id)
  607. = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
  608. #endif
  609. }
  610. /*
  611. * Relationships between CPUs are maintained in a set of per-cpu cpumasks, so
  612. * rather than just passing around the cpumask we pass around a function that
  613. * returns that cpumask for the given CPU.
  614. */
  615. static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
  616. {
  617. cpumask_set_cpu(i, get_cpumask(j));
  618. cpumask_set_cpu(j, get_cpumask(i));
  619. }
  620. #ifdef CONFIG_HOTPLUG_CPU
  621. static void set_cpus_unrelated(int i, int j,
  622. struct cpumask *(*get_cpumask)(int))
  623. {
  624. cpumask_clear_cpu(i, get_cpumask(j));
  625. cpumask_clear_cpu(j, get_cpumask(i));
  626. }
  627. #endif
  628. /*
  629. * Extends set_cpus_related. Instead of setting one CPU at a time in
  630. * dstmask, set the whole srcmask in one shot. dstmask should be a superset of srcmask.
  631. */
  632. static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
  633. struct cpumask *(*dstmask)(int))
  634. {
  635. struct cpumask *mask;
  636. int k;
  637. mask = srcmask(j);
  638. for_each_cpu(k, srcmask(i))
  639. cpumask_or(dstmask(k), dstmask(k), mask);
  640. if (i == j)
  641. return;
  642. mask = srcmask(i);
  643. for_each_cpu(k, srcmask(j))
  644. cpumask_or(dstmask(k), dstmask(k), mask);
  645. }
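/*
* For example, or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask)
* (as used in update_mask_by_l2() below) ORs cpu's submask into the L2-cache
* mask of every CPU in that submask in a single pass.
*/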
  646. /*
  647. * parse_thread_groups: Parses the "ibm,thread-groups" device tree
  648. * property for the CPU device node @dn and stores
  649. * the parsed output in the thread_groups_list
  650. * structure @tglp.
  651. *
  652. * @dn: The device node of the CPU device.
  653. * @tglp: Pointer to a thread group list structure into which the parsed
  654. * output of "ibm,thread-groups" is stored.
  655. *
  656. * ibm,thread-groups[0..N-1] array defines which group of threads in
  657. * the CPU-device node can be grouped together based on the property.
  658. *
  659. * This array can represent thread groupings for multiple properties.
  660. *
  661. * ibm,thread-groups[i + 0] tells us the property based on which the
  662. * threads are being grouped together. If this value is 1, it implies
  663. * that the threads in the same group share the L1 and translation caches. If
  664. * the value is 2, it implies that the threads in the same group share
  665. * the same L2 cache.
  666. *
  667. * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
  668. * property ibm,thread-groups[i]
  669. *
  670. * ibm,thread-groups[i+2] tells us the number of threads in each such
  671. * group.
  672. * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
  673. *
  674. * ibm,thread-groups[i+3..i+k+2] is the list of threads identified by
  675. * "ibm,ppc-interrupt-server#s" arranged as per their membership in
  676. * the grouping.
  677. *
  678. * Example:
  679. * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
  680. * This can be decomposed into two consecutive arrays:
  681. * a) [1,2,4,8,10,12,14,9,11,13,15]
  682. * b) [2,2,4,8,10,12,14,9,11,13,15]
  683. *
  684. * where in,
  685. *
  686. * a) provides information of Property "1" being shared by "2" groups,
  687. * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of
  688. * the first group is {8,10,12,14} and the
  689. * "ibm,ppc-interrupt-server#s" of the second group is
  690. * {9,11,13,15}. Property "1" indicates that the threads in the
  691. * group share the L1 cache, translation cache and instruction data
  692. * flow.
  693. *
  694. * b) provides information of Property "2" being shared by "2" groups,
  695. * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
  696. * the first group is {8,10,12,14} and the
  697. * "ibm,ppc-interrupt-server#s" of the second group is
  698. * {9,11,13,15}. Property "2" indicates that the threads in each
  699. * group share the L2-cache.
  700. *
  701. * Returns 0 on success, -EINVAL if the property does not exist,
  702. * -ENODATA if property does not have a value, and -EOVERFLOW if the
  703. * property data isn't large enough.
  704. */
  705. static int parse_thread_groups(struct device_node *dn,
  706. struct thread_groups_list *tglp)
  707. {
  708. unsigned int property_idx = 0;
  709. u32 *thread_group_array;
  710. size_t total_threads;
  711. int ret = 0, count;
  712. u32 *thread_list;
  713. int i = 0;
  714. count = of_property_count_u32_elems(dn, "ibm,thread-groups");
  715. thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
  716. ret = of_property_read_u32_array(dn, "ibm,thread-groups",
  717. thread_group_array, count);
  718. if (ret)
  719. goto out_free;
  720. while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
  721. int j;
  722. struct thread_groups *tg = &tglp->property_tgs[property_idx++];
  723. tg->property = thread_group_array[i];
  724. tg->nr_groups = thread_group_array[i + 1];
  725. tg->threads_per_group = thread_group_array[i + 2];
  726. total_threads = tg->nr_groups * tg->threads_per_group;
  727. thread_list = &thread_group_array[i + 3];
  728. for (j = 0; j < total_threads; j++)
  729. tg->thread_list[j] = thread_list[j];
  730. i = i + 3 + total_threads;
  731. }
  732. tglp->nr_properties = property_idx;
  733. out_free:
  734. kfree(thread_group_array);
  735. return ret;
  736. }
  737. /*
  738. * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
  739. * that @cpu belongs to.
  740. *
  741. * @cpu : The logical CPU whose thread group is being searched.
  742. * @tg : The thread-group structure of the CPU node which @cpu belongs
  743. * to.
  744. *
  745. * Returns the index to tg->thread_list that points to the start
  746. * of the thread_group that @cpu belongs to.
  747. *
  748. * Returns -1 if cpu doesn't belong to any of the groups pointed to by
  749. * tg->thread_list.
  750. */
  751. static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
  752. {
  753. int hw_cpu_id = get_hard_smp_processor_id(cpu);
  754. int i, j;
  755. for (i = 0; i < tg->nr_groups; i++) {
  756. int group_start = i * tg->threads_per_group;
  757. for (j = 0; j < tg->threads_per_group; j++) {
  758. int idx = group_start + j;
  759. if (tg->thread_list[idx] == hw_cpu_id)
  760. return group_start;
  761. }
  762. }
  763. return -1;
  764. }
  765. static struct thread_groups *__init get_thread_groups(int cpu,
  766. int group_property,
  767. int *err)
  768. {
  769. struct device_node *dn = of_get_cpu_node(cpu, NULL);
  770. struct thread_groups_list *cpu_tgl = &tgl[cpu];
  771. struct thread_groups *tg = NULL;
  772. int i;
  773. *err = 0;
  774. if (!dn) {
  775. *err = -ENODATA;
  776. return NULL;
  777. }
  778. if (!cpu_tgl->nr_properties) {
  779. *err = parse_thread_groups(dn, cpu_tgl);
  780. if (*err)
  781. goto out;
  782. }
  783. for (i = 0; i < cpu_tgl->nr_properties; i++) {
  784. if (cpu_tgl->property_tgs[i].property == group_property) {
  785. tg = &cpu_tgl->property_tgs[i];
  786. break;
  787. }
  788. }
  789. if (!tg)
  790. *err = -EINVAL;
  791. out:
  792. of_node_put(dn);
  793. return tg;
  794. }
  795. static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
  796. int cpu, int cpu_group_start)
  797. {
  798. int first_thread = cpu_first_thread_sibling(cpu);
  799. int i;
  800. zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
  801. for (i = first_thread; i < first_thread + threads_per_core; i++) {
  802. int i_group_start = get_cpu_thread_group_start(i, tg);
  803. if (unlikely(i_group_start == -1)) {
  804. WARN_ON_ONCE(1);
  805. return -ENODATA;
  806. }
  807. if (i_group_start == cpu_group_start)
  808. cpumask_set_cpu(i, *mask);
  809. }
  810. return 0;
  811. }
  812. static int __init init_thread_group_cache_map(int cpu, int cache_property)
  813. {
  814. int cpu_group_start = -1, err = 0;
  815. struct thread_groups *tg = NULL;
  816. cpumask_var_t *mask = NULL;
  817. if (cache_property != THREAD_GROUP_SHARE_L1 &&
  818. cache_property != THREAD_GROUP_SHARE_L2_L3)
  819. return -EINVAL;
  820. tg = get_thread_groups(cpu, cache_property, &err);
  821. if (!tg)
  822. return err;
  823. cpu_group_start = get_cpu_thread_group_start(cpu, tg);
  824. if (unlikely(cpu_group_start == -1)) {
  825. WARN_ON_ONCE(1);
  826. return -ENODATA;
  827. }
  828. if (cache_property == THREAD_GROUP_SHARE_L1) {
  829. mask = &per_cpu(thread_group_l1_cache_map, cpu);
  830. update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
  831. }
  832. else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
  833. mask = &per_cpu(thread_group_l2_cache_map, cpu);
  834. update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
  835. mask = &per_cpu(thread_group_l3_cache_map, cpu);
  836. update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
  837. }
  838. return 0;
  839. }
  840. static bool shared_caches;
  841. #ifdef CONFIG_SCHED_SMT
  842. /* Scheduler flags for the SMT level; SD_ASYM_PACKING is added on CPUs with asymmetric SMT */
  843. static int powerpc_smt_flags(void)
  844. {
  845. int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
  846. if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
  847. printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
  848. flags |= SD_ASYM_PACKING;
  849. }
  850. return flags;
  851. }
  852. #endif
  853. /*
  854. * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
  855. * This topology makes it *much* cheaper to migrate tasks between adjacent cores
  856. * since the migrated task remains cache hot. We want to take advantage of this
  857. * at the scheduler level so an extra topology level is required.
  858. */
  859. static int powerpc_shared_cache_flags(void)
  860. {
  861. return SD_SHARE_PKG_RESOURCES;
  862. }
  863. /*
  864. * We can't just pass cpu_l2_cache_mask() directly because it
  865. * returns a non-const pointer and the compiler barfs on that.
  866. */
  867. static const struct cpumask *shared_cache_mask(int cpu)
  868. {
  869. return per_cpu(cpu_l2_cache_map, cpu);
  870. }
  871. #ifdef CONFIG_SCHED_SMT
  872. static const struct cpumask *smallcore_smt_mask(int cpu)
  873. {
  874. return cpu_smallcore_mask(cpu);
  875. }
  876. #endif
  877. static struct cpumask *cpu_coregroup_mask(int cpu)
  878. {
  879. return per_cpu(cpu_coregroup_map, cpu);
  880. }
  881. static bool has_coregroup_support(void)
  882. {
  883. return coregroup_enabled;
  884. }
  885. static const struct cpumask *cpu_mc_mask(int cpu)
  886. {
  887. return cpu_coregroup_mask(cpu);
  888. }
  889. static struct sched_domain_topology_level powerpc_topology[] = {
  890. #ifdef CONFIG_SCHED_SMT
  891. { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
  892. #endif
  893. { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
  894. { cpu_mc_mask, SD_INIT_NAME(MC) },
  895. { cpu_cpu_mask, SD_INIT_NAME(DIE) },
  896. { NULL, },
  897. };
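/*
* The levels above are SMT (thread siblings), CACHE (L2-cache siblings), MC
* (coregroup) and DIE. fixup_topology() below swaps the SMT mask for the
* small-core mask on big-core systems, falls back to the CACHE mask for MC
* when coregroups are not supported, and collapses adjacent levels whose
* masks end up identical rather than leaving the scheduler to degenerate
* them.
*/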
  898. static int __init init_big_cores(void)
  899. {
  900. int cpu;
  901. for_each_possible_cpu(cpu) {
  902. int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
  903. if (err)
  904. return err;
  905. zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
  906. GFP_KERNEL,
  907. cpu_to_node(cpu));
  908. }
  909. has_big_cores = true;
  910. for_each_possible_cpu(cpu) {
  911. int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
  912. if (err)
  913. return err;
  914. }
  915. thread_group_shares_l2 = true;
  916. thread_group_shares_l3 = true;
  917. pr_debug("L2/L3 cache only shared by the threads in the small core\n");
  918. return 0;
  919. }
  920. void __init smp_prepare_cpus(unsigned int max_cpus)
  921. {
  922. unsigned int cpu;
  923. DBG("smp_prepare_cpus\n");
  924. /*
  925. * setup_cpu may need to be called on the boot cpu. We haven't
  926. * spun any cpus up, but let's be paranoid.
  927. */
  928. BUG_ON(boot_cpuid != smp_processor_id());
  929. /* Fixup boot cpu */
  930. smp_store_cpu_info(boot_cpuid);
  931. cpu_callin_map[boot_cpuid] = 1;
  932. for_each_possible_cpu(cpu) {
  933. zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
  934. GFP_KERNEL, cpu_to_node(cpu));
  935. zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
  936. GFP_KERNEL, cpu_to_node(cpu));
  937. zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
  938. GFP_KERNEL, cpu_to_node(cpu));
  939. if (has_coregroup_support())
  940. zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
  941. GFP_KERNEL, cpu_to_node(cpu));
  942. #ifdef CONFIG_NUMA
  943. /*
  944. * numa_node_id() works after this.
  945. */
  946. if (cpu_present(cpu)) {
  947. set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
  948. set_cpu_numa_mem(cpu,
  949. local_memory_node(numa_cpu_lookup_table[cpu]));
  950. }
  951. #endif
  952. }
  953. /* Init the cpumasks so the boot CPU is related to itself */
  954. cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
  955. cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
  956. cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
  957. if (has_coregroup_support())
  958. cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
  959. init_big_cores();
  960. if (has_big_cores) {
  961. cpumask_set_cpu(boot_cpuid,
  962. cpu_smallcore_mask(boot_cpuid));
  963. }
  964. if (cpu_to_chip_id(boot_cpuid) != -1) {
  965. int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
  966. /*
  967. * All threads of a core belong to the same core, so
  968. * chip_id_lookup_table will have one entry per core.
  969. * Assumption: if boot_cpuid doesn't have a chip-id, then no
  970. * other CPU will have one either.
  971. */
  972. chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
  973. if (chip_id_lookup_table)
  974. memset(chip_id_lookup_table, -1, sizeof(int) * idx);
  975. }
  976. if (smp_ops && smp_ops->probe)
  977. smp_ops->probe();
  978. }
  979. void smp_prepare_boot_cpu(void)
  980. {
  981. BUG_ON(smp_processor_id() != boot_cpuid);
  982. #ifdef CONFIG_PPC64
  983. paca_ptrs[boot_cpuid]->__current = current;
  984. #endif
  985. set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
  986. current_set[boot_cpuid] = current;
  987. }
  988. #ifdef CONFIG_HOTPLUG_CPU
  989. int generic_cpu_disable(void)
  990. {
  991. unsigned int cpu = smp_processor_id();
  992. if (cpu == boot_cpuid)
  993. return -EBUSY;
  994. set_cpu_online(cpu, false);
  995. #ifdef CONFIG_PPC64
  996. vdso_data->processorCount--;
  997. #endif
  998. /* Update affinity of all IRQs previously aimed at this CPU */
  999. irq_migrate_all_off_this_cpu();
  1000. /*
  1001. * Depending on the details of the interrupt controller, it's possible
  1002. * that one of the interrupts we just migrated away from this CPU is
  1003. * actually already pending on this CPU. If we leave it in that state
  1004. * the interrupt will never be EOI'ed, and will never fire again. So
  1005. * temporarily enable interrupts here, to allow any pending interrupt to
  1006. * be received (and EOI'ed), before we take this CPU offline.
  1007. */
  1008. local_irq_enable();
  1009. mdelay(1);
  1010. local_irq_disable();
  1011. return 0;
  1012. }
  1013. void generic_cpu_die(unsigned int cpu)
  1014. {
  1015. int i;
  1016. for (i = 0; i < 100; i++) {
  1017. smp_rmb();
  1018. if (is_cpu_dead(cpu))
  1019. return;
  1020. msleep(100);
  1021. }
  1022. printk(KERN_ERR "CPU%d didn't die...\n", cpu);
  1023. }
  1024. void generic_set_cpu_dead(unsigned int cpu)
  1025. {
  1026. per_cpu(cpu_state, cpu) = CPU_DEAD;
  1027. }
  1028. /*
  1029. * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
  1030. * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
  1031. * which makes the delay in generic_cpu_die() not happen.
  1032. */
  1033. void generic_set_cpu_up(unsigned int cpu)
  1034. {
  1035. per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
  1036. }
  1037. int generic_check_cpu_restart(unsigned int cpu)
  1038. {
  1039. return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
  1040. }
  1041. int is_cpu_dead(unsigned int cpu)
  1042. {
  1043. return per_cpu(cpu_state, cpu) == CPU_DEAD;
  1044. }
  1045. static bool secondaries_inhibited(void)
  1046. {
  1047. return kvm_hv_mode_active();
  1048. }
  1049. #else /* HOTPLUG_CPU */
  1050. #define secondaries_inhibited() 0
  1051. #endif
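/*
* secondaries_inhibited() is tied to kvm_hv_mode_active() above: while KVM HV
* mode is active, secondary (sub-core) threads are not allowed to come online,
* presumably because KVM HV needs those hardware threads under its own
* control -- see the cpu_thread_in_subcore() check in __cpu_up() below.
*/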
  1052. static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
  1053. {
  1054. #ifdef CONFIG_PPC64
  1055. paca_ptrs[cpu]->__current = idle;
  1056. paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
  1057. THREAD_SIZE - STACK_FRAME_OVERHEAD;
  1058. #endif
  1059. task_thread_info(idle)->cpu = cpu;
  1060. secondary_current = current_set[cpu] = idle;
  1061. }
  1062. int __cpu_up(unsigned int cpu, struct task_struct *tidle)
  1063. {
  1064. const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
  1065. const bool booting = system_state < SYSTEM_RUNNING;
  1066. const unsigned long hp_spin_ms = 1;
  1067. unsigned long deadline;
  1068. int rc;
  1069. const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
  1070. /*
  1071. * Don't allow secondary threads to come online if inhibited
  1072. */
  1073. if (threads_per_core > 1 && secondaries_inhibited() &&
  1074. cpu_thread_in_subcore(cpu))
  1075. return -EBUSY;
  1076. if (smp_ops == NULL ||
  1077. (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
  1078. return -EINVAL;
  1079. cpu_idle_thread_init(cpu, tidle);
  1080. /*
  1081. * The platform might need to allocate resources prior to bringing
  1082. * up the CPU
  1083. */
  1084. if (smp_ops->prepare_cpu) {
  1085. rc = smp_ops->prepare_cpu(cpu);
  1086. if (rc)
  1087. return rc;
  1088. }
  1089. /* Make sure callin-map entry is 0 (can be left over from a CPU
  1090. * hotplug).
  1091. */
  1092. cpu_callin_map[cpu] = 0;
  1093. /* The information for processor bringup must
  1094. * be written out to main store before we release
  1095. * the processor.
  1096. */
  1097. smp_mb();
  1098. /* wake up cpus */
  1099. DBG("smp: kicking cpu %d\n", cpu);
  1100. rc = smp_ops->kick_cpu(cpu);
  1101. if (rc) {
  1102. pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
  1103. return rc;
  1104. }
  1105. /*
  1106. * At boot time, simply spin on the callin word until the
  1107. * deadline passes.
  1108. *
  1109. * At run time, spin for an optimistic amount of time to avoid
  1110. * sleeping in the common case.
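* Concretely, with the constants above: at boot we spin for up to 5 seconds
* (boot_spin_ms); after boot we spin for about 1 ms (hp_spin_ms) and then
* sleep in 10 ms steps for up to 100 seconds (sleep_wait_ms).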
  1111. */
  1112. deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
  1113. spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
  1114. if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
  1115. const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
  1116. const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
  1117. deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
  1118. while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
  1119. fsleep(sleep_interval_us);
  1120. }
  1121. if (!cpu_callin_map[cpu]) {
  1122. printk(KERN_ERR "Processor %u is stuck.\n", cpu);
  1123. return -ENOENT;
  1124. }
  1125. DBG("Processor %u found.\n", cpu);
  1126. if (smp_ops->give_timebase)
  1127. smp_ops->give_timebase();
  1128. /* Wait until cpu puts itself in the online & active maps */
  1129. spin_until_cond(cpu_online(cpu));
  1130. return 0;
  1131. }
  1132. /* Return the value of the reg property corresponding to the given
  1133. * logical cpu.
  1134. */
  1135. int cpu_to_core_id(int cpu)
  1136. {
  1137. struct device_node *np;
  1138. int id = -1;
  1139. np = of_get_cpu_node(cpu, NULL);
  1140. if (!np)
  1141. goto out;
  1142. id = of_get_cpu_hwid(np, 0);
  1143. out:
  1144. of_node_put(np);
  1145. return id;
  1146. }
  1147. EXPORT_SYMBOL_GPL(cpu_to_core_id);
  1148. /* Helper routines for cpu to core mapping */
  1149. int cpu_core_index_of_thread(int cpu)
  1150. {
  1151. return cpu >> threads_shift;
  1152. }
  1153. EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
  1154. int cpu_first_thread_of_core(int core)
  1155. {
  1156. return core << threads_shift;
  1157. }
  1158. EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
  1159. /* Must be called when no change can occur to cpu_present_mask,
  1160. * i.e. during cpu online or offline.
  1161. */
  1162. static struct device_node *cpu_to_l2cache(int cpu)
  1163. {
  1164. struct device_node *np;
  1165. struct device_node *cache;
  1166. if (!cpu_present(cpu))
  1167. return NULL;
  1168. np = of_get_cpu_node(cpu, NULL);
  1169. if (np == NULL)
  1170. return NULL;
  1171. cache = of_find_next_cache_node(np);
  1172. of_node_put(np);
  1173. return cache;
  1174. }
  1175. static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
  1176. {
  1177. struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
  1178. struct device_node *l2_cache, *np;
  1179. int i;
  1180. if (has_big_cores)
  1181. submask_fn = cpu_smallcore_mask;
  1182. /*
  1183. * If the threads in a thread-group share L2 cache, then the
  1184. * L2-mask can be obtained from thread_group_l2_cache_map.
  1185. */
  1186. if (thread_group_shares_l2) {
  1187. cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
  1188. for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
  1189. if (cpu_online(i))
  1190. set_cpus_related(i, cpu, cpu_l2_cache_mask);
  1191. }
  1192. /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
  1193. if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
  1194. !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
  1195. pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
  1196. cpu);
  1197. }
  1198. return true;
  1199. }
  1200. l2_cache = cpu_to_l2cache(cpu);
  1201. if (!l2_cache || !*mask) {
  1202. /* Assume only core siblings share cache with this CPU */
  1203. for_each_cpu(i, cpu_sibling_mask(cpu))
  1204. set_cpus_related(cpu, i, cpu_l2_cache_mask);
  1205. return false;
  1206. }
  1207. cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
  1208. /* Update l2-cache mask with all the CPUs that are part of submask */
  1209. or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
  1210. /* Skip all CPUs already part of current CPU l2-cache mask */
  1211. cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
  1212. for_each_cpu(i, *mask) {
  1213. /*
  1214. * when updating the masks the current CPU has not been marked
  1215. * online, but we need to update the cache masks
  1216. */
  1217. np = cpu_to_l2cache(i);
  1218. /* Skip all CPUs already part of current CPU l2-cache */
  1219. if (np == l2_cache) {
  1220. or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
  1221. cpumask_andnot(*mask, *mask, submask_fn(i));
  1222. } else {
  1223. cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
  1224. }
  1225. of_node_put(np);
  1226. }
  1227. of_node_put(l2_cache);
  1228. return true;
  1229. }
  1230. #ifdef CONFIG_HOTPLUG_CPU
  1231. static void remove_cpu_from_masks(int cpu)
  1232. {
  1233. struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
  1234. int i;
  1235. unmap_cpu_from_node(cpu);
  1236. if (shared_caches)
  1237. mask_fn = cpu_l2_cache_mask;
  1238. for_each_cpu(i, mask_fn(cpu)) {
  1239. set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
  1240. set_cpus_unrelated(cpu, i, cpu_sibling_mask);
  1241. if (has_big_cores)
  1242. set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
  1243. }
  1244. for_each_cpu(i, cpu_core_mask(cpu))
  1245. set_cpus_unrelated(cpu, i, cpu_core_mask);
  1246. if (has_coregroup_support()) {
  1247. for_each_cpu(i, cpu_coregroup_mask(cpu))
  1248. set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
  1249. }
  1250. }
  1251. #endif
  1252. static inline void add_cpu_to_smallcore_masks(int cpu)
  1253. {
  1254. int i;
  1255. if (!has_big_cores)
  1256. return;
  1257. cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
  1258. for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
  1259. if (cpu_online(i))
  1260. set_cpus_related(i, cpu, cpu_smallcore_mask);
  1261. }
  1262. }
  1263. static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
  1264. {
  1265. struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
  1266. int coregroup_id = cpu_to_coregroup_id(cpu);
  1267. int i;
  1268. if (shared_caches)
  1269. submask_fn = cpu_l2_cache_mask;
  1270. if (!*mask) {
  1271. /* Assume only siblings are part of this CPU's coregroup */
  1272. for_each_cpu(i, submask_fn(cpu))
  1273. set_cpus_related(cpu, i, cpu_coregroup_mask);
  1274. return;
  1275. }
  1276. cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
  1277. /* Update coregroup mask with all the CPUs that are part of submask */
  1278. or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
  1279. /* Skip all CPUs already part of coregroup mask */
  1280. cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
  1281. for_each_cpu(i, *mask) {
  1282. /* Skip all CPUs not part of this coregroup */
  1283. if (coregroup_id == cpu_to_coregroup_id(i)) {
  1284. or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
  1285. cpumask_andnot(*mask, *mask, submask_fn(i));
  1286. } else {
  1287. cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
  1288. }
  1289. }
  1290. }
  1291. static void add_cpu_to_masks(int cpu)
  1292. {
  1293. struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
  1294. int first_thread = cpu_first_thread_sibling(cpu);
  1295. cpumask_var_t mask;
  1296. int chip_id = -1;
  1297. bool ret;
  1298. int i;
  1299. /*
  1300. * This CPU will not be in the online mask yet so we need to manually
  1301. * add it to its own thread sibling mask.
  1302. */
  1303. map_cpu_to_node(cpu, cpu_to_node(cpu));
  1304. cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
  1305. cpumask_set_cpu(cpu, cpu_core_mask(cpu));
  1306. for (i = first_thread; i < first_thread + threads_per_core; i++)
  1307. if (cpu_online(i))
  1308. set_cpus_related(i, cpu, cpu_sibling_mask);
  1309. add_cpu_to_smallcore_masks(cpu);
  1310. /* In CPU-hotplug path, hence use GFP_ATOMIC */
  1311. ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
  1312. update_mask_by_l2(cpu, &mask);
  1313. if (has_coregroup_support())
  1314. update_coregroup_mask(cpu, &mask);
  1315. if (chip_id_lookup_table && ret)
  1316. chip_id = cpu_to_chip_id(cpu);
  1317. if (shared_caches)
  1318. submask_fn = cpu_l2_cache_mask;
  1319. /* Update core_mask with all the CPUs that are part of submask */
  1320. or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
  1321. /* Skip all CPUs already part of current CPU core mask */
  1322. cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
  1323. /* If chip_id is -1, limit the cpu_core_mask to within the DIE */
  1324. if (chip_id == -1)
  1325. cpumask_and(mask, mask, cpu_cpu_mask(cpu));
  1326. for_each_cpu(i, mask) {
  1327. if (chip_id == cpu_to_chip_id(i)) {
  1328. or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
  1329. cpumask_andnot(mask, mask, submask_fn(i));
  1330. } else {
  1331. cpumask_andnot(mask, mask, cpu_core_mask(i));
  1332. }
  1333. }
  1334. free_cpumask_var(mask);
  1335. }
  1336. /* Activate a secondary processor. */
  1337. void start_secondary(void *unused)
  1338. {
  1339. unsigned int cpu = raw_smp_processor_id();
  1340. /* PPC64 calls setup_kup() in early_setup_secondary() */
  1341. if (IS_ENABLED(CONFIG_PPC32))
  1342. setup_kup();
  1343. mmgrab(&init_mm);
  1344. current->active_mm = &init_mm;
  1345. smp_store_cpu_info(cpu);
  1346. set_dec(tb_ticks_per_jiffy);
  1347. rcu_cpu_starting(cpu);
  1348. cpu_callin_map[cpu] = 1;
  1349. if (smp_ops->setup_cpu)
  1350. smp_ops->setup_cpu(cpu);
  1351. if (smp_ops->take_timebase)
  1352. smp_ops->take_timebase();
  1353. secondary_cpu_time_init();
  1354. #ifdef CONFIG_PPC64
  1355. if (system_state == SYSTEM_RUNNING)
  1356. vdso_data->processorCount++;
  1357. vdso_getcpu_init();
  1358. #endif
  1359. set_numa_node(numa_cpu_lookup_table[cpu]);
  1360. set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
  1361. /* Update topology CPU masks */
  1362. add_cpu_to_masks(cpu);
  1363. /*
  1364. * Check for any shared caches. Note that this must be done on a
  1365. * per-core basis because one core in the pair might be disabled.
  1366. */
  1367. if (!shared_caches) {
  1368. struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
  1369. struct cpumask *mask = cpu_l2_cache_mask(cpu);
  1370. if (has_big_cores)
  1371. sibling_mask = cpu_smallcore_mask;
  1372. if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
  1373. shared_caches = true;
  1374. }
  1375. smp_wmb();
  1376. notify_cpu_starting(cpu);
  1377. set_cpu_online(cpu, true);
  1378. boot_init_stack_canary();
  1379. local_irq_enable();
  1380. /* We can enable ftrace for secondary cpus now */
  1381. this_cpu_enable_ftrace();
  1382. cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
  1383. BUG();
  1384. }
  1385. static void __init fixup_topology(void)
  1386. {
  1387. int i;
  1388. #ifdef CONFIG_SCHED_SMT
  1389. if (has_big_cores) {
  1390. pr_info("Big cores detected but using small core scheduling\n");
  1391. powerpc_topology[smt_idx].mask = smallcore_smt_mask;
  1392. }
  1393. #endif
  1394. if (!has_coregroup_support())
  1395. powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
  1396. /*
  1397. * Try to consolidate topology levels here instead of
  1398. * allowing the scheduler to degenerate them.
  1399. * - Don't consolidate if masks are different.
  1400. * - Don't consolidate if sd_flags exist and are different.
  1401. */
  1402. for (i = 1; i <= die_idx; i++) {
  1403. if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
  1404. continue;
  1405. if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
  1406. powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
  1407. continue;
  1408. if (!powerpc_topology[i - 1].sd_flags)
  1409. powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
  1410. powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
  1411. powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
  1412. #ifdef CONFIG_SCHED_DEBUG
  1413. powerpc_topology[i].name = powerpc_topology[i + 1].name;
  1414. #endif
  1415. }
  1416. }
  1417. void __init smp_cpus_done(unsigned int max_cpus)
  1418. {
  1419. /*
  1420. * We are running pinned to the boot CPU, see rest_init().
  1421. */
  1422. if (smp_ops && smp_ops->setup_cpu)
  1423. smp_ops->setup_cpu(boot_cpuid);
  1424. if (smp_ops && smp_ops->bringup_done)
  1425. smp_ops->bringup_done();
  1426. dump_numa_cpu_topology();
  1427. fixup_topology();
  1428. set_sched_topology(powerpc_topology);
  1429. }
  1430. #ifdef CONFIG_HOTPLUG_CPU
  1431. int __cpu_disable(void)
  1432. {
  1433. int cpu = smp_processor_id();
  1434. int err;
  1435. if (!smp_ops->cpu_disable)
  1436. return -ENOSYS;
  1437. this_cpu_disable_ftrace();
  1438. err = smp_ops->cpu_disable();
  1439. if (err)
  1440. return err;
  1441. /* Update sibling maps */
  1442. remove_cpu_from_masks(cpu);
  1443. return 0;
  1444. }
  1445. void __cpu_die(unsigned int cpu)
  1446. {
  1447. if (smp_ops->cpu_die)
  1448. smp_ops->cpu_die(cpu);
  1449. }
  1450. void arch_cpu_idle_dead(void)
  1451. {
  1452. /*
  1453. * Disable ftrace on the down path. This will be re-enabled by
  1454. * start_secondary() via start_secondary_resume() below
  1455. */
  1456. this_cpu_disable_ftrace();
  1457. if (smp_ops->cpu_offline_self)
  1458. smp_ops->cpu_offline_self();
  1459. /* If we return, we re-enter start_secondary */
  1460. start_secondary_resume();
  1461. }
  1462. #endif