setup_64.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. *
  4. * Common boot and setup code.
  5. *
  6. * Copyright (C) 2001 PPC64 Team, IBM Corp
  7. */
  8. #include <linux/export.h>
  9. #include <linux/string.h>
  10. #include <linux/sched.h>
  11. #include <linux/init.h>
  12. #include <linux/kernel.h>
  13. #include <linux/reboot.h>
  14. #include <linux/delay.h>
  15. #include <linux/initrd.h>
  16. #include <linux/seq_file.h>
  17. #include <linux/ioport.h>
  18. #include <linux/console.h>
  19. #include <linux/utsname.h>
  20. #include <linux/tty.h>
  21. #include <linux/root_dev.h>
  22. #include <linux/notifier.h>
  23. #include <linux/cpu.h>
  24. #include <linux/unistd.h>
  25. #include <linux/serial.h>
  26. #include <linux/serial_8250.h>
  27. #include <linux/memblock.h>
  28. #include <linux/pci.h>
  29. #include <linux/lockdep.h>
  30. #include <linux/memory.h>
  31. #include <linux/nmi.h>
  32. #include <linux/pgtable.h>
  33. #include <linux/of.h>
  34. #include <linux/of_fdt.h>
  35. #include <asm/asm-prototypes.h>
  36. #include <asm/kvm_guest.h>
  37. #include <asm/io.h>
  38. #include <asm/kdump.h>
  39. #include <asm/processor.h>
  40. #include <asm/smp.h>
  41. #include <asm/elf.h>
  42. #include <asm/machdep.h>
  43. #include <asm/paca.h>
  44. #include <asm/time.h>
  45. #include <asm/cputable.h>
  46. #include <asm/dt_cpu_ftrs.h>
  47. #include <asm/sections.h>
  48. #include <asm/btext.h>
  49. #include <asm/nvram.h>
  50. #include <asm/setup.h>
  51. #include <asm/rtas.h>
  52. #include <asm/iommu.h>
  53. #include <asm/serial.h>
  54. #include <asm/cache.h>
  55. #include <asm/page.h>
  56. #include <asm/mmu.h>
  57. #include <asm/firmware.h>
  58. #include <asm/xmon.h>
  59. #include <asm/udbg.h>
  60. #include <asm/kexec.h>
  61. #include <asm/code-patching.h>
  62. #include <asm/ftrace.h>
  63. #include <asm/opal.h>
  64. #include <asm/cputhreads.h>
  65. #include <asm/hw_irq.h>
  66. #include <asm/feature-fixups.h>
  67. #include <asm/kup.h>
  68. #include <asm/early_ioremap.h>
  69. #include <asm/pgalloc.h>
  70. #include "setup.h"
/*
 * Polled by smp_release_cpus() after it releases the common secondary
 * spinloop; that function stops waiting once this reaches zero.
 */
int spinning_secondaries;
/* NOTE(review): not referenced in the visible part of this file — presumably the hash page table size; confirm. */
u64 ppc64_pft_size;

/*
 * Cache geometry for L1D/L1I (and L2/L3, left zeroed here).
 * The static defaults describe 64-byte (0x40) L1 blocks, log2 = 6;
 * initialize_cache_info() may overwrite them later.
 */
struct ppc64_caches ppc64_caches = {
	.l1d = {
		.block_size = 0x40,
		.log_block_size = 6,
	},
	.l1i = {
		.block_size = 0x40,
		.log_block_size = 6
	},
};
EXPORT_SYMBOL_GPL(ppc64_caches);
  84. #if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
  85. void __init setup_tlb_core_data(void)
  86. {
  87. int cpu;
  88. BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
  89. for_each_possible_cpu(cpu) {
  90. int first = cpu_first_thread_sibling(cpu);
  91. /*
  92. * If we boot via kdump on a non-primary thread,
  93. * make sure we point at the thread that actually
  94. * set up this TLB.
  95. */
  96. if (cpu_first_thread_sibling(boot_cpuid) == first)
  97. first = boot_cpuid;
  98. paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
  99. /*
  100. * If we have threads, we need either tlbsrx.
  101. * or e6500 tablewalk mode, or else TLB handlers
  102. * will be racy and could produce duplicate entries.
  103. * Should we panic instead?
  104. */
  105. WARN_ONCE(smt_enabled_at_boot >= 2 &&
  106. book3e_htw_mode != PPC_HTW_E6500,
  107. "%s: unsupported MMU configuration\n", __func__);
  108. }
  109. }
  110. #endif
  111. #ifdef CONFIG_SMP
  112. static char *smt_enabled_cmdline;
  113. /* Look for ibm,smt-enabled OF option */
  114. void __init check_smt_enabled(void)
  115. {
  116. struct device_node *dn;
  117. const char *smt_option;
  118. /* Default to enabling all threads */
  119. smt_enabled_at_boot = threads_per_core;
  120. /* Allow the command line to overrule the OF option */
  121. if (smt_enabled_cmdline) {
  122. if (!strcmp(smt_enabled_cmdline, "on"))
  123. smt_enabled_at_boot = threads_per_core;
  124. else if (!strcmp(smt_enabled_cmdline, "off"))
  125. smt_enabled_at_boot = 0;
  126. else {
  127. int smt;
  128. int rc;
  129. rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
  130. if (!rc)
  131. smt_enabled_at_boot =
  132. min(threads_per_core, smt);
  133. }
  134. } else {
  135. dn = of_find_node_by_path("/options");
  136. if (dn) {
  137. smt_option = of_get_property(dn, "ibm,smt-enabled",
  138. NULL);
  139. if (smt_option) {
  140. if (!strcmp(smt_option, "on"))
  141. smt_enabled_at_boot = threads_per_core;
  142. else if (!strcmp(smt_option, "off"))
  143. smt_enabled_at_boot = 0;
  144. }
  145. of_node_put(dn);
  146. }
  147. }
  148. }
/*
 * Handler for the "smt-enabled=" early command line parameter.
 *
 * Only records the option string in smt_enabled_cmdline; it is
 * interpreted later by check_smt_enabled(). Always returns 0.
 */
static int __init early_smt_enabled(char *p)
{
	smt_enabled_cmdline = p;
	return 0;
}
early_param("smt-enabled", early_smt_enabled);
#endif /* CONFIG_SMP */
/*
 * Fix up paca fields required for the boot cpu.
 *
 * early_setup() calls this twice: first on its temporary static paca and
 * again on the paca allocated for boot_cpuid, each time just before the
 * paca is installed with setup_paca().
 */
static void __init fixup_boot_paca(struct paca_struct *boot_paca)
{
	/* The boot cpu is started */
	boot_paca->cpu_start = 1;
#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * Give the early boot machine check stack somewhere to use, use
	 * half of the init stack. This is a bit hacky but there should not be
	 * deep stack usage in early init so shouldn't overflow it or overwrite
	 * things.
	 */
	boot_paca->mc_emergency_sp = (void *)&init_thread_union +
		(THREAD_SIZE/2);
#endif
	/* Allow percpu accesses to work until we setup percpu data */
	boot_paca->data_offset = 0;
	/* Mark interrupts soft and hard disabled in PACA */
	boot_paca->irq_soft_mask = IRQS_DISABLED;
	boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
	/* The paca now claims hard-disabled, so MSR[EE] must really be clear */
	WARN_ON(mfmsr() & MSR_EE);
}
/*
 * Configure how exceptions are delivered on the boot CPU.
 *
 * Sets up the kdump trampolines, then either (firmware with SET_MODE)
 * decides whether scv can stay enabled, tries to enable AIL and, on
 * little-endian builds, requests little-endian exceptions; or (otherwise)
 * lets OPAL configure the cores when OPAL firmware is present.
 */
static void __init configure_exceptions(void)
{
	/*
	 * Setup the trampolines from the lowmem exception vectors
	 * to the kdump kernel when not using a relocatable kernel.
	 */
	setup_kdump_trampoline();
	/* Under a PAPR hypervisor, we need hypercalls */
	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		/*
		 * - PR KVM does not support AIL mode interrupts in the host
		 *   while a PR guest is running.
		 *
		 * - SCV system call interrupt vectors are only implemented for
		 *   AIL mode interrupts.
		 *
		 * - On pseries, AIL mode can only be enabled and disabled
		 *   system-wide so when a PR VM is created on a pseries host,
		 *   all CPUs of the host are set to AIL=0 mode.
		 *
		 * - Therefore host CPUs must not execute scv while a PR VM
		 *   exists.
		 *
		 * - SCV support can not be disabled dynamically because the
		 *   feature is advertised to host userspace. Disabling the
		 *   facility and emulating it would be possible but is not
		 *   implemented.
		 *
		 * - So SCV support is blanket disabled if PR KVM could possibly
		 *   run. That is, PR support compiled in, booting on pseries
		 *   with hash MMU.
		 */
		if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
			init_task.thread.fscr &= ~FSCR_SCV;
			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
		}
		/* Enable AIL if possible; without it scv has to be turned off too */
		if (!pseries_enable_reloc_on_exc()) {
			init_task.thread.fscr &= ~FSCR_SCV;
			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
		}
		/*
		 * Tell the hypervisor that we want our exceptions to
		 * be taken in little endian mode.
		 *
		 * We don't call this for big endian as our calling convention
		 * makes us always enter in BE, and the call may fail under
		 * some circumstances with kdump.
		 */
#ifdef __LITTLE_ENDIAN__
		pseries_little_endian_exceptions();
#endif
	} else {
		/* Set endian mode using OPAL */
		if (firmware_has_feature(FW_FEATURE_OPAL))
			opal_configure_cores();
		/* AIL on native is done in cpu_ready_for_interrupts() */
	}
}
  238. static void cpu_ready_for_interrupts(void)
  239. {
  240. /*
  241. * Enable AIL if supported, and we are in hypervisor mode. This
  242. * is called once for every processor.
  243. *
  244. * If we are not in hypervisor mode the job is done once for
  245. * the whole partition in configure_exceptions().
  246. */
  247. if (cpu_has_feature(CPU_FTR_HVMODE)) {
  248. unsigned long lpcr = mfspr(SPRN_LPCR);
  249. unsigned long new_lpcr = lpcr;
  250. if (cpu_has_feature(CPU_FTR_ARCH_31)) {
  251. /* P10 DD1 does not have HAIL */
  252. if (pvr_version_is(PVR_POWER10) &&
  253. (mfspr(SPRN_PVR) & 0xf00) == 0x100)
  254. new_lpcr |= LPCR_AIL_3;
  255. else
  256. new_lpcr |= LPCR_HAIL;
  257. } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
  258. new_lpcr |= LPCR_AIL_3;
  259. }
  260. if (new_lpcr != lpcr)
  261. mtspr(SPRN_LPCR, new_lpcr);
  262. }
  263. /*
  264. * Set HFSCR:TM based on CPU features:
  265. * In the special case of TM no suspend (P9N DD2.1), Linux is
  266. * told TM is off via the dt-ftrs but told to (partially) use
  267. * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
  268. * will be off from dt-ftrs but we need to turn it on for the
  269. * no suspend case.
  270. */
  271. if (cpu_has_feature(CPU_FTR_HVMODE)) {
  272. if (cpu_has_feature(CPU_FTR_TM_COMP))
  273. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
  274. else
  275. mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
  276. }
  277. /* Set IR and DR in PACA MSR */
  278. get_paca()->kernel_msr = MSR_KERNEL;
  279. }
  280. unsigned long spr_default_dscr = 0;
  281. static void __init record_spr_defaults(void)
  282. {
  283. if (early_cpu_has_feature(CPU_FTR_DSCR))
  284. spr_default_dscr = mfspr(SPRN_DSCR);
  285. }
/*
 * Early initialization entry point. This is called by head.S
 * with MMU translation disabled. We rely on the "feature" of
 * the CPU that ignores the top 2 bits of the address in real
 * mode so we can access kernel globals normally provided we
 * only toy with things in the RMO region. From here, we do
 * some early parsing of the device-tree to set up our MEMBLOCK
 * data structures, and allocate & initialize the hash table
 * and segment tables so we can start running with translation
 * enabled.
 *
 * It is this function which will call the probe() callback of
 * the various platform types and copy the matching one to the
 * global ppc_md structure. Your platform can eventually do
 * some very early initializations from the probe() routine, but
 * this is not recommended, be very careful as, for example, the
 * device-tree is not accessible via normal means at this point.
 *
 * @dt_ptr: address of the flattened device tree; it is converted with
 *          __va() before being handed to the device-tree parsers.
 *
 * The order of the steps below matters: each stage relies on state
 * established by the previous ones.
 */
void __init early_setup(unsigned long dt_ptr)
{
	static __initdata struct paca_struct boot_paca;

	/* -------- printk is _NOT_ safe to use here ! ------- */

	/*
	 * Assume we're on cpu 0 for now.
	 *
	 * We need to load a PACA very early for a few reasons.
	 *
	 * The stack protector canary is stored in the paca, so as soon as we
	 * call any stack protected code we need r13 pointing somewhere valid.
	 *
	 * If we are using kcov it will call in_task() in its instrumentation,
	 * which relies on the current task from the PACA.
	 *
	 * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
	 * printk(), which can trigger both stack protector and kcov.
	 *
	 * percpu variables and spin locks also use the paca.
	 *
	 * So set up a temporary paca. It will be replaced below once we know
	 * what CPU we are on.
	 */
	initialise_paca(&boot_paca, 0);
	fixup_boot_paca(&boot_paca);
	WARN_ON(local_paca != 0);
	setup_paca(&boot_paca); /* install the paca into registers */

	/* -------- printk is now safe to use ------- */

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
		enable_machine_check();

	/* Try new device tree based feature discovery ... */
	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
		/* Otherwise use the old style CPU table */
		identify_cpu(0, mfspr(SPRN_PVR));

	/* Enable early debugging if any specified (see udbg.h) */
	udbg_early_init();

	udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);

	/*
	 * Do early initialization using the flattened device
	 * tree, such as retrieving the physical memory map or
	 * calculating/retrieving the hash table size, discover
	 * boot_cpuid and boot_cpu_hwid.
	 */
	early_init_devtree(__va(dt_ptr));

	/* Replace the temporary paca with the real one for boot_cpuid */
	allocate_paca_ptrs();
	allocate_paca(boot_cpuid);
	set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid);
	fixup_boot_paca(paca_ptrs[boot_cpuid]);
	setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
	// smp_processor_id() now reports boot_cpuid

#ifdef CONFIG_SMP
	task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current)
#endif

	/*
	 * Configure exception handlers. This include setting up trampolines
	 * if needed, setting exception endian mode, etc...
	 */
	configure_exceptions();

	/*
	 * Configure Kernel Userspace Protection. This needs to happen before
	 * feature fixups for platforms that implement this using features.
	 */
	setup_kup();

	/* Apply all the dynamic patching */
	apply_feature_fixups();
	setup_feature_keys();

	/* Initialize the hash table or TLB handling */
	early_init_mmu();

	early_ioremap_setup();

	/*
	 * After firmware and early platform setup code has set things up,
	 * we note the SPR values for configurable control/performance
	 * registers, and use those as initial defaults.
	 */
	record_spr_defaults();

	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set and enable AIL if it exists
	 */
	cpu_ready_for_interrupts();

	/*
	 * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
	 * will only actually get enabled on the boot cpu much later once
	 * ftrace itself has been initialized.
	 */
	this_cpu_enable_ftrace();

	udbg_printf(" <- %s()\n", __func__);

#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
	/*
	 * This needs to be done *last* (after the above udbg_printf() even)
	 *
	 * Right after we return from this function, we turn on the MMU
	 * which means the real-mode access trick that btext does will
	 * no longer work, it needs to switch to using a real MMU
	 * mapping. This call will ensure that it does
	 */
	btext_map();
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
#ifdef CONFIG_SMP
/*
 * Early setup for a secondary CPU (SMP builds only): mask interrupts in
 * the paca, initialise the MMU for this CPU, do the per-cpu KUP setup and
 * finally mark the CPU ready to take interrupts in virtual mode.
 */
void early_setup_secondary(void)
{
	/* Mark interrupts disabled in PACA */
	irq_soft_mask_set(IRQS_DISABLED);
	/* Initialize the hash table or TLB handling */
	early_init_mmu_secondary();
	/* Perform any KUP setup that is per-cpu */
	setup_kup();
	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set.
	 */
	cpu_ready_for_interrupts();
}
#endif /* CONFIG_SMP */
  421. void panic_smp_self_stop(void)
  422. {
  423. hard_irq_disable();
  424. spin_begin();
  425. while (1)
  426. spin_cpu_relax();
  427. }
  428. #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
  429. static bool use_spinloop(void)
  430. {
  431. if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
  432. /*
  433. * See comments in head_64.S -- not all platforms insert
  434. * secondaries at __secondary_hold and wait at the spin
  435. * loop.
  436. */
  437. if (firmware_has_feature(FW_FEATURE_OPAL))
  438. return false;
  439. return true;
  440. }
  441. /*
  442. * When book3e boots from kexec, the ePAPR spin table does
  443. * not get used.
  444. */
  445. return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
  446. }
  447. void smp_release_cpus(void)
  448. {
  449. unsigned long *ptr;
  450. int i;
  451. if (!use_spinloop())
  452. return;
  453. /* All secondary cpus are spinning on a common spinloop, release them
  454. * all now so they can start to spin on their individual paca
  455. * spinloops. For non SMP kernels, the secondary cpus never get out
  456. * of the common spinloop.
  457. */
  458. ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
  459. - PHYSICAL_START);
  460. *ptr = ppc_function_entry(generic_secondary_smp_init);
  461. /* And wait a bit for them to catch up */
  462. for (i = 0; i < 100000; i++) {
  463. mb();
  464. HMT_low();
  465. if (spinning_secondaries == 0)
  466. break;
  467. udelay(1);
  468. }
  469. pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
  470. }
  471. #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
  472. /*
  473. * Initialize some remaining members of the ppc64_caches and systemcfg
  474. * structures
  475. * (at least until we get rid of them completely). This is mostly some
  476. * cache informations about the CPU that will be used by cache flush
  477. * routines and/or provided to userland
  478. */
  479. static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
  480. u32 bsize, u32 sets)
  481. {
  482. info->size = size;
  483. info->sets = sets;
  484. info->line_size = lsize;
  485. info->block_size = bsize;
  486. info->log_block_size = __ilog2(bsize);
  487. if (bsize)
  488. info->blocks_per_page = PAGE_SIZE / bsize;
  489. else
  490. info->blocks_per_page = 0;
  491. if (sets == 0)
  492. info->assoc = 0xffff;
  493. else
  494. info->assoc = size / (sets * lsize);
  495. }
  496. static bool __init parse_cache_info(struct device_node *np,
  497. bool icache,
  498. struct ppc_cache_info *info)
  499. {
  500. static const char *ipropnames[] __initdata = {
  501. "i-cache-size",
  502. "i-cache-sets",
  503. "i-cache-block-size",
  504. "i-cache-line-size",
  505. };
  506. static const char *dpropnames[] __initdata = {
  507. "d-cache-size",
  508. "d-cache-sets",
  509. "d-cache-block-size",
  510. "d-cache-line-size",
  511. };
  512. const char **propnames = icache ? ipropnames : dpropnames;
  513. const __be32 *sizep, *lsizep, *bsizep, *setsp;
  514. u32 size, lsize, bsize, sets;
  515. bool success = true;
  516. size = 0;
  517. sets = -1u;
  518. lsize = bsize = cur_cpu_spec->dcache_bsize;
  519. sizep = of_get_property(np, propnames[0], NULL);
  520. if (sizep != NULL)
  521. size = be32_to_cpu(*sizep);
  522. setsp = of_get_property(np, propnames[1], NULL);
  523. if (setsp != NULL)
  524. sets = be32_to_cpu(*setsp);
  525. bsizep = of_get_property(np, propnames[2], NULL);
  526. lsizep = of_get_property(np, propnames[3], NULL);
  527. if (bsizep == NULL)
  528. bsizep = lsizep;
  529. if (lsizep == NULL)
  530. lsizep = bsizep;
  531. if (lsizep != NULL)
  532. lsize = be32_to_cpu(*lsizep);
  533. if (bsizep != NULL)
  534. bsize = be32_to_cpu(*bsizep);
  535. if (sizep == NULL || bsizep == NULL || lsizep == NULL)
  536. success = false;
  537. /*
  538. * OF is weird .. it represents fully associative caches
  539. * as "1 way" which doesn't make much sense and doesn't
  540. * leave room for direct mapped. We'll assume that 0
  541. * in OF means direct mapped for that reason.
  542. */
  543. if (sets == 1)
  544. sets = 0;
  545. else if (sets == 0)
  546. sets = 1;
  547. init_cache_info(info, size, lsize, bsize, sets);
  548. return success;
  549. }
  550. void __init initialize_cache_info(void)
  551. {
  552. struct device_node *cpu = NULL, *l2, *l3 = NULL;
  553. u32 pvr;
  554. /*
  555. * All shipping POWER8 machines have a firmware bug that
  556. * puts incorrect information in the device-tree. This will
  557. * be (hopefully) fixed for future chips but for now hard
  558. * code the values if we are running on one of these
  559. */
  560. pvr = PVR_VER(mfspr(SPRN_PVR));
  561. if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
  562. pvr == PVR_POWER8NVL) {
  563. /* size lsize blk sets */
  564. init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
  565. init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
  566. init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
  567. init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
  568. } else
  569. cpu = of_find_node_by_type(NULL, "cpu");
  570. /*
  571. * We're assuming *all* of the CPUs have the same
  572. * d-cache and i-cache sizes... -Peter
  573. */
  574. if (cpu) {
  575. if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
  576. pr_warn("Argh, can't find dcache properties !\n");
  577. if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
  578. pr_warn("Argh, can't find icache properties !\n");
  579. /*
  580. * Try to find the L2 and L3 if any. Assume they are
  581. * unified and use the D-side properties.
  582. */
  583. l2 = of_find_next_cache_node(cpu);
  584. of_node_put(cpu);
  585. if (l2) {
  586. parse_cache_info(l2, false, &ppc64_caches.l2);
  587. l3 = of_find_next_cache_node(l2);
  588. of_node_put(l2);
  589. }
  590. if (l3) {
  591. parse_cache_info(l3, false, &ppc64_caches.l3);
  592. of_node_put(l3);
  593. }
  594. }
  595. /* For use by binfmt_elf */
  596. dcache_bsize = ppc64_caches.l1d.block_size;
  597. icache_bsize = ppc64_caches.l1i.block_size;
  598. cur_cpu_spec->dcache_bsize = dcache_bsize;
  599. cur_cpu_spec->icache_bsize = icache_bsize;
  600. }
  601. /*
  602. * This returns the limit below which memory accesses to the linear
  603. * mapping are guarnateed not to cause an architectural exception (e.g.,
  604. * TLB or SLB miss fault).
  605. *
  606. * This is used to allocate PACAs and various interrupt stacks that
  607. * that are accessed early in interrupt handlers that must not cause
  608. * re-entrant interrupts.
  609. */
  610. __init u64 ppc64_bolted_size(void)
  611. {
  612. #ifdef CONFIG_PPC_BOOK3E_64
  613. /* Freescale BookE bolts the entire linear mapping */
  614. /* XXX: BookE ppc64_rma_limit setup seems to disagree? */
  615. if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
  616. return linear_map_top;
  617. /* Other BookE, we assume the first GB is bolted */
  618. return 1ul << 30;
  619. #else
  620. /* BookS radix, does not take faults on linear mapping */
  621. if (early_radix_enabled())
  622. return ULONG_MAX;
  623. /* BookS hash, the first segment is bolted */
  624. if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
  625. return 1UL << SID_SHIFT_1T;
  626. return 1UL << SID_SHIFT;
  627. #endif
  628. }
  629. static void *__init alloc_stack(unsigned long limit, int cpu)
  630. {
  631. void *ptr;
  632. BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
  633. ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
  634. MEMBLOCK_LOW_LIMIT, limit,
  635. early_cpu_to_node(cpu));
  636. if (!ptr)
  637. panic("cannot allocate stacks");
  638. return ptr;
  639. }
  640. void __init irqstack_early_init(void)
  641. {
  642. u64 limit = ppc64_bolted_size();
  643. unsigned int i;
  644. /*
  645. * Interrupt stacks must be in the first segment since we
  646. * cannot afford to take SLB misses on them. They are not
  647. * accessed in realmode.
  648. */
  649. for_each_possible_cpu(i) {
  650. softirq_ctx[i] = alloc_stack(limit, i);
  651. hardirq_ctx[i] = alloc_stack(limit, i);
  652. }
  653. }
  654. #ifdef CONFIG_PPC_BOOK3E_64
  655. void __init exc_lvl_early_init(void)
  656. {
  657. unsigned int i;
  658. for_each_possible_cpu(i) {
  659. void *sp;
  660. sp = alloc_stack(ULONG_MAX, i);
  661. critirq_ctx[i] = sp;
  662. paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
  663. sp = alloc_stack(ULONG_MAX, i);
  664. dbgirq_ctx[i] = sp;
  665. paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
  666. sp = alloc_stack(ULONG_MAX, i);
  667. mcheckirq_ctx[i] = sp;
  668. paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
  669. }
  670. if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
  671. patch_exception(0x040, exc_debug_debug_book3e);
  672. }
  673. #endif
  674. /*
  675. * Stack space used when we detect a bad kernel stack pointer, and
  676. * early in SMP boots before relocation is enabled. Exclusive emergency
  677. * stack for machine checks.
  678. */
  679. void __init emergency_stack_init(void)
  680. {
  681. u64 limit, mce_limit;
  682. unsigned int i;
  683. /*
  684. * Emergency stacks must be under 256MB, we cannot afford to take
  685. * SLB misses on them. The ABI also requires them to be 128-byte
  686. * aligned.
  687. *
  688. * Since we use these as temporary stacks during secondary CPU
  689. * bringup, machine check, system reset, and HMI, we need to get
  690. * at them in real mode. This means they must also be within the RMO
  691. * region.
  692. *
  693. * The IRQ stacks allocated elsewhere in this file are zeroed and
  694. * initialized in kernel/irq.c. These are initialized here in order
  695. * to have emergency stacks available as early as possible.
  696. */
  697. limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
  698. /*
  699. * Machine check on pseries calls rtas, but can't use the static
  700. * rtas_args due to a machine check hitting while the lock is held.
  701. * rtas args have to be under 4GB, so the machine check stack is
  702. * limited to 4GB so args can be put on stack.
  703. */
  704. if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
  705. mce_limit = SZ_4G;
  706. for_each_possible_cpu(i) {
  707. paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
  708. #ifdef CONFIG_PPC_BOOK3S_64
  709. /* emergency stack for NMI exception handling. */
  710. paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
  711. /* emergency stack for machine check exception handling. */
  712. paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
  713. #endif
  714. }
  715. }
  716. #ifdef CONFIG_SMP
  717. static int pcpu_cpu_distance(unsigned int from, unsigned int to)
  718. {
  719. if (early_cpu_to_node(from) == early_cpu_to_node(to))
  720. return LOCAL_DISTANCE;
  721. else
  722. return REMOTE_DISTANCE;
  723. }
/*
 * CPU-to-node callback handed to the percpu first-chunk allocators in
 * setup_per_cpu_areas(); simply forwards to early_cpu_to_node().
 */
static __init int pcpu_cpu_to_node(int cpu)
{
	return early_cpu_to_node(cpu);
}
  728. unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
  729. EXPORT_SYMBOL(__per_cpu_offset);
  730. void __init setup_per_cpu_areas(void)
  731. {
  732. const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
  733. size_t atom_size;
  734. unsigned long delta;
  735. unsigned int cpu;
  736. int rc = -EINVAL;
  737. /*
  738. * BookE and BookS radix are historical values and should be revisited.
  739. */
  740. if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
  741. atom_size = SZ_1M;
  742. } else if (radix_enabled()) {
  743. atom_size = PAGE_SIZE;
  744. } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
  745. /*
  746. * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
  747. * to group units. For larger mappings, use 1M atom which
  748. * should be large enough to contain a number of units.
  749. */
  750. if (mmu_linear_psize == MMU_PAGE_4K)
  751. atom_size = PAGE_SIZE;
  752. else
  753. atom_size = SZ_1M;
  754. }
  755. if (pcpu_chosen_fc != PCPU_FC_PAGE) {
  756. rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
  757. pcpu_cpu_to_node);
  758. if (rc)
  759. pr_warn("PERCPU: %s allocator failed (%d), "
  760. "falling back to page size\n",
  761. pcpu_fc_names[pcpu_chosen_fc], rc);
  762. }
  763. if (rc < 0)
  764. rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
  765. if (rc < 0)
  766. panic("cannot initialize percpu area (err=%d)", rc);
  767. delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
  768. for_each_possible_cpu(cpu) {
  769. __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
  770. paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
  771. }
  772. }
  773. #endif
  774. #ifdef CONFIG_MEMORY_HOTPLUG
  775. unsigned long memory_block_size_bytes(void)
  776. {
  777. if (ppc_md.memory_block_size)
  778. return ppc_md.memory_block_size();
  779. return MIN_MEMORY_BLOCK_SIZE;
  780. }
  781. #endif
#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
/*
 * Exported instance of struct ppc_pci_io, only built with indirect PIO or
 * indirect MMIO support.
 * NOTE(review): presumably a table of I/O accessor hooks — confirm against
 * the struct definition.
 */
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
/*
 * Sample period for the perf based hardlockup detector: ppc_proc_freq
 * multiplied by @watchdog_thresh.
 * NOTE(review): presumably ppc_proc_freq is in cycles per second and the
 * threshold in seconds, giving a period in cycles — confirm.
 */
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
	return ppc_proc_freq * watchdog_thresh;
}
#endif
  792. /*
  793. * The perf based hardlockup detector breaks PMU event based branches, so
  794. * disable it by default. Book3S has a soft-nmi hardlockup detector based
  795. * on the decrementer interrupt, so it does not suffer from this problem.
  796. *
  797. * It is likely to get false positives in KVM guests, so disable it there
  798. * by default too. PowerVM will not stop or arbitrarily oversubscribe
  799. * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
  800. * the detector for non-KVM guests, assume PowerVM.
  801. */
  802. static int __init disable_hardlockup_detector(void)
  803. {
  804. #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
  805. hardlockup_detector_disable();
  806. #else
  807. if (firmware_has_feature(FW_FEATURE_LPAR)) {
  808. if (is_kvm_guest())
  809. hardlockup_detector_disable();
  810. }
  811. #endif
  812. return 0;
  813. }
  814. early_initcall(disable_hardlockup_detector);