setup.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * 64-bit pSeries and RS/6000 setup code.
  4. *
  5. * Copyright (C) 1995 Linus Torvalds
  6. * Adapted from 'alpha' version by Gary Thomas
  7. * Modified by Cort Dougan ([email protected])
  8. * Modified by PPC64 Team, IBM Corp
  9. */
  10. /*
  11. * bootup setup stuff..
  12. */
  13. #include <linux/cpu.h>
  14. #include <linux/errno.h>
  15. #include <linux/platform_device.h>
  16. #include <linux/sched.h>
  17. #include <linux/kernel.h>
  18. #include <linux/mm.h>
  19. #include <linux/stddef.h>
  20. #include <linux/unistd.h>
  21. #include <linux/user.h>
  22. #include <linux/tty.h>
  23. #include <linux/major.h>
  24. #include <linux/interrupt.h>
  25. #include <linux/reboot.h>
  26. #include <linux/init.h>
  27. #include <linux/ioport.h>
  28. #include <linux/console.h>
  29. #include <linux/pci.h>
  30. #include <linux/utsname.h>
  31. #include <linux/adb.h>
  32. #include <linux/export.h>
  33. #include <linux/delay.h>
  34. #include <linux/irq.h>
  35. #include <linux/seq_file.h>
  36. #include <linux/root_dev.h>
  37. #include <linux/of.h>
  38. #include <linux/of_irq.h>
  39. #include <linux/of_pci.h>
  40. #include <linux/memblock.h>
  41. #include <linux/swiotlb.h>
  42. #include <linux/seq_buf.h>
  43. #include <asm/mmu.h>
  44. #include <asm/processor.h>
  45. #include <asm/io.h>
  46. #include <asm/rtas.h>
  47. #include <asm/pci-bridge.h>
  48. #include <asm/iommu.h>
  49. #include <asm/dma.h>
  50. #include <asm/machdep.h>
  51. #include <asm/irq.h>
  52. #include <asm/time.h>
  53. #include <asm/nvram.h>
  54. #include <asm/pmc.h>
  55. #include <asm/xics.h>
  56. #include <asm/xive.h>
  57. #include <asm/ppc-pci.h>
  58. #include <asm/i8259.h>
  59. #include <asm/udbg.h>
  60. #include <asm/smp.h>
  61. #include <asm/firmware.h>
  62. #include <asm/eeh.h>
  63. #include <asm/reg.h>
  64. #include <asm/plpar_wrappers.h>
  65. #include <asm/kexec.h>
  66. #include <asm/isa-bridge.h>
  67. #include <asm/security_features.h>
  68. #include <asm/asm-const.h>
  69. #include <asm/idle.h>
  70. #include <asm/swiotlb.h>
  71. #include <asm/svm.h>
  72. #include <asm/dtl.h>
  73. #include <asm/hvconsole.h>
  74. #include <asm/setup.h>
  75. #include "pseries.h"
/* True when this partition runs on shared processors; gates steal-time paths. */
DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL(shared_processor);

#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

/* Steal time accounting is enabled by default; "no-steal-acc" turns it off. */
static bool steal_acc = true;
static int __init parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);
#endif

/* Cooperative Memory Overcommit (CMO) parameters, -1 until read from firmware. */
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);

int fwnmi_active; /* TRUE if an FWNMI handler is present */
int ibm_nmi_interlock_token;
u32 pseries_security_flavor;	/* derived from H_CPU_BEHAV_FAVOUR_SECURITY* bits */
  96. static void pSeries_show_cpuinfo(struct seq_file *m)
  97. {
  98. struct device_node *root;
  99. const char *model = "";
  100. root = of_find_node_by_path("/");
  101. if (root)
  102. model = of_get_property(root, "model", NULL);
  103. seq_printf(m, "machine\t\t: CHRP %s\n", model);
  104. of_node_put(root);
  105. if (radix_enabled())
  106. seq_printf(m, "MMU\t\t: Radix\n");
  107. else
  108. seq_printf(m, "MMU\t\t: Hash\n");
  109. }
/* Initialize firmware assisted non-maskable interrupts if
 * the firmware supports this feature.
 */
static void __init fwnmi_init(void)
{
	unsigned long system_reset_addr, machine_check_addr;
	u8 *mce_data_buf;
	unsigned int i;
	int nr_cpus = num_possible_cpus();
#ifdef CONFIG_PPC_64S_HASH_MMU
	struct slb_entry *slb_ptr;
	size_t size;
#endif
	int ibm_nmi_register_token;

	/* FWNMI is only usable when RTAS provides the ibm,nmi-register call. */
	ibm_nmi_register_token = rtas_token("ibm,nmi-register");
	if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
		return;

	ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock");
	if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
		return;

	/* If the kernel's not linked at zero we point the firmware at low
	 * addresses anyway, and use a trampoline to get to the real code. */
	system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START;
	machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;

	if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
			   system_reset_addr, machine_check_addr))
		fwnmi_active = 1;

	/*
	 * Allocate a chunk for per cpu buffer to hold rtas errorlog.
	 * It will be used in real mode mce handler, hence it needs to be
	 * below RMA.
	 */
	mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
					RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
					ppc64_rma_size, NUMA_NO_NODE);
	if (!mce_data_buf)
		panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
		      RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);

	/* Hand each CPU its RTAS_ERROR_LOG_MAX-sized slice of the buffer. */
	for_each_possible_cpu(i) {
		paca_ptrs[i]->mce_data_buf = mce_data_buf +
						(RTAS_ERROR_LOG_MAX * i);
	}

#ifdef CONFIG_PPC_64S_HASH_MMU
	if (!radix_enabled()) {
		/* Allocate per cpu area to save old slb contents during MCE */
		size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
		slb_ptr = memblock_alloc_try_nid_raw(size,
				sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
				ppc64_rma_size, NUMA_NO_NODE);
		if (!slb_ptr)
			panic("Failed to allocate %zu bytes below %pa for slb area\n",
			      size, &ppc64_rma_size);

		for_each_possible_cpu(i)
			paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
	}
#endif
}
  167. /*
  168. * Affix a device for the first timer to the platform bus if
  169. * we have firmware support for the H_WATCHDOG hypercall.
  170. */
  171. static __init int pseries_wdt_init(void)
  172. {
  173. if (firmware_has_feature(FW_FEATURE_WATCHDOG))
  174. platform_device_register_simple("pseries-wdt", 0, NULL, 0);
  175. return 0;
  176. }
  177. machine_subsys_initcall(pseries, pseries_wdt_init);
  178. static void pseries_8259_cascade(struct irq_desc *desc)
  179. {
  180. struct irq_chip *chip = irq_desc_get_chip(desc);
  181. unsigned int cascade_irq = i8259_irq();
  182. if (cascade_irq)
  183. generic_handle_irq(cascade_irq);
  184. chip->irq_eoi(&desc->irq_data);
  185. }
  186. static void __init pseries_setup_i8259_cascade(void)
  187. {
  188. struct device_node *np, *old, *found = NULL;
  189. unsigned int cascade;
  190. const u32 *addrp;
  191. unsigned long intack = 0;
  192. int naddr;
  193. for_each_node_by_type(np, "interrupt-controller") {
  194. if (of_device_is_compatible(np, "chrp,iic")) {
  195. found = np;
  196. break;
  197. }
  198. }
  199. if (found == NULL) {
  200. printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
  201. return;
  202. }
  203. cascade = irq_of_parse_and_map(found, 0);
  204. if (!cascade) {
  205. printk(KERN_ERR "pic: failed to map cascade interrupt");
  206. return;
  207. }
  208. pr_debug("pic: cascade mapped to irq %d\n", cascade);
  209. for (old = of_node_get(found); old != NULL ; old = np) {
  210. np = of_get_parent(old);
  211. of_node_put(old);
  212. if (np == NULL)
  213. break;
  214. if (!of_node_name_eq(np, "pci"))
  215. continue;
  216. addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
  217. if (addrp == NULL)
  218. continue;
  219. naddr = of_n_addr_cells(np);
  220. intack = addrp[naddr-1];
  221. if (naddr > 1)
  222. intack |= ((unsigned long)addrp[naddr-2]) << 32;
  223. }
  224. if (intack)
  225. printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
  226. i8259_init(found, intack);
  227. of_node_put(found);
  228. irq_set_chained_handler(cascade, pseries_8259_cascade);
  229. }
  230. static void __init pseries_init_irq(void)
  231. {
  232. /* Try using a XIVE if available, otherwise use a XICS */
  233. if (!xive_spapr_init()) {
  234. xics_init();
  235. pseries_setup_i8259_cascade();
  236. }
  237. }
  238. static void pseries_lpar_enable_pmcs(void)
  239. {
  240. unsigned long set, reset;
  241. set = 1UL << 63;
  242. reset = 0;
  243. plpar_hcall_norets(H_PERFMON, set, reset);
  244. }
/*
 * Device-tree reconfiguration notifier: keep pci_dn bookkeeping in sync
 * when PCI device nodes are hot-added or hot-removed.
 */
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *parent, *np = rd->dn;
	struct pci_dn *pdn;
	int err = NOTIFY_OK;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		/* New node: create pci_dn info under the parent's PHB. */
		parent = of_get_parent(np);
		pdn = parent ? PCI_DN(parent) : NULL;
		if (pdn)
			pci_add_device_node_info(pdn->phb, np);

		of_node_put(parent);
		break;
	case OF_RECONFIG_DETACH_NODE:
		/* Node going away: unlink its pci_dn from the PHB's list. */
		pdn = PCI_DN(np);
		if (pdn)
			list_del(&pdn->list);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pci_dn_reconfig_nb = {
	.notifier_call = pci_dn_reconfig_notifier,
};
/* Slab cache for Dispatch Trace Log buffers. */
struct kmem_cache *dtl_cache;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Allocate space for the dispatch trace log for all possible cpus
 * and register the buffers with the hypervisor. This is used for
 * computing time stolen by the hypervisor.
 */
static int alloc_dispatch_logs(void)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (!dtl_cache)
		return 0;

	alloc_dtl_buffers(0);

	/* Register the DTL for the current (boot) cpu */
	register_dtl_buffer(smp_processor_id());

	return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
	return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
  297. static int alloc_dispatch_log_kmem_cache(void)
  298. {
  299. void (*ctor)(void *) = get_dtl_cache_ctor();
  300. dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
  301. DISPATCH_LOG_BYTES, 0, ctor);
  302. if (!dtl_cache) {
  303. pr_warn("Failed to create dispatch trace log buffer cache\n");
  304. pr_warn("Stolen time statistics will be unreliable\n");
  305. return 0;
  306. }
  307. return alloc_dispatch_logs();
  308. }
  309. machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
/* Per-cpu PURR/SPURR snapshots taken around idle entry/exit. */
DEFINE_PER_CPU(u64, idle_spurr_cycles);
DEFINE_PER_CPU(u64, idle_entry_purr_snap);
DEFINE_PER_CPU(u64, idle_entry_spurr_snap);

static void pseries_lpar_idle(void)
{
	/*
	 * Default handler to go into low thread priority and possibly
	 * low power mode by ceding processor to hypervisor
	 */
	if (!prep_irq_for_idle())
		return;

	/* Indicate to hypervisor that we are idle. */
	pseries_idle_prolog();

	/*
	 * Yield the processor to the hypervisor. We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	cede_processor();

	pseries_idle_epilog();
}
/* Tracks whether relocation-on-exception mode is currently enabled. */
static bool pseries_reloc_on_exception_enabled;

/* Report whether relocation-on-exception is enabled for this partition. */
bool pseries_reloc_on_exception(void)
{
	return pseries_reloc_on_exception_enabled;
}
EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
  339. /*
  340. * Enable relocation on during exceptions. This has partition wide scope and
  341. * may take a while to complete, if it takes longer than one second we will
  342. * just give up rather than wasting any more time on this - if that turns out
  343. * to ever be a problem in practice we can move this into a kernel thread to
  344. * finish off the process later in boot.
  345. */
  346. bool pseries_enable_reloc_on_exc(void)
  347. {
  348. long rc;
  349. unsigned int delay, total_delay = 0;
  350. while (1) {
  351. rc = enable_reloc_on_exceptions();
  352. if (!H_IS_LONG_BUSY(rc)) {
  353. if (rc == H_P2) {
  354. pr_info("Relocation on exceptions not"
  355. " supported\n");
  356. return false;
  357. } else if (rc != H_SUCCESS) {
  358. pr_warn("Unable to enable relocation"
  359. " on exceptions: %ld\n", rc);
  360. return false;
  361. }
  362. pseries_reloc_on_exception_enabled = true;
  363. return true;
  364. }
  365. delay = get_longbusy_msecs(rc);
  366. total_delay += delay;
  367. if (total_delay > 1000) {
  368. pr_warn("Warning: Giving up waiting to enable "
  369. "relocation on exceptions (%u msec)!\n",
  370. total_delay);
  371. return false;
  372. }
  373. mdelay(delay);
  374. }
  375. }
  376. EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
  377. void pseries_disable_reloc_on_exc(void)
  378. {
  379. long rc;
  380. while (1) {
  381. rc = disable_reloc_on_exceptions();
  382. if (!H_IS_LONG_BUSY(rc))
  383. break;
  384. mdelay(get_longbusy_msecs(rc));
  385. }
  386. if (rc == H_SUCCESS)
  387. pseries_reloc_on_exception_enabled = false;
  388. else
  389. pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
  390. rc);
  391. }
  392. EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
#ifdef __LITTLE_ENDIAN__
/*
 * Switch the partition to big endian exception handlers, retrying while
 * the hypervisor reports long-busy. Called very late in kexec (see below).
 */
void pseries_big_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_big_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}

	/*
	 * At this point it is unlikely panic() will get anything
	 * out to the user, since this is called very late in kexec
	 * but at least this will stop us from continuing on further
	 * and creating an even more difficult to debug situation.
	 *
	 * There is a known problem when kdump'ing, if cpus are offline
	 * the above call will fail. Rather than panicking again, keep
	 * going and hope the kdump kernel is also little endian, which
	 * it usually is.
	 */
	if (rc && !kdump_in_progress())
		panic("Could not enable big endian exceptions");
}
/*
 * Switch the partition to little endian exception handlers at boot,
 * retrying while the hypervisor reports long-busy. Failure is fatal:
 * an LE kernel cannot run with BE exception handlers.
 */
void __init pseries_little_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_little_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc) {
		ppc_md.progress("H_SET_MODE LE exception fail", 0);
		panic("Could not enable little endian exceptions");
	}
}
#endif
/*
 * Scan the device-tree root for PCI host bridge nodes ("pci"/"pciex")
 * and set up a pci_controller for each: RTAS config access, address
 * ranges, ISA bridge detection, pci_dn structures and MSI domains.
 */
static void __init pSeries_discover_phbs(void)
{
	struct device_node *node;
	struct pci_controller *phb;
	struct device_node *root = of_find_node_by_path("/");

	for_each_child_of_node(root, node) {
		if (!of_node_is_type(node, "pci") &&
		    !of_node_is_type(node, "pciex"))
			continue;

		phb = pcibios_alloc_controller(node);
		if (!phb)
			continue;
		rtas_setup_phb(phb);
		pci_process_bridge_OF_ranges(phb, node, 0);
		isa_bridge_find_early(phb);
		phb->controller_ops = pseries_pci_controller_ops;

		/* create pci_dn's for DT nodes under this PHB */
		pci_devs_phb_init_dynamic(phb);

		pseries_msi_allocate_domains(phb);
	}

	of_node_put(root);

	/*
	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
	 * in chosen.
	 */
	of_pci_check_probe_only();
}
/*
 * Translate the H_GET_CPU_CHARACTERISTICS result (character/behaviour
 * bit masks) into the kernel's powerpc security feature flags.
 */
static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
	/*
	 * The features below are disabled by default, so we instead look to see
	 * if firmware has *enabled* them, and set them if so.
	 */
	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);

	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);

	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);

	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);

	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);

	if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);

	if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
		security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);

	if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);

	if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);

	/*
	 * The features below are enabled by default, so we instead look to see
	 * if firmware has *disabled* them, and clear them if so.
	 * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
	 * H_CPU_BEHAV_FAVOUR_SECURITY is.
	 */
	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
		pseries_security_flavor = 0;
	} else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
		pseries_security_flavor = 1;
	else
		pseries_security_flavor = 2;

	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);

	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);

	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);

	if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
		security_ftr_clear(SEC_FTR_STF_BARRIER);

	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
  509. void pseries_setup_security_mitigations(void)
  510. {
  511. struct h_cpu_char_result result;
  512. enum l1d_flush_type types;
  513. bool enable;
  514. long rc;
  515. /*
  516. * Set features to the defaults assumed by init_cpu_char_feature_flags()
  517. * so it can set/clear again any features that might have changed after
  518. * migration, and in case the hypercall fails and it is not even called.
  519. */
  520. powerpc_security_features = SEC_FTR_DEFAULT;
  521. rc = plpar_get_cpu_characteristics(&result);
  522. if (rc == H_SUCCESS)
  523. init_cpu_char_feature_flags(&result);
  524. /*
  525. * We're the guest so this doesn't apply to us, clear it to simplify
  526. * handling of it elsewhere.
  527. */
  528. security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
  529. types = L1D_FLUSH_FALLBACK;
  530. if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
  531. types |= L1D_FLUSH_MTTRIG;
  532. if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
  533. types |= L1D_FLUSH_ORI;
  534. enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
  535. security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
  536. setup_rfi_flush(types, enable);
  537. setup_count_cache_flush();
  538. enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
  539. security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
  540. setup_entry_flush(enable);
  541. enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
  542. security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
  543. setup_uaccess_flush(enable);
  544. setup_stf_barrier();
  545. }
#ifdef CONFIG_PCI_IOV
/* Layout of each entry in the "ibm,open-sriov-vf-bar-info" array. */
enum rtas_iov_fw_value_map {
	NUM_RES_PROPERTY  = 0, /* Number of Resources */
	LOW_INT           = 1, /* Lowest 32 bits of Address */
	START_OF_ENTRIES  = 2, /* Always start of entry */
	APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
	WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
	NEXT_ENTRY        = 7  /* Go to next entry on array */
};

/* Selector for which value pseries_get_iov_fw_value() should extract. */
enum get_iov_fw_value_index {
	BAR_ADDRS     = 1, /* Get Bar Address */
	APERTURE_SIZE = 2, /* Get Aperture Size */
	WDW_SIZE      = 3  /* Get Window Size */
};
  560. static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
  561. enum get_iov_fw_value_index value)
  562. {
  563. const int *indexes;
  564. struct device_node *dn = pci_device_to_OF_node(dev);
  565. int i, num_res, ret = 0;
  566. indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
  567. if (!indexes)
  568. return 0;
  569. /*
  570. * First element in the array is the number of Bars
  571. * returned. Search through the list to find the matching
  572. * bar
  573. */
  574. num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
  575. if (resno >= num_res)
  576. return 0; /* or an error */
  577. i = START_OF_ENTRIES + NEXT_ENTRY * resno;
  578. switch (value) {
  579. case BAR_ADDRS:
  580. ret = of_read_number(&indexes[i], 2);
  581. break;
  582. case APERTURE_SIZE:
  583. ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
  584. break;
  585. case WDW_SIZE:
  586. ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
  587. break;
  588. }
  589. return ret;
  590. }
/*
 * Populate each VF BAR resource's flags and aperture-sized extent from
 * the firmware "ibm,open-sriov-vf-bar-info" array.
 */
static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
{
	struct resource *res;
	resource_size_t base, size;
	int i, r, num_res;

	/* Element 0 is the number of BARs described by the array. */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		res->flags = pci_parse_of_flags(of_read_number
						(&indexes[i + LOW_INT], 1), 0);
		/* Firmware places these BARs; the PCI core must not move them. */
		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
	}
}
/*
 * Claim the firmware-assigned VF BAR addresses: read each base/window
 * size pair and request the range from iomem_resource, marking any
 * conflicting BAR as unset.
 */
static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
{
	struct resource *res, *root, *conflict;
	resource_size_t base, size;
	int i, r, num_res;

	/*
	 * First element in the array is the number of Bars
	 * returned. Search through the list to find the matching
	 * bars assign them from firmware into resources structure.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
		res = &dev->resource[r + PCI_IOV_RESOURCES];
		base = of_read_number(&indexes[i], 2);
		size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		res->name = pci_name(dev);
		res->start = base;
		res->end = base + size - 1;
		root = &iomem_resource;
		dev_dbg(&dev->dev,
			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
			r + PCI_IOV_RESOURCES, res);
		conflict = request_resource_conflict(root, res);
		if (conflict) {
			dev_info(&dev->dev,
				 "BAR %d: %pR conflicts with %s %pR\n",
				 r + PCI_IOV_RESOURCES, res,
				 conflict->name, conflict);
			/* Let the PCI core reassign this BAR later. */
			res->flags |= IORESOURCE_UNSET;
		}
	}
}
  644. static void pseries_disable_sriov_resources(struct pci_dev *pdev)
  645. {
  646. int i;
  647. pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
  648. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
  649. pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
  650. }
  651. static void pseries_pci_fixup_resources(struct pci_dev *pdev)
  652. {
  653. const int *indexes;
  654. struct device_node *dn = pci_device_to_OF_node(pdev);
  655. /*Firmware must support open sriov otherwise dont configure*/
  656. indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
  657. if (indexes)
  658. of_pci_set_vf_bar_size(pdev, indexes);
  659. else
  660. pseries_disable_sriov_resources(pdev);
  661. }
  662. static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
  663. {
  664. const int *indexes;
  665. struct device_node *dn = pci_device_to_OF_node(pdev);
  666. if (!pdev->is_physfn)
  667. return;
  668. /*Firmware must support open sriov otherwise don't configure*/
  669. indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
  670. if (indexes)
  671. of_pci_parse_iov_addrs(pdev, indexes);
  672. else
  673. pseries_disable_sriov_resources(pdev);
  674. }
/*
 * Report the alignment for an IOV resource: the firmware aperture size
 * when open SR-IOV is supported, otherwise the regular resource size.
 */
static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
							  int resno)
{
	const __be32 *reg;
	struct device_node *dn = pci_device_to_OF_node(pdev);

	/* Firmware must support open SR-IOV, otherwise report regular alignment. */
	reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
	if (!reg)
		return pci_iov_resource_size(pdev, resno);

	if (!pdev->is_physfn)
		return 0;
	return pseries_get_iov_fw_value(pdev,
					resno - PCI_IOV_RESOURCES,
					APERTURE_SIZE);
}
#endif
/*
 * Main pSeries platform setup: SMP, FWNMI, security mitigations, PCI
 * probing policy, NVRAM, and the LPAR-specific machdep callbacks
 * (idle, PMC enable, SR-IOV fixups).
 */
static void __init pSeries_setup_arch(void)
{
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	/* Discover PIC type and setup ppc_md accordingly */
	smp_init_pseries();

	/* Radix without GTSE requires the RPT_INVALIDATE firmware feature. */
	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");

	/* openpic global configuration register (64-bit format). */
	/* openpic Interrupt Source Unit pointer (64-bit format). */
	/* python0 facility area (mmio) (64-bit format) REAL address. */

	/* init to some ~sane value until calibrate_delay() runs */
	loops_per_jiffy = 50000000;

	fwnmi_init();

	pseries_setup_security_mitigations();
	if (!radix_enabled())
		pseries_lpar_read_hblkrm_characteristics();

	/* By default, only probe PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);

	/* Find and initialize PCI host bridges */
	init_pci_config_tokens();
	of_reconfig_notifier_register(&pci_dn_reconfig_nb);

	pSeries_nvram_init();

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		vpa_init(boot_cpuid);

		/* Shared-processor partitions get steal-time accounting. */
		if (lppaca_shared_proc()) {
			static_branch_enable(&shared_processor);
			pv_spinlocks_init();
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
			static_key_slow_inc(&paravirt_steal_enabled);
			if (steal_acc)
				static_key_slow_inc(&paravirt_steal_rq_enabled);
#endif
		}

		ppc_md.power_save = pseries_lpar_idle;
		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
		ppc_md.pcibios_fixup_resources =
			pseries_pci_fixup_resources;
		ppc_md.pcibios_fixup_sriov =
			pseries_pci_fixup_iov_resources;
		ppc_md.pcibios_iov_resource_alignment =
			pseries_pci_iov_resource_alignment;
#endif
	} else {
		/* No special idle routine */
		ppc_md.enable_pmcs = power4_enable_pmcs;
	}

	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
	pseries_rng_init();
}
/* Platform panic hook: flush pending kmsg output, then have RTAS terminate the OS. */
static void pseries_panic(char *str)
{
	panic_flush_kmsg_end();
	rtas_os_term(str);
}
/* Display the kernel identification on the operator panel. */
static int __init pSeries_init_panel(void)
{
	/* Manually leave the kernel version on the panel. */
#ifdef __BIG_ENDIAN__
	ppc_md.progress("Linux ppc64\n", 0);
#else
	ppc_md.progress("Linux ppc64le\n", 0);
#endif
	ppc_md.progress(init_utsname()->version, 0);

	return 0;
}
machine_arch_initcall(pseries, pSeries_init_panel);
/*
 * Set the Data Address Breakpoint Register via H_SET_DABR. The hypercall
 * takes no DABRX argument, so dabrx is accepted only for signature
 * compatibility with pseries_set_xdabr() and is ignored here.
 */
static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
{
	return plpar_hcall_norets(H_SET_DABR, dabr);
}
  763. static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
  764. {
  765. /* Have to set at least one bit in the DABRX according to PAPR */
  766. if (dabrx == 0 && dabr == 0)
  767. dabrx = DABRX_USER;
  768. /* PAPR says we can only set kernel and user bits */
  769. dabrx &= DABRX_KERNEL | DABRX_USER;
  770. return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
  771. }
  772. static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
  773. {
  774. /* PAPR says we can't set HYP */
  775. dawrx &= ~DAWRX_HYP;
  776. if (nr == 0)
  777. return plpar_set_watchpoint0(dawr, dawrx);
  778. else
  779. return plpar_set_watchpoint1(dawr, dawrx);
  780. }
  781. #define CMO_CHARACTERISTICS_TOKEN 44
  782. #define CMO_MAXLENGTH 1026
  783. void pSeries_coalesce_init(void)
  784. {
  785. struct hvcall_mpp_x_data mpp_x_data;
  786. if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
  787. powerpc_firmware_features |= FW_FEATURE_XCMO;
  788. else
  789. powerpc_firmware_features &= ~FW_FEATURE_XCMO;
  790. }
  791. /**
  792. * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
  793. * handle that here. (Stolen from parse_system_parameter_string)
  794. */
static void __init pSeries_cmo_feature_init(void)
{
	char *ptr, *key, *value, *end;
	int call_status;
	/* Default page order if firmware doesn't report CMOPageSize. */
	int page_order = IOMMU_PAGE_SHIFT_4K;

	pr_debug(" -> fw_cmo_feature_init()\n");
	/* rtas_data_buf is a shared RTAS scratch buffer; hold the lock for
	 * the whole call + parse, since we parse the buffer in place.
	 */
	spin_lock(&rtas_data_buf_lock);
	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	/* Fetch the CMO characteristics system parameter into rtas_data_buf. */
	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
				NULL,
				CMO_CHARACTERISTICS_TOKEN,
				__pa(rtas_data_buf),
				RTAS_DATA_BUF_SIZE);

	if (call_status != 0) {
		spin_unlock(&rtas_data_buf_lock);
		pr_debug("CMO not available\n");
		pr_debug(" <- fw_cmo_feature_init()\n");
		return;
	}

	end = rtas_data_buf + CMO_MAXLENGTH - 2;
	ptr = rtas_data_buf + 2;	/* step over strlen value */
	key = value = ptr;

	/* Walk the comma-separated "key=value" pairs, splitting in place. */
	while (*ptr && (ptr <= end)) {
		/* Separate the key and value by replacing '=' with '\0' and
		 * point the value at the string after the '='
		 */
		if (ptr[0] == '=') {
			ptr[0] = '\0';
			value = ptr + 1;
		} else if (ptr[0] == '\0' || ptr[0] == ',') {
			/* Terminate the string containing the key/value pair */
			ptr[0] = '\0';

			if (key == value) {
				pr_debug("Malformed key/value pair\n");
				/* Never found a '=', end processing */
				break;
			}

			if (0 == strcmp(key, "CMOPageSize"))
				page_order = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "PrPSP"))
				CMO_PrPSP = simple_strtol(value, NULL, 10);
			else if (0 == strcmp(key, "SecPSP"))
				CMO_SecPSP = simple_strtol(value, NULL, 10);
			/* Start the next pair just past the terminator. */
			value = key = ptr + 1;
		}
		ptr++;
	}

	/* Page size is returned as the power of 2 of the page size,
	 * convert to the page size in bytes before returning
	 */
	CMO_PageSize = 1 << page_order;
	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);

	/* CMO is considered enabled if either paging parameter was reported. */
	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
		pr_info("CMO enabled\n");
		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
			 CMO_SecPSP);
		powerpc_firmware_features |= FW_FEATURE_CMO;
		pSeries_coalesce_init();
	} else
		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
			 CMO_SecPSP);
	spin_unlock(&rtas_data_buf_lock);
	pr_debug(" <- fw_cmo_feature_init()\n");
}
  859. static void __init pseries_add_hw_description(void)
  860. {
  861. struct device_node *dn;
  862. const char *s;
  863. dn = of_find_node_by_path("/openprom");
  864. if (dn) {
  865. if (of_property_read_string(dn, "model", &s) == 0)
  866. seq_buf_printf(&ppc_hw_desc, "of:%s ", s);
  867. of_node_put(dn);
  868. }
  869. dn = of_find_node_by_path("/hypervisor");
  870. if (dn) {
  871. if (of_property_read_string(dn, "compatible", &s) == 0)
  872. seq_buf_printf(&ppc_hw_desc, "hv:%s ", s);
  873. of_node_put(dn);
  874. return;
  875. }
  876. if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
  877. of_property_read_bool(of_root, "ibm,fw-net-version"))
  878. seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
  879. }
  880. /*
  881. * Early initialization. Relocation is on but do not reference unbolted pages
  882. */
static void __init pseries_init(void)
{
	pr_debug(" -> pseries_init()\n");

	pseries_add_hw_description();

#ifdef CONFIG_HVC_CONSOLE
	/* Bring up the hypervisor virtual console early when running as an LPAR. */
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hvc_vio_init_early();
#endif
	/* Prefer the extended XDABR interface over plain DABR when both exist. */
	if (firmware_has_feature(FW_FEATURE_XDABR))
		ppc_md.set_dabr = pseries_set_xdabr;
	else if (firmware_has_feature(FW_FEATURE_DABR))
		ppc_md.set_dabr = pseries_set_dabr;

	if (firmware_has_feature(FW_FEATURE_SET_MODE))
		ppc_md.set_dawr = pseries_set_dawr;

	pSeries_cmo_feature_init();
	iommu_init_early_pSeries();

	pr_debug(" <- pseries_init()\n");
}
  901. /**
  902. * pseries_power_off - tell firmware about how to power off the system.
  903. *
  904. * This function calls either the power-off rtas token in normal cases
  905. * or the ibm,power-off-ups token (if present & requested) in case of
  906. * a power failure. If power-off token is used, power on will only be
  907. * possible with power button press. If ibm,power-off-ups token is used
  908. * it will allow auto poweron after power is restored.
  909. */
  910. static void pseries_power_off(void)
  911. {
  912. int rc;
  913. int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");
  914. if (rtas_flash_term_hook)
  915. rtas_flash_term_hook(SYS_POWER_OFF);
  916. if (rtas_poweron_auto == 0 ||
  917. rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
  918. rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
  919. printk(KERN_INFO "RTAS power-off returned %d\n", rc);
  920. } else {
  921. rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
  922. printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
  923. }
  924. for (;;);
  925. }
  926. static int __init pSeries_probe(void)
  927. {
  928. if (!of_node_is_type(of_root, "chrp"))
  929. return 0;
  930. /* Cell blades firmware claims to be chrp while it's not. Until this
  931. * is fixed, we need to avoid those here.
  932. */
  933. if (of_machine_is_compatible("IBM,CPBW-1.0") ||
  934. of_machine_is_compatible("IBM,CBEA"))
  935. return 0;
  936. pm_power_off = pseries_power_off;
  937. pr_debug("Machine is%s LPAR !\n",
  938. (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
  939. pseries_init();
  940. return 1;
  941. }
  942. static int pSeries_pci_probe_mode(struct pci_bus *bus)
  943. {
  944. if (firmware_has_feature(FW_FEATURE_LPAR))
  945. return PCI_PROBE_DEVTREE;
  946. return PCI_PROBE_NORMAL;
  947. }
/* PCI controller operations for pSeries PHBs. */
struct pci_controller_ops pseries_pci_controller_ops = {
	.probe_mode		= pSeries_pci_probe_mode,
};
/* Machine description for the pSeries platform: wires the generic ppc_md
 * hooks to the RTAS/pSeries implementations defined in this file and
 * elsewhere in this platform directory.
 */
define_machine(pseries) {
	.name			= "pSeries",
	.probe			= pSeries_probe,
	.setup_arch		= pSeries_setup_arch,
	.init_IRQ		= pseries_init_irq,
	.show_cpuinfo		= pSeries_show_cpuinfo,
	.log_error		= pSeries_log_error,
	.discover_phbs		= pSeries_discover_phbs,
	.pcibios_fixup		= pSeries_final_fixup,
	/* Reboot, halt and panic all go through RTAS/firmware. */
	.restart		= rtas_restart,
	.halt			= rtas_halt,
	.panic			= pseries_panic,
	.get_boot_time		= rtas_get_boot_time,
	.get_rtc_time		= rtas_get_rtc_time,
	.set_rtc_time		= rtas_set_rtc_time,
	.calibrate_decr		= generic_calibrate_decr,
	.progress		= rtas_progress,
	.system_reset_exception = pSeries_system_reset_exception,
	.machine_check_early	= pseries_machine_check_realmode,
	.machine_check_exception = pSeries_machine_check_exception,
	.machine_check_log_err	= pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
	.machine_kexec          = pseries_machine_kexec,
	.kexec_cpu_down         = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
	.memory_block_size	= pseries_memory_block_size,
#endif
};