mce.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <[email protected]>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

#include "setup.h"

static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);
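
/*
 * Usage sketch (illustrative, not part of this file): a module interested
 * in UE events can hook into the chain with a notifier callback. The
 * callback receives the machine_check_event as the data pointer (see the
 * blocking_notifier_call_chain() call in machine_process_ue_event() below).
 *
 *	static int my_mce_notify(struct notifier_block *nb,
 *				 unsigned long action, void *data)
 *	{
 *		struct machine_check_event *evt = data;
 *
 *		if (evt->error_type == MCE_ERROR_TYPE_UE)
 *			pr_info("UE event on CPU %d\n", evt->cpu);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_mce_nb = {
 *		.notifier_call = my_mce_notify,
 *	};
 *
 *	mce_register_notifier(&my_mce_nb);
 */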

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}
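
/*
 * The pending flag set by mce_irq_work_queue() below is consumed at
 * interrupt exit: mce_run_irq_context_handlers() checks it, drains the
 * queued events, and clears it via clear_mce_pending_irq_work().
 */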
void mce_irq_work_queue(void)
{
	/* Raise decrementer interrupt */
	arch_irq_work_raise();

	set_mce_pending_irq_work();
}

/*
 * Decode and save high level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	mce = &local_paca->mce_info->mce_event[index];

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);
	if (mce->error_type == MCE_ERROR_TYPE_UE)
		mce->u.ue_error.ignore_event = mce_err->ignore_event;

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
}
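
/*
 * Illustrative call sequence (a sketch of how platform code is expected
 * to use this API; the handler name and the addr/phys_addr locals are
 * hypothetical, not a verbatim copy of any platform handler):
 *
 *	long my_plat_machine_check_early(struct pt_regs *regs)
 *	{
 *		struct mce_error_info mce_err = { 0 };
 *		long handled = 0;
 *
 *		// ...decode the error into mce_err, addr, phys_addr...
 *		save_mce_event(regs, handled, &mce_err, regs->nip,
 *			       addr, phys_addr);
 *		return handled;
 *	}
 *
 * The saved event is later drained via machine_check_queue_event() or
 * get_mce_event()/release_mce_event().
 */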

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handle routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
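
/*
 * Usage sketch (illustrative): peek at the current event without freeing
 * the slot, then release it once consumed. MCE_EVENT_DONTRELEASE and
 * MCE_EVENT_RELEASE are the boolean aliases for the 'release' argument
 * (see machine_check_queue_event() below for an in-tree caller).
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		// ...inspect evt...
 *		release_mce_event();
 *	}
 */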

static void machine_check_ue_work(void)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));

	/* Queue work to process this event later. */
	mce_irq_work_queue();
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));

	mce_irq_work_queue();
}
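
/*
 * Note the two queues above: mce_event_queue feeds
 * machine_check_process_queued_event() (printing/logging at interrupt
 * exit), while mce_ue_event_queue feeds machine_process_ue_event()
 * (memory_failure() handling from the workqueue).
 */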

void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs_set_return_ip(regs, extable_fixup(entry));
	}
}

/*
 * Process pending UE events from the MCE UE event queue. This function
 * runs from the workqueue (scheduled via machine_check_ue_work()).
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but oh well.
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event; it has a fixup handler
		 * which will do the appropriate error handling and
		 * reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function is
 * called at interrupt exit via mce_run_irq_context_handlers().
 */
static void machine_check_process_queued_event(void)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (local_paca->mce_info->mce_queue_count > 0) {
		index = local_paca->mce_info->mce_queue_count - 1;
		evt = &local_paca->mce_info->mce_event_queue[index];

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			local_paca->mce_info->mce_queue_count--;
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		local_paca->mce_info->mce_queue_count--;
	}
}

void set_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 1;
}

void clear_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 0;
}

void mce_run_irq_context_handlers(void)
{
	if (unlikely(local_paca->mce_pending_irq_work)) {
		if (ppc_md.machine_check_log_err)
			ppc_md.machine_check_log_err();
		machine_check_process_queued_event();
		machine_check_ue_work();
		clear_mce_pending_irq_work();
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
		"scv invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign/control memory)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign/control memory)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign/control memory)",
		"Load/Store (foreign/control memory)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_64S_HASH_MMU
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
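
/*
 * Example console output (format only, following the printk strings
 * above; the addresses, symbol, and subtype are illustrative, not
 * captured from real hardware):
 *
 *	MCE: CPU0: machine check (Severe)  UE Load/Store DAR: c000000001234560 [Recovered]
 *	MCE: CPU0: NIP: [c000000000123450] some_function+0x30/0x120
 *	MCE: CPU0: Initiator CPU
 *	MCE: CPU0: Hardware error
 */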

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;
		break;
	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values: the result of hmi_handle_debugtrig() if it recognised
 * the HMI cause, else 1 once the platform handler (if any) has run and
 * the timebase has been resynced.
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}

void __init mce_init(void)
{
	struct mce_info *mce_info;
	u64 limit;
	int i;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);
	for_each_possible_cpu(i) {
		mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
						  __alignof__(*mce_info),
						  MEMBLOCK_LOW_LIMIT,
						  limit, early_cpu_to_node(i));
		if (!mce_info)
			goto err;
		paca_ptrs[i]->mce_info = mce_info;
	}
	return;
err:
	panic("Failed to allocate memory for MCE event data\n");
}