severity.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * MCE grading rules.
  4. * Copyright 2008, 2009 Intel Corporation.
  5. *
  6. * Author: Andi Kleen
  7. */
  8. #include <linux/kernel.h>
  9. #include <linux/seq_file.h>
  10. #include <linux/init.h>
  11. #include <linux/debugfs.h>
  12. #include <linux/uaccess.h>
  13. #include <asm/mce.h>
  14. #include <asm/intel-family.h>
  15. #include <asm/traps.h>
  16. #include <asm/insn.h>
  17. #include <asm/insn-eval.h>
  18. #include "internal.h"
  19. /*
  20. * Grade an mce by severity. In general the most severe ones are processed
  21. * first. Since there are quite a lot of combinations, test the bits in a
  22. * table-driven way. The rules are simply processed in order, first
  23. * match wins.
  24. *
  25. * Note this is only used for machine check exceptions, the corrected
  26. * errors use much simpler rules. The exceptions still check for the corrected
  27. * errors, but only to leave them alone for the CMCI handler (except for
  28. * panic situations)
  29. */
/*
 * Values deliberately start at 1: a zero in the corresponding rule field
 * of the severities[] table below means "don't care" for that filter.
 */
enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
/* Whether the rule applies only with(out) SER (software error recovery) */
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
/* Whether the event arrived as a #MC exception or via polling/CMCI */
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
/*
 * Grading rule table, scanned in order by mce_severity_intel(); the first
 * matching entry wins.  Zero in any optional filter field (mcgmask, ser,
 * context, excp, cpu_model, bank_lo) means that filter is not applied.
 */
static struct severity {
	u64 mask;			/* MCi_STATUS bits to test */
	u64 result;			/* required value of (status & mask) */
	unsigned char sev;		/* MCE_*_SEVERITY returned on match */
	unsigned char mcgmask;		/* MCG_STATUS bits to test */
	unsigned char mcgres;		/* required value of (mcgstatus & mcgmask) */
	unsigned char ser;		/* SER_REQUIRED / NO_SER filter */
	unsigned char context;		/* IN_KERNEL / IN_USER / IN_KERNEL_RECOV filter */
	unsigned char excp;		/* EXCP_CONTEXT / NO_EXCP filter */
	unsigned char covered;		/* rule has matched at least once (debugfs stat) */
	unsigned char cpu_model;	/* apply only on this CPU model ... */
	unsigned char cpu_minstepping;	/* ... at this stepping or later */
	unsigned char bank_lo, bank_hi;	/* apply only to banks in [bank_lo, bank_hi] */
	char *msg;			/* human-readable description for the caller */
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL		.context = IN_KERNEL
#define USER		.context = IN_USER
#define KERNEL_RECOV	.context = IN_KERNEL_RECOV
#define SER		.ser = SER_REQUIRED
#define NOSER		.ser = NO_SER
#define EXCP		.excp = EXCP_CONTEXT
#define NOEXCP		.excp = NO_EXCP
#define BITCLR(x)	.mask = x, .result = 0
#define BITSET(x)	.mask = x, .result = x
#define MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither restart nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),
	/*
	 * known AO MCACODs reported via MCE or CMC:
	 *
	 * SRAO could be signaled either via a machine check exception or
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
	 * check bit S for SRAO.
	 */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
		),
	/*
	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
	 * to report uncorrected errors using CMCI with a special signature.
	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
	 * in one of the memory controller banks.
	 * Set severity to "AO" for same action as normal patrol scrub error.
	 */
	MCESEV(
		AO, "Uncorrected Patrol Scrub Error",
		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
		MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
	),
	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signaled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),
	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),
	/* known AR MCACODs: */
#ifdef CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load in error recoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL_RECOV
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
	MCESEV(
		PANIC, "Data load in unrecoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL
		),
	MCESEV(
		PANIC, "Instruction fetch error in kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		KERNEL
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),
	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),
	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};
/* True when both the return IP and the error IP on the stack are valid */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
			     (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
  205. static bool is_copy_from_user(struct pt_regs *regs)
  206. {
  207. u8 insn_buf[MAX_INSN_SIZE];
  208. unsigned long addr;
  209. struct insn insn;
  210. int ret;
  211. if (!regs)
  212. return false;
  213. if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
  214. return false;
  215. ret = insn_decode_kernel(&insn, insn_buf);
  216. if (ret < 0)
  217. return false;
  218. switch (insn.opcode.value) {
  219. /* MOV mem,reg */
  220. case 0x8A: case 0x8B:
  221. /* MOVZ mem,reg */
  222. case 0xB60F: case 0xB70F:
  223. addr = (unsigned long)insn_get_addr_ref(&insn, regs);
  224. break;
  225. /* REP MOVS */
  226. case 0xA4: case 0xA5:
  227. addr = regs->si;
  228. break;
  229. default:
  230. return false;
  231. }
  232. if (fault_in_kernel_space(addr))
  233. return false;
  234. current->mce_vaddr = (void __user *)addr;
  235. return true;
  236. }
  237. /*
  238. * If mcgstatus indicated that ip/cs on the stack were
  239. * no good, then "m->cs" will be zero and we will have
  240. * to assume the worst case (IN_KERNEL) as we actually
  241. * have no idea what we were executing when the machine
  242. * check hit.
  243. * If we do have a good "m->cs" (or a faked one in the
  244. * case we were executing in VM86 mode) we can use it to
  245. * distinguish an exception taken in user mode from one
  246. * taken in the kernel.
  247. */
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{
	int fixup_type;
	bool copy_user;

	/* CPL 3 code segment: the machine check hit user mode */
	if ((m->cs & 3) == 3)
		return IN_USER;

	/* No valid restart/error IP: nothing to resume, treat as kernel */
	if (!mc_recoverable(m->mcgstatus))
		return IN_KERNEL;

	/* Allow instrumentation around external facilities usage. */
	instrumentation_begin();
	fixup_type = ex_get_fixup_type(m->ip);
	copy_user  = is_copy_from_user(regs);
	instrumentation_end();

	switch (fixup_type) {
	case EX_TYPE_UACCESS:
	case EX_TYPE_COPY:
		/* Only recoverable if the access was a user copy */
		if (!copy_user)
			return IN_KERNEL;
		m->kflags |= MCE_IN_KERNEL_COPYIN;
		fallthrough;

	case EX_TYPE_FAULT_MCE_SAFE:
	case EX_TYPE_DEFAULT_MCE_SAFE:
		/* An exception fixup exists: the kernel can recover here */
		m->kflags |= MCE_IN_KERNEL_RECOV;
		return IN_KERNEL_RECOV;

	default:
		return IN_KERNEL;
	}
}
  276. /* See AMD PPR(s) section Machine Check Error Handling. */
  277. static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
  278. {
  279. char *panic_msg = NULL;
  280. int ret;
  281. /*
  282. * Default return value: Action required, the error must be handled
  283. * immediately.
  284. */
  285. ret = MCE_AR_SEVERITY;
  286. /* Processor Context Corrupt, no need to fumble too much, die! */
  287. if (m->status & MCI_STATUS_PCC) {
  288. panic_msg = "Processor Context Corrupt";
  289. ret = MCE_PANIC_SEVERITY;
  290. goto out;
  291. }
  292. if (m->status & MCI_STATUS_DEFERRED) {
  293. ret = MCE_DEFERRED_SEVERITY;
  294. goto out;
  295. }
  296. /*
  297. * If the UC bit is not set, the system either corrected or deferred
  298. * the error. No action will be required after logging the error.
  299. */
  300. if (!(m->status & MCI_STATUS_UC)) {
  301. ret = MCE_KEEP_SEVERITY;
  302. goto out;
  303. }
  304. /*
  305. * On MCA overflow, without the MCA overflow recovery feature the
  306. * system will not be able to recover, panic.
  307. */
  308. if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
  309. panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
  310. ret = MCE_PANIC_SEVERITY;
  311. goto out;
  312. }
  313. if (!mce_flags.succor) {
  314. panic_msg = "Uncorrected error without MCA Recovery";
  315. ret = MCE_PANIC_SEVERITY;
  316. goto out;
  317. }
  318. if (error_context(m, regs) == IN_KERNEL) {
  319. panic_msg = "Uncorrected unrecoverable error in kernel context";
  320. ret = MCE_PANIC_SEVERITY;
  321. }
  322. out:
  323. if (msg && panic_msg)
  324. *msg = panic_msg;
  325. return ret;
  326. }
  327. static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
  328. {
  329. enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
  330. enum context ctx = error_context(m, regs);
  331. struct severity *s;
  332. for (s = severities;; s++) {
  333. if ((m->status & s->mask) != s->result)
  334. continue;
  335. if ((m->mcgstatus & s->mcgmask) != s->mcgres)
  336. continue;
  337. if (s->ser == SER_REQUIRED && !mca_cfg.ser)
  338. continue;
  339. if (s->ser == NO_SER && mca_cfg.ser)
  340. continue;
  341. if (s->context && ctx != s->context)
  342. continue;
  343. if (s->excp && excp != s->excp)
  344. continue;
  345. if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
  346. continue;
  347. if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
  348. continue;
  349. if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
  350. continue;
  351. if (msg)
  352. *msg = s->msg;
  353. s->covered = 1;
  354. if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
  355. return MCE_PANIC_SEVERITY;
  356. return s->sev;
  357. }
  358. }
  359. int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
  360. {
  361. if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
  362. boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
  363. return mce_severity_amd(m, regs, msg, is_excp);
  364. else
  365. return mce_severity_intel(m, regs, msg, is_excp);
  366. }
  367. #ifdef CONFIG_DEBUG_FS
  368. static void *s_start(struct seq_file *f, loff_t *pos)
  369. {
  370. if (*pos >= ARRAY_SIZE(severities))
  371. return NULL;
  372. return &severities[*pos];
  373. }
  374. static void *s_next(struct seq_file *f, void *data, loff_t *pos)
  375. {
  376. if (++(*pos) >= ARRAY_SIZE(severities))
  377. return NULL;
  378. return &severities[*pos];
  379. }
/* seq_file stop: nothing to release, severities[] is a static table */
static void s_stop(struct seq_file *f, void *data)
{
}
  383. static int s_show(struct seq_file *f, void *data)
  384. {
  385. struct severity *ser = data;
  386. seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
  387. return 0;
  388. }
/* seq_file iteration over severities[] for the debugfs coverage file */
static const struct seq_operations severities_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};
/* Open handler for the debugfs coverage file: hook up the seq_file ops */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}
  399. static ssize_t severities_coverage_write(struct file *file,
  400. const char __user *ubuf,
  401. size_t count, loff_t *ppos)
  402. {
  403. int i;
  404. for (i = 0; i < ARRAY_SIZE(severities); i++)
  405. severities[i].covered = 0;
  406. return count;
  407. }
/* debugfs file ops: reads iterate the table, writes reset coverage */
static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};
  415. static int __init severities_debugfs_init(void)
  416. {
  417. struct dentry *dmce;
  418. dmce = mce_get_debugfs_dir();
  419. debugfs_create_file("severities-coverage", 0444, dmce, NULL,
  420. &severities_coverage_fops);
  421. return 0;
  422. }
  423. late_initcall(severities_debugfs_init);
  424. #endif /* CONFIG_DEBUG_FS */