skx_common.c

// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Shared code by both skx_edac and i10nm_edac. Originally split out
 * from the skx_edac driver.
 *
 * This file is linked into both skx_edac and i10nm_edac drivers. In
 * order to avoid link errors, this file must be like a pure library
 * without including symbols and defines which would otherwise conflict,
 * when linked once into a module and into a built-in object, at the
 * same time. For example, __this_module symbol references when that
 * file is being linked into a built-in object.
 *
 * Copyright (c) 2018, Intel Corporation.
 */

#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/mce.h>
#include "edac_module.h"
#include "skx_common.h"
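
/*
 * Note (added for clarity): these strings name the components of the
 * firmware address-translation (ADXL) interface from <linux/adxl.h>.
 * skx_adxl_get() below matches each one against the component list the
 * firmware publishes via its DSM and records its index.
 */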
static const char * const component_names[] = {
	[INDEX_SOCKET]		= "ProcessorSocketId",
	[INDEX_MEMCTRL]		= "MemoryControllerId",
	[INDEX_CHANNEL]		= "ChannelId",
	[INDEX_DIMM]		= "DimmSlotId",
	[INDEX_CS]		= "ChipSelect",
	[INDEX_NM_MEMCTRL]	= "NmMemoryControllerId",
	[INDEX_NM_CHANNEL]	= "NmChannelId",
	[INDEX_NM_DIMM]		= "NmDimmSlotId",
	[INDEX_NM_CS]		= "NmChipSelect",
};

static int component_indices[ARRAY_SIZE(component_names)];
static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
static unsigned long adxl_nm_bitmap;

static char skx_msg[MSG_SIZE];
static skx_decode_f driver_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
static bool skx_mem_cfg_2lm;

int __init skx_adxl_get(void)
{
	const char * const *names;
	int i, j;

	names = adxl_get_component_names();
	if (!names) {
		skx_printk(KERN_NOTICE, "No firmware support for address translation.\n");
		return -ENODEV;
	}

	for (i = 0; i < INDEX_MAX; i++) {
		for (j = 0; names[j]; j++) {
			if (!strcmp(component_names[i], names[j])) {
				component_indices[i] = j;

				if (i >= INDEX_NM_FIRST)
					adxl_nm_bitmap |= 1 << i;

				break;
			}
		}

		if (!names[j] && i < INDEX_NM_FIRST)
			goto err;
	}

	if (skx_mem_cfg_2lm) {
		if (!adxl_nm_bitmap)
			skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
		else
			edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
	}

	adxl_component_names = names;
	while (*names++)
		adxl_component_count++;

	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
			      GFP_KERNEL);
	if (!adxl_values) {
		adxl_component_count = 0;
		return -ENOMEM;
	}

	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
	if (!adxl_msg) {
		adxl_component_count = 0;
		kfree(adxl_values);
		return -ENOMEM;
	}

	return 0;
err:
	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
		   component_names[i]);
	for (j = 0; names[j]; j++)
		skx_printk(KERN_CONT, "%s ", names[j]);
	skx_printk(KERN_CONT, "\n");

	return -ENODEV;
}

void __exit skx_adxl_put(void)
{
	kfree(adxl_values);
	kfree(adxl_msg);
}
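
/*
 * Note (added for clarity): translate a system physical address into
 * socket/memory-controller/channel/DIMM coordinates via the firmware
 * ADXL interface. For errors in the first-level (near) memory of a 2LM
 * configuration, the "Nm*" components are used instead, where available
 * per adxl_nm_bitmap. Returns true if the address was decoded to a
 * known device.
 */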
static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
{
	struct skx_dev *d;
	int i, len = 0;

	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
				      res->addr < BIT_ULL(32))) {
		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
		return false;
	}

	if (adxl_decode(res->addr, adxl_values)) {
		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
		return false;
	}

	res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
	if (error_in_1st_level_mem) {
		res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
			   (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
		res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
			       (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
		res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
			    (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
		res->cs = (adxl_nm_bitmap & BIT_NM_CS) ?
			  (int)adxl_values[component_indices[INDEX_NM_CS]] : -1;
	} else {
		res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
		res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
		res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
		res->cs = (int)adxl_values[component_indices[INDEX_CS]];
	}

	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
		return false;
	}

	list_for_each_entry(d, &dev_edac_list, list) {
		if (d->imc[0].src_id == res->socket) {
			res->dev = d;
			break;
		}
	}

	if (!res->dev) {
		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
			   res->socket, res->imc);
		return false;
	}

	for (i = 0; i < adxl_component_count; i++) {
		if (adxl_values[i] == ~0x0ull)
			continue;

		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
				adxl_component_names[i], adxl_values[i]);
		if (MSG_SIZE - len <= 0)
			break;
	}

	res->decoded_by_adxl = true;

	return true;
}

void skx_set_mem_cfg(bool mem_cfg_2lm)
{
	skx_mem_cfg_2lm = mem_cfg_2lm;
}

void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
	driver_decode = decode;
	skx_show_retry_rd_err_log = show_retry_log;
}

int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
{
	u32 reg;

	if (pci_read_config_dword(d->util_all, off, &reg)) {
		skx_printk(KERN_ERR, "Failed to read src id\n");
		return -ENODEV;
	}

	*id = GET_BITFIELD(reg, 12, 14);
	return 0;
}

int skx_get_node_id(struct skx_dev *d, u8 *id)
{
	u32 reg;

	if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
		skx_printk(KERN_ERR, "Failed to read node id\n");
		return -ENODEV;
	}

	*id = GET_BITFIELD(reg, 0, 2);
	return 0;
}

static int get_width(u32 mtr)
{
	switch (GET_BITFIELD(mtr, 8, 9)) {
	case 0:
		return DEV_X4;
	case 1:
		return DEV_X8;
	case 2:
		return DEV_X16;
	}
	return DEV_UNKNOWN;
}

/*
 * We use the per-socket device @cfg->decs_did to count how many sockets
 * are present, and to determine which PCI buses are associated with each
 * socket. Allocate and build the full list of all the skx_dev structures
 * that we need here.
 */
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
{
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;
	int ndev = 0;

	prev = NULL;
	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev);
		if (!pdev)
			break;
		ndev++;
		d = kzalloc(sizeof(*d), GFP_KERNEL);
		if (!d) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}

		if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, &reg)) {
			kfree(d);
			pci_dev_put(pdev);
			skx_printk(KERN_ERR, "Failed to read bus idx\n");
			return -ENODEV;
		}

		d->bus[0] = GET_BITFIELD(reg, 0, 7);
		d->bus[1] = GET_BITFIELD(reg, 8, 15);
		if (cfg->type == SKX) {
			d->seg = pci_domain_nr(pdev->bus);
			d->bus[2] = GET_BITFIELD(reg, 16, 23);
			d->bus[3] = GET_BITFIELD(reg, 24, 31);
		} else {
			d->seg = GET_BITFIELD(reg, 16, 23);
		}

		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
		list_add_tail(&d->list, &dev_edac_list);
		prev = pdev;
	}

	if (list)
		*list = &dev_edac_list;
	return ndev;
}
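
/*
 * Note (added for clarity): read TOLM ("top of low memory") and TOHM
 * ("top of high memory") from the device @did; TOHM spans two 32-bit
 * registers. Addresses in [TOLM, 4 GiB) fall in the low-memory hole and
 * are rejected as out of range by skx_adxl_decode() above.
 */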
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
	struct pci_dev *pdev;
	u32 reg;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
	if (!pdev) {
		edac_dbg(2, "Can't get tolm/tohm\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, off[0], &reg)) {
		skx_printk(KERN_ERR, "Failed to read tolm\n");
		goto fail;
	}
	skx_tolm = reg;

	if (pci_read_config_dword(pdev, off[1], &reg)) {
		skx_printk(KERN_ERR, "Failed to read lower tohm\n");
		goto fail;
	}
	skx_tohm = reg;

	if (pci_read_config_dword(pdev, off[2], &reg)) {
		skx_printk(KERN_ERR, "Failed to read upper tohm\n");
		goto fail;
	}
	skx_tohm |= (u64)reg << 32;

	pci_dev_put(pdev);
	*tolm = skx_tolm;
	*tohm = skx_tohm;
	edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm);
	return 0;
fail:
	pci_dev_put(pdev);
	return -ENODEV;
}

static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
			     int minval, int maxval, const char *name)
{
	u32 val = GET_BITFIELD(reg, lobit, hibit);

	if (val < minval || val > maxval) {
		edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
		return -EINVAL;
	}
	return val + add;
}

#define numrank(reg)	skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
#define numrow(reg)	skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
#define numcol(reg)	skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
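
/*
 * Note (added for clarity): the macros above decode MTR bit fields into
 * address-bit counts. For example, numrow() takes bits 2:4 and adds 12,
 * so a raw value of 3 means 15 row address bits (valid raw values 1-6,
 * i.e. 13-18 rows); numcol() gives 10-12 column bits. numrank() returns
 * log2 of the rank count, which is why the debug output below prints
 * "1 << ranks".
 */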
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
		      struct skx_imc *imc, int chan, int dimmno,
		      struct res_config *cfg)
{
	int banks, ranks, rows, cols, npages;
	enum mem_type mtype;
	u64 size;

	ranks = numrank(mtr);
	rows = numrow(mtr);
	cols = imc->hbm_mc ? 6 : numcol(mtr);

	if (imc->hbm_mc) {
		banks = 32;
		mtype = MEM_HBM2;
	} else if (cfg->support_ddr5 && (amap & 0x8)) {
		banks = 32;
		mtype = MEM_DDR5;
	} else {
		banks = 16;
		mtype = MEM_DDR4;
	}

	/*
	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
	 */
	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
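	/*
	 * Worked example (added for clarity): rows = 16, cols = 10,
	 * ranks = 1 (two ranks) and banks = 16 gives
	 * (1 << 27) * 16 = 2^31 words, and 2^31 >> 17 = 2^14 = 16384 MiB,
	 * i.e. a 16 GiB DIMM.
	 */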
	npages = MiB_TO_PAGES(size);

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
		 imc->mc, chan, dimmno, size, npages,
		 banks, 1 << ranks, rows, cols);

	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0);
	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9);
	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
	imc->chan[chan].dimms[dimmno].rowbits = rows;
	imc->chan[chan].dimms[dimmno].colbits = cols;

	dimm->nr_pages = npages;
	dimm->grain = 32;
	dimm->dtype = get_width(mtr);
	dimm->mtype = mtype;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */

	if (imc->hbm_mc)
		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
			 imc->src_id, imc->lmc, chan);
	else
		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
			 imc->src_id, imc->lmc, chan, dimmno);

	return 1;
}

int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
			int chan, int dimmno, const char *mod_str)
{
	int smbios_handle;
	u32 dev_handle;
	u16 flags;
	u64 size = 0;

	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
						   imc->src_id, 0);

	smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
	if (smbios_handle == -EOPNOTSUPP) {
		pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str);
		goto unknown_size;
	}

	if (smbios_handle < 0) {
		skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
		goto unknown_size;
	}

	if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
		skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
		goto unknown_size;
	}

	size = dmi_memdev_size(smbios_handle);
	if (size == ~0ull)
		skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
			   dev_handle, smbios_handle);

unknown_size:
	dimm->nr_pages = size >> PAGE_SHIFT;
	dimm->grain = 32;
	dimm->dtype = DEV_UNKNOWN;
	dimm->mtype = MEM_NVDIMM;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
		 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);

	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
		 imc->src_id, imc->lmc, chan, dimmno);

	return (size == 0 || size == ~0ull) ? 0 : 1;
}

int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
		     const char *ctl_name, const char *mod_str,
		     get_dimm_config_f get_dimm_config,
		     struct res_config *cfg)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct skx_pvt *pvt;
	int rc;

	/* Allocate a new MC control structure */
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = NUM_DIMMS;
	layers[1].is_virt_csrow = true;
	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
			    sizeof(struct skx_pvt));
	if (unlikely(!mci))
		return -ENOMEM;

	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);

	/* Associate skx_dev and mci for future usage */
	imc->mci = mci;
	pvt = mci->pvt_info;
	pvt->imc = imc;

	mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
				  imc->node_id, imc->lmc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto fail0;
	}

	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
	if (cfg->support_ddr5)
		mci->mtype_cap |= MEM_FLAG_DDR5;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = mod_str;
	mci->dev_name = pci_name(pdev);
	mci->ctl_page_to_phys = NULL;

	rc = get_dimm_config(mci, cfg);
	if (rc < 0)
		goto fail;

	/* Record ptr to the generic device */
	mci->pdev = &pdev->dev;

	/* Add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
		rc = -EINVAL;
		goto fail;
	}

	return 0;

fail:
	kfree(mci->ctl_name);
fail0:
	edac_mc_free(mci);
	imc->mci = NULL;
	return rc;
}

static void skx_unregister_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci = imc->mci;

	if (!mci)
		return;

	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->pdev);

	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
}

static void skx_mce_output_error(struct mem_ctl_info *mci,
				 const struct mce *m,
				 struct decoded_addr *res)
{
	enum hw_event_mc_err_type tp_event;
	char *optype;
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
	bool scrub_err = false;
	bool recoverable;
	int len;
	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
	u32 mscod = GET_BITFIELD(m->status, 16, 31);
	u32 errcode = GET_BITFIELD(m->status, 0, 15);
	u32 optypenum = GET_BITFIELD(m->status, 4, 6);

	recoverable = GET_BITFIELD(m->status, 56, 56);

	if (uncorrected_error) {
		core_err_cnt = 1;
		if (ripv) {
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		} else {
			tp_event = HW_EVENT_ERR_FATAL;
		}
	} else {
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	/*
	 * According to Intel Architecture spec vol 3B,
	 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
	 * memory errors should fit one of these masks:
	 *	000f 0000 1mmm cccc (binary)
	 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
	 * where:
	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
	 *	    won't be shown
	 *	mmm = error type
	 *	cccc = channel
	 * If the mask doesn't match, report an error to the parsing logic
	 */
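	/*
	 * For example (added for clarity), errcode 0x0091 = 0000 0000
	 * 1001 0001 matches the first pattern (0x0091 & 0xef80 == 0x0080)
	 * with mmm = 001 and cccc = 0001: a memory read error on channel 1.
	 */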
	if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
		optype = "Can't parse: it is not a mem";
	} else {
		switch (optypenum) {
		case 0:
			optype = "generic undef request error";
			break;
		case 1:
			optype = "memory read error";
			break;
		case 2:
			optype = "memory write error";
			break;
		case 3:
			optype = "addr/cmd error";
			break;
		case 4:
			optype = "memory scrubbing error";
			scrub_err = true;
			break;
		default:
			optype = "reserved";
			break;
		}
	}

	if (res->decoded_by_adxl) {
		len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
			       overflow ? " OVERFLOW" : "",
			       (uncorrected_error && recoverable) ? " recoverable" : "",
			       mscod, errcode, adxl_msg);
	} else {
		len = snprintf(skx_msg, MSG_SIZE,
			       "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
			       overflow ? " OVERFLOW" : "",
			       (uncorrected_error && recoverable) ? " recoverable" : "",
			       mscod, errcode,
			       res->socket, res->imc, res->rank,
			       res->row, res->column, res->bank_address, res->bank_group);
	}

	if (skx_show_retry_rd_err_log)
		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);

	edac_dbg(0, "%s\n", skx_msg);

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     res->channel, res->dimm, -1,
			     optype, skx_msg);
}
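
/*
 * Note (added for clarity): in a 2-level memory (2LM) configuration,
 * DDR DRAM in front of the far memory acts as a cache. A compound error
 * code matching 000f 0010 1mmm cccc ("RAM used as cache" above, i.e.
 * (errcode & 0xef80) == 0x280) therefore indicates an error in that
 * first-level (near) memory.
 */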
static bool skx_error_in_1st_level_mem(const struct mce *m)
{
	u32 errcode;

	if (!skx_mem_cfg_2lm)
		return false;

	errcode = GET_BITFIELD(m->status, 0, 15);

	if ((errcode & 0xef80) != 0x280)
		return false;

	return true;
}

int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			void *data)
{
	struct mce *mce = (struct mce *)data;
	struct decoded_addr res;
	struct mem_ctl_info *mci;
	char *type;

	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	/* ignore unless this is memory related with an address */
	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	memset(&res, 0, sizeof(res));
	res.mce  = mce;
	res.addr = mce->addr;

	/* Try driver decoder first */
	if (!(driver_decode && driver_decode(&res))) {
		/* Then try firmware decoder (ACPI DSM methods) */
		if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
			return NOTIFY_DONE;
	}

	mci = res.dev->imc[res.imc].mci;

	if (!mci)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");

	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
		      "Bank %d: 0x%llx\n", mce->extcpu, type,
		      mce->mcgstatus, mce->bank, mce->status);
	skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
	skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
	skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);

	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
		      "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
		      mce->time, mce->socketid, mce->apicid);

	skx_mce_output_error(mci, mce, &res);

	mce->kflags |= MCE_HANDLED_EDAC;
	return NOTIFY_DONE;
}

void skx_remove(void)
{
	int i, j;
	struct skx_dev *d, *tmp;

	edac_dbg(0, "\n");

	list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
		list_del(&d->list);
		for (i = 0; i < NUM_IMC; i++) {
			if (d->imc[i].mci)
				skx_unregister_mci(&d->imc[i]);

			if (d->imc[i].mdev)
				pci_dev_put(d->imc[i].mdev);

			if (d->imc[i].mbase)
				iounmap(d->imc[i].mbase);

			for (j = 0; j < NUM_CHANNELS; j++) {
				if (d->imc[i].chan[j].cdev)
					pci_dev_put(d->imc[i].chan[j].cdev);
			}
		}
		if (d->util_all)
			pci_dev_put(d->util_all);
		if (d->pcu_cr3)
			pci_dev_put(d->pcu_cr3);
		if (d->sad_all)
			pci_dev_put(d->sad_all);
		if (d->uracu)
			pci_dev_put(d->uracu);
		kfree(d);
	}
}