i10nm_base.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel(R) 10nm server memory controller.
 * Copyright (c) 2019, Intel Corporation.
 *
 */

#include <linux/kernel.h>
#include <linux/io.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mce.h>

#include "edac_module.h"
#include "skx_common.h"

#define I10NM_REVISION	"v0.0.5"
#define EDAC_MOD_STR	"i10nm_edac"

/* Debug macros */
#define i10nm_printk(level, fmt, arg...)	\
	edac_printk(level, "i10nm", fmt, ##arg)

#define I10NM_GET_SCK_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg)	\
	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
#define I10NM_GET_SAD(d, offset, i, reg)	\
	pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
#define I10NM_GET_CAPID3_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
	(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_MCMTR(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_REG32(m, i, offset)	\
	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_GET_REG64(m, i, offset)	\
	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_SET_REG32(m, i, offset, v)	\
	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))

#define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)

#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)

#define I10NM_MAX_SAD			16
#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)

#define RETRY_RD_ERR_LOG_UC		BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
#define RETRY_RD_ERR_LOG_EN		BIT(15)
#define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))

static struct list_head *i10nm_edac_list;

static struct res_config *res_cfg;
static int retry_rd_err_log;
static int decoding_via_mca;
static bool mem_cfg_2lm;

static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr_hbm0[]  = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
static u32 offsets_scrub_spr_hbm1[]  = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
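
/*
 * Enable or disable the retry read-error log registers on one channel.
 * When enabling, the current settings are saved so they can be restored
 * when the log is disabled again. offsets_demand2 is only passed on
 * platforms that provide a second demand-error log.
 */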
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
				      u32 *offsets_scrub, u32 *offsets_demand,
				      u32 *offsets_demand2)
{
	u32 s, d, d2;

	s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
	d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
	if (offsets_demand2)
		d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);

	if (enable) {
		/* Save default configurations */
		imc->chan[chan].retry_rd_err_log_s = s;
		imc->chan[chan].retry_rd_err_log_d = d;
		if (offsets_demand2)
			imc->chan[chan].retry_rd_err_log_d2 = d2;

		s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
		s |=  RETRY_RD_ERR_LOG_EN;
		d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
		d |=  RETRY_RD_ERR_LOG_EN;

		if (offsets_demand2) {
			d2 &= ~RETRY_RD_ERR_LOG_UC;
			d2 |=  RETRY_RD_ERR_LOG_NOOVER;
			d2 |=  RETRY_RD_ERR_LOG_EN;
		}
	} else {
		/* Restore default configurations */
		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
			s |=  RETRY_RD_ERR_LOG_UC;
		if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
			s |=  RETRY_RD_ERR_LOG_NOOVER;
		if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
			s &= ~RETRY_RD_ERR_LOG_EN;
		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
			d |=  RETRY_RD_ERR_LOG_UC;
		if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
			d |=  RETRY_RD_ERR_LOG_NOOVER;
		if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
			d &= ~RETRY_RD_ERR_LOG_EN;

		if (offsets_demand2) {
			if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
				d2 |=  RETRY_RD_ERR_LOG_UC;
			if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
				d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
			if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
				d2 &= ~RETRY_RD_ERR_LOG_EN;
		}
	}

	I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
	I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
	if (offsets_demand2)
		I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
}

static void enable_retry_rd_err_log(bool enable)
{
	struct skx_imc *imc;
	struct skx_dev *d;
	int i, j;

	edac_dbg(2, "\n");

	list_for_each_entry(d, i10nm_edac_list, list)
		for (i = 0; i < I10NM_NUM_IMC; i++) {
			imc = &d->imc[i];
			if (!imc->mbase)
				continue;

			for (j = 0; j < I10NM_NUM_CHANNELS; j++) {
				if (imc->hbm_mc) {
					__enable_retry_rd_err_log(imc, j, enable,
								  res_cfg->offsets_scrub_hbm0,
								  res_cfg->offsets_demand_hbm0,
								  NULL);
					__enable_retry_rd_err_log(imc, j, enable,
								  res_cfg->offsets_scrub_hbm1,
								  res_cfg->offsets_demand_hbm1,
								  NULL);
				} else {
					__enable_retry_rd_err_log(imc, j, enable,
								  res_cfg->offsets_scrub,
								  res_cfg->offsets_demand,
								  res_cfg->offsets_demand2);
				}
			}
		}
}
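
/*
 * Read the retry read-error log and corrected-error count registers for
 * the channel of a decoded error and append them to the error message.
 * When retry_rd_err_log=2, the overflow/UC/valid status bits are cleared
 * after they have been reported.
 */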
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
				  int len, bool scrub_err)
{
	struct skx_imc *imc = &res->dev->imc[res->imc];
	u32 log0, log1, log2, log3, log4;
	u32 corr0, corr1, corr2, corr3;
	u32 lxg0, lxg1, lxg3, lxg4;
	u32 *xffsets = NULL;
	u64 log2a, log5;
	u64 lxg2a, lxg5;
	u32 *offsets;
	int n, pch;

	if (!imc->mbase)
		return;

	if (imc->hbm_mc) {
		pch = res->cs & 1;

		if (pch)
			offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
					      res_cfg->offsets_demand_hbm1;
		else
			offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
					      res_cfg->offsets_demand_hbm0;
	} else {
		if (scrub_err) {
			offsets = res_cfg->offsets_scrub;
		} else {
			offsets = res_cfg->offsets_demand;
			xffsets = res_cfg->offsets_demand2;
		}
	}

	log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
	log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
	log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
	log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
	log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);

	if (xffsets) {
		lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
		lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
		lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
		lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
		lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
	}

	if (res_cfg->type == SPR) {
		log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
			     log0, log1, log2a, log3, log4, log5);

		if (len - n > 0) {
			if (xffsets) {
				lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
				n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
					      lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
			} else {
				n += snprintf(msg + n, len - n, "]");
			}
		}
	} else {
		log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
			     log0, log1, log2, log3, log4, log5);
	}

	if (imc->hbm_mc) {
		if (pch) {
			corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
			corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
			corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
			corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
		} else {
			corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
			corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
			corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
			corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
		}
	} else {
		corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
		corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
		corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
		corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
	}

	if (len - n > 0)
		snprintf(msg + n, len - n,
			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
			 corr0 & 0xffff, corr0 >> 16,
			 corr1 & 0xffff, corr1 >> 16,
			 corr2 & 0xffff, corr2 >> 16,
			 corr3 & 0xffff, corr3 >> 16);

	/* Clear status bits */
	if (retry_rd_err_log == 2) {
		if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
			log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
			I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
		}

		if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
			lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
			I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
		}
	}
}
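
/*
 * Look up the PCI device at the given domain/bus/device/function and
 * enable it, returning NULL (with a debug message) on failure.
 */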
static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
					   unsigned int dev, unsigned int fun)
{
	struct pci_dev *pdev;

	pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
	if (!pdev) {
		edac_dbg(2, "No device %02x:%02x.%x\n",
			 bus, dev, fun);
		return NULL;
	}

	if (unlikely(pci_enable_device(pdev) < 0)) {
		edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
			 bus, dev, fun);
		pci_dev_put(pdev);
		return NULL;
	}

	return pdev;
}
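
/*
 * Scan the SAD rules on each socket and report whether any enabled rule
 * caches near memory, i.e. whether the system is configured in 2-level
 * memory mode.
 */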
static bool i10nm_check_2lm(struct res_config *cfg)
{
	struct skx_dev *d;
	u32 reg;
	int i;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
						 PCI_SLOT(cfg->sad_all_devfn),
						 PCI_FUNC(cfg->sad_all_devfn));
		if (!d->sad_all)
			continue;

		for (i = 0; i < I10NM_MAX_SAD; i++) {
			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
				edac_dbg(2, "2-level memory configuration.\n");
				return true;
			}
		}
	}

	return false;
}

/*
 * Check whether the error comes from DDRT by ICX/Tremont model specific error code.
 * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS.
 */
static bool i10nm_mscod_is_ddrt(u32 mscod)
{
	switch (mscod) {
	case 0x0106: case 0x0107:
	case 0x0800: case 0x0804:
	case 0x0806 ... 0x0808:
	case 0x080a ... 0x080e:
	case 0x0810: case 0x0811:
	case 0x0816: case 0x081e:
	case 0x081f:
		return true;
	}

	return false;
}
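
/*
 * Decide whether this machine check can be decoded from the MCA bank
 * registers (decoding_via_mca mode): the MISC and ADDR registers must be
 * valid, the bank must belong to a memory controller, and the error must
 * not come from DDRT, which cannot be decoded this way.
 */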
static bool i10nm_mc_decode_available(struct mce *mce)
{
	u8 bank;

	if (!decoding_via_mca || mem_cfg_2lm)
		return false;

	if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
			!= (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
		return false;

	bank = mce->bank;

	switch (res_cfg->type) {
	case I10NM:
		if (bank < 13 || bank > 26)
			return false;

		/* DDRT errors can't be decoded from MCA bank registers */
		if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
			return false;

		if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
			return false;

		/* Check whether one of {13,14,17,18,21,22,25,26} */
		return ((bank - 13) & BIT(1)) == 0;
	default:
		return false;
	}
}
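
/*
 * Decode a DRAM error from the MCA bank number and the IA32_MCi_MISC
 * register into socket, memory controller, channel, DIMM, rank, bank
 * group/address, row and column.
 */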
static bool i10nm_mc_decode(struct decoded_addr *res)
{
	struct mce *m = res->mce;
	struct skx_dev *d;
	u8 bank;

	if (!i10nm_mc_decode_available(m))
		return false;

	list_for_each_entry(d, i10nm_edac_list, list) {
		if (d->imc[0].src_id == m->socketid) {
			res->socket = m->socketid;
			res->dev = d;
			break;
		}
	}

	switch (res_cfg->type) {
	case I10NM:
		bank = m->bank - 13;
		res->imc = bank / 4;
		res->channel = bank % 2;
		break;
	default:
		return false;
	}

	if (!res->dev) {
		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
			   m->socketid, res->imc);
		return false;
	}

	res->column       = GET_BITFIELD(m->misc, 9, 18) << 2;
	res->row          = GET_BITFIELD(m->misc, 19, 39);
	res->bank_group   = GET_BITFIELD(m->misc, 40, 41);
	res->bank_address = GET_BITFIELD(m->misc, 42, 43);
	res->bank_group  |= GET_BITFIELD(m->misc, 44, 44) << 2;
	res->rank         = GET_BITFIELD(m->misc, 56, 58);
	res->dimm         = res->rank >> 2;
	res->rank         = res->rank % 4;

	return true;
}
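
/*
 * Find the DDR memory-controller PCI devices on each socket, read their
 * MMIO base/offset registers and ioremap the register blocks.
 */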
static int i10nm_get_ddr_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
	unsigned long size;
	struct skx_dev *d;
	int i, j = 0;
	u32 reg, off;
	u64 base;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
		if (!d->util_all)
			return -ENODEV;

		d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
		if (!d->uracu)
			return -ENODEV;

		if (I10NM_GET_SCK_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
			return -ENODEV;
		}
		base = I10NM_GET_SCK_MMIO_BASE(reg);
		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
			 j++, base, reg);

		for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
						   12 + i, 0);
			if (i == 0 && !mdev) {
				i10nm_printk(KERN_ERR, "No IMC found\n");
				return -ENODEV;
			}
			if (!mdev)
				continue;

			d->imc[i].mdev = mdev;

			if (I10NM_GET_IMC_BAR(d, i, reg)) {
				i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
				return -ENODEV;
			}

			off  = I10NM_GET_IMC_MMIO_OFFSET(reg);
			size = I10NM_GET_IMC_MMIO_SIZE(reg);
			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
				 i, base + off, size, reg);

			mbase = ioremap(base + off, size);
			if (!mbase) {
				i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
					     base + off);
				return -ENODEV;
			}

			d->imc[i].mbase = mbase;
		}
	}

	return 0;
}

static bool i10nm_check_hbm_imc(struct skx_dev *d)
{
	u32 reg;

	if (I10NM_GET_CAPID3_CFG(d, reg)) {
		i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
		return false;
	}

	return I10NM_IS_HBM_PRESENT(reg) != 0;
}
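
/*
 * Find the HBM memory controllers (if any are present), ioremap their
 * register blocks and append them to the IMC array after the DDR ones.
 */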
static int i10nm_get_hbm_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
	u32 reg, off, mcmtr;
	struct skx_dev *d;
	int i, lmc;
	u64 base;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
		if (!d->pcu_cr3)
			return -ENODEV;

		if (!i10nm_check_hbm_imc(d)) {
			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
			return -ENODEV;
		}

		if (I10NM_GET_SCK_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
			return -ENODEV;
		}
		base = I10NM_GET_SCK_MMIO_BASE(reg);

		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
			return -ENODEV;
		}
		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);

		lmc = I10NM_NUM_DDR_IMC;

		for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
						   12 + i / 4, 1 + i % 4);
			if (i == 0 && !mdev) {
				i10nm_printk(KERN_ERR, "No hbm mc found\n");
				return -ENODEV;
			}
			if (!mdev)
				continue;

			d->imc[lmc].mdev = mdev;
			off = i * I10NM_HBM_IMC_MMIO_SIZE;

			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);

			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
			if (!mbase) {
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
					     base + off);
				return -ENOMEM;
			}

			d->imc[lmc].mbase = mbase;
			d->imc[lmc].hbm_mc = true;

			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
			if (!I10NM_IS_HBM_IMC(mcmtr)) {
				iounmap(d->imc[lmc].mbase);
				d->imc[lmc].mbase = NULL;
				d->imc[lmc].hbm_mc = false;
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
				return -ENODEV;
			}

			lmc++;
		}
	}

	return 0;
}

static struct res_config i10nm_cfg0 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xcc,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_devfn		= PCI_DEVFN(29, 0),
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};

static struct res_config i10nm_cfg1 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xd0,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_devfn		= PCI_DEVFN(29, 0),
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};

static struct res_config spr_cfg = {
	.type			= SPR,
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_chan_mmio_sz	= 0x8000,
	.hbm_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_devfn		= PCI_DEVFN(10, 0),
	.sad_all_offset		= 0x300,
	.offsets_scrub		= offsets_scrub_spr,
	.offsets_scrub_hbm0	= offsets_scrub_spr_hbm0,
	.offsets_scrub_hbm1	= offsets_scrub_spr_hbm1,
	.offsets_demand		= offsets_demand_spr,
	.offsets_demand2	= offsets_demand2_spr,
	.offsets_demand_hbm0	= offsets_demand_spr_hbm0,
	.offsets_demand_hbm1	= offsets_demand_spr_hbm1,
};

static const struct x86_cpu_id i10nm_cpuids[] = {
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
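
/*
 * Report whether ECC checking is enabled on this channel, as indicated
 * by bit 2 of the channel's MCMTR register.
 */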
static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
{
	u32 mcmtr;

	mcmtr = I10NM_GET_MCMTR(imc, chan);
	edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);

	return !!GET_BITFIELD(mcmtr, 2, 2);
}
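
/*
 * Enumerate the DIMMs and NVDIMMs on every channel of this memory
 * controller for the EDAC core, and fail if any populated channel has
 * ECC disabled.
 */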
static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
				 struct res_config *cfg)
{
	struct skx_pvt *pvt = mci->pvt_info;
	struct skx_imc *imc = pvt->imc;
	u32 mtr, amap, mcddrtcfg;
	struct dimm_info *dimm;
	int i, j, ndimms;

	for (i = 0; i < imc->num_channels; i++) {
		if (!imc->mbase)
			continue;

		ndimms = 0;
		amap = I10NM_GET_AMAP(imc, i);
		mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
		for (j = 0; j < imc->num_dimms; j++) {
			dimm = edac_get_dimm(mci, i, j, 0);
			mtr = I10NM_GET_DIMMMTR(imc, i, j);
			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
				 mtr, mcddrtcfg, imc->mc, i, j);

			if (IS_DIMM_PRESENT(mtr))
				ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
							    imc, i, j, cfg);
			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
							      EDAC_MOD_STR);
		}
		if (ndimms && !i10nm_check_ecc(imc, i)) {
			i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n",
				     imc->mc, i);
			return -ENODEV;
		}
	}

	return 0;
}

static struct notifier_block i10nm_mce_dec = {
	.notifier_call	= skx_mce_check_error,
	.priority	= MCE_PRIO_EDAC,
};

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/i10nm_test/addr.
 */
static struct dentry *i10nm_test;

static int debugfs_u64_set(void *data, u64 val)
{
	struct mce m;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&m, 0, sizeof(m));
	/* ADDRV + MemRd + Unknown channel */
	m.status = MCI_STATUS_ADDRV + 0x90;
	/* One corrected error */
	m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
	m.addr = val;
	skx_mce_check_error(NULL, 0, &m);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

static void setup_i10nm_debug(void)
{
	i10nm_test = edac_debugfs_create_dir("i10nm_test");
	if (!i10nm_test)
		return;

	if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
				      NULL, &fops_u64_wo)) {
		debugfs_remove(i10nm_test);
		i10nm_test = NULL;
	}
}

static void teardown_i10nm_debug(void)
{
	debugfs_remove_recursive(i10nm_test);
}
#else
static inline void setup_i10nm_debug(void) {}
static inline void teardown_i10nm_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/
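
/*
 * Module init: match the running CPU against the supported models, build
 * the per-socket bus mappings, map the DDR (and HBM, if present) memory
 * controllers, and register an EDAC MC device for each of them.
 */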
static int __init i10nm_init(void)
{
	u8 mc = 0, src_id = 0, node_id = 0;
	const struct x86_cpu_id *id;
	struct res_config *cfg;
	const char *owner;
	struct skx_dev *d;
	int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
	u64 tolm, tohm;

	edac_dbg(2, "\n");

	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	id = x86_match_cpu(i10nm_cpuids);
	if (!id)
		return -ENODEV;

	cfg = (struct res_config *)id->driver_data;
	res_cfg = cfg;

	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
	if (rc)
		return rc;

	rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list);
	if (rc < 0)
		goto fail;
	if (rc == 0) {
		i10nm_printk(KERN_ERR, "No memory controllers found\n");
		return -ENODEV;
	}

	mem_cfg_2lm = i10nm_check_2lm(cfg);
	skx_set_mem_cfg(mem_cfg_2lm);

	rc = i10nm_get_ddr_munits();

	if (i10nm_get_hbm_munits() && rc)
		goto fail;

	list_for_each_entry(d, i10nm_edac_list, list) {
		rc = skx_get_src_id(d, 0xf8, &src_id);
		if (rc < 0)
			goto fail;

		rc = skx_get_node_id(d, &node_id);
		if (rc < 0)
			goto fail;

		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
		for (i = 0; i < I10NM_NUM_IMC; i++) {
			if (!d->imc[i].mdev)
				continue;

			d->imc[i].mc  = mc++;
			d->imc[i].lmc = i;
			d->imc[i].src_id  = src_id;
			d->imc[i].node_id = node_id;
			if (d->imc[i].hbm_mc) {
				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
				d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
				d->imc[i].num_dimms    = I10NM_NUM_HBM_DIMMS;
			} else {
				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
				d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
				d->imc[i].num_dimms    = I10NM_NUM_DDR_DIMMS;
			}

			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
					      "Intel_10nm Socket", EDAC_MOD_STR,
					      i10nm_get_dimm_config, cfg);
			if (rc < 0)
				goto fail;
		}
	}

	rc = skx_adxl_get();
	if (rc)
		goto fail;

	opstate_init();
	mce_register_decode_chain(&i10nm_mce_dec);
	setup_i10nm_debug();

	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
		skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
		if (retry_rd_err_log == 2)
			enable_retry_rd_err_log(true);
	} else {
		skx_set_decode(i10nm_mc_decode, NULL);
	}

	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);

	return 0;
fail:
	skx_remove();
	return rc;
}

static void __exit i10nm_exit(void)
{
	edac_dbg(2, "\n");

	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
		skx_set_decode(NULL, NULL);
		if (retry_rd_err_log == 2)
			enable_retry_rd_err_log(false);
	}

	teardown_i10nm_debug();
	mce_unregister_decode_chain(&i10nm_mce_dec);
	skx_adxl_put();
	skx_remove();
}

module_init(i10nm_init);
module_exit(i10nm_exit);

static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);
	if (ret || val > 1)
		return -EINVAL;

	if (val && mem_cfg_2lm) {
		i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
		return -EIO;
	}

	ret = param_set_int(buf, kp);

	return ret;
}

static const struct kernel_param_ops decoding_via_mca_param_ops = {
	.set = set_decoding_via_mca,
	.get = param_get_int,
};

module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");

module_param(retry_rd_err_log, int, 0444);
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");