pcie-hisi-error.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Driver for handling the PCIe controller errors on
  4. * HiSilicon HIP SoCs.
  5. *
  6. * Copyright (c) 2020 HiSilicon Limited.
  7. */
  8. #include <linux/acpi.h>
  9. #include <acpi/ghes.h>
  10. #include <linux/bitops.h>
  11. #include <linux/delay.h>
  12. #include <linux/pci.h>
  13. #include <linux/platform_device.h>
  14. #include <linux/kfifo.h>
  15. #include <linux/spinlock.h>
  /*
   * HISI PCIe controller error definitions.
   *
   * HISI_PCIE_ERR_MISC_REGS is the number of entries in the err_misc[]
   * register dump carried by struct hisi_pcie_error_data.
   */
  #define HISI_PCIE_ERR_MISC_REGS                 33

  /* Masks tested against the val_bits field of the error record. */
  #define HISI_PCIE_LOCAL_VALID_VERSION           BIT(0)
  #define HISI_PCIE_LOCAL_VALID_SOC_ID            BIT(1)
  #define HISI_PCIE_LOCAL_VALID_SOCKET_ID         BIT(2)
  #define HISI_PCIE_LOCAL_VALID_NIMBUS_ID         BIT(3)
  #define HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID     BIT(4)
  #define HISI_PCIE_LOCAL_VALID_CORE_ID           BIT(5)
  #define HISI_PCIE_LOCAL_VALID_PORT_ID           BIT(6)
  #define HISI_PCIE_LOCAL_VALID_ERR_TYPE          BIT(7)
  #define HISI_PCIE_LOCAL_VALID_ERR_SEVERITY      BIT(8)

  /*
   * Unlike the masks above, this is a bit *index*, not a mask: it is the
   * position in val_bits of the valid flag for err_misc[0].  The flag for
   * err_misc[n] sits at bit (HISI_PCIE_LOCAL_VALID_ERR_MISC + n).
   */
  #define HISI_PCIE_LOCAL_VALID_ERR_MISC          9
  28. static guid_t hisi_pcie_sec_guid =
  29. GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D,
  30. 0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72);
  /*
   * Firmware reports the socket port ID where the error occurred.  These
   * macros convert that to the core ID and core port ID required by the
   * ACPI reset method.
   *
   * HISI_PCIE_PORT_ID(core, v):  combine a core number and the reported
   *                              port value: (v / 2) + (core * 8).
   * HISI_PCIE_CORE_ID(v):        upper part of a combined port ID (v / 8).
   * HISI_PCIE_CORE_PORT_ID(v):   low three bits of a combined port ID,
   *                              doubled.
   */
  #define HISI_PCIE_PORT_ID(core, v)    (((v) >> 1) + ((core) << 3))
  #define HISI_PCIE_CORE_ID(v)          ((v) >> 3)
  #define HISI_PCIE_CORE_PORT_ID(v)     (((v) & 0x7) << 1)
  39. struct hisi_pcie_error_data {
  40. u64 val_bits;
  41. u8 version;
  42. u8 soc_id;
  43. u8 socket_id;
  44. u8 nimbus_id;
  45. u8 sub_module_id;
  46. u8 core_id;
  47. u8 port_id;
  48. u8 err_severity;
  49. u16 err_type;
  50. u8 reserv[2];
  51. u32 err_misc[HISI_PCIE_ERR_MISC_REGS];
  52. };
  53. struct hisi_pcie_error_private {
  54. struct notifier_block nb;
  55. struct device *dev;
  56. };
  /*
   * Values of the sub_module_id field of the error record.  They double as
   * indices into hisi_pcie_sub_module[].
   */
  enum hisi_pcie_submodule_id {
      HISI_PCIE_SUB_MODULE_ID_AP  = 0,
      HISI_PCIE_SUB_MODULE_ID_TL  = 1,
      HISI_PCIE_SUB_MODULE_ID_MAC = 2,
      HISI_PCIE_SUB_MODULE_ID_DL  = 3,
      HISI_PCIE_SUB_MODULE_ID_SDI = 4,
  };
  64. static const char * const hisi_pcie_sub_module[] = {
  65. [HISI_PCIE_SUB_MODULE_ID_AP] = "AP Layer",
  66. [HISI_PCIE_SUB_MODULE_ID_TL] = "TL Layer",
  67. [HISI_PCIE_SUB_MODULE_ID_MAC] = "MAC Layer",
  68. [HISI_PCIE_SUB_MODULE_ID_DL] = "DL Layer",
  69. [HISI_PCIE_SUB_MODULE_ID_SDI] = "SDI Layer",
  70. };
  /*
   * Values of the err_severity field of the error record.  They double as
   * indices into hisi_pcie_error_sev[].  Only the recoverable severity
   * leads to a port reset attempt in the error handler.
   */
  enum hisi_pcie_err_severity {
      HISI_PCIE_ERR_SEV_RECOVERABLE = 0,
      HISI_PCIE_ERR_SEV_FATAL       = 1,
      HISI_PCIE_ERR_SEV_CORRECTED   = 2,
      HISI_PCIE_ERR_SEV_NONE        = 3,
  };
  77. static const char * const hisi_pcie_error_sev[] = {
  78. [HISI_PCIE_ERR_SEV_RECOVERABLE] = "recoverable",
  79. [HISI_PCIE_ERR_SEV_FATAL] = "fatal",
  80. [HISI_PCIE_ERR_SEV_CORRECTED] = "corrected",
  81. [HISI_PCIE_ERR_SEV_NONE] = "none",
  82. };
  /*
   * hisi_pcie_get_string() - map an ID to its printable name.
   * @array: table of names; entries may be NULL
   * @n:     number of entries in @array
   * @id:    index to look up
   *
   * Return: @array[@id] when @id is within the table and the entry is
   * non-NULL, otherwise the literal "unknown".  Never returns NULL.
   */
  static const char *hisi_pcie_get_string(const char * const *array,
                                          size_t n, u32 id)
  {
      /*
       * Direct bounds-checked lookup.  Scanning every index only to test
       * "index == id" is needless work, and a u32 counter compared against
       * a size_t bound would never terminate for n > U32_MAX.
       */
      if (id < n && array[id])
          return array[id];

      return "unknown";
  }
  93. static int hisi_pcie_port_reset(struct platform_device *pdev,
  94. u32 chip_id, u32 port_id)
  95. {
  96. struct device *dev = &pdev->dev;
  97. acpi_handle handle = ACPI_HANDLE(dev);
  98. union acpi_object arg[3];
  99. struct acpi_object_list arg_list;
  100. acpi_status s;
  101. unsigned long long data = 0;
  102. arg[0].type = ACPI_TYPE_INTEGER;
  103. arg[0].integer.value = chip_id;
  104. arg[1].type = ACPI_TYPE_INTEGER;
  105. arg[1].integer.value = HISI_PCIE_CORE_ID(port_id);
  106. arg[2].type = ACPI_TYPE_INTEGER;
  107. arg[2].integer.value = HISI_PCIE_CORE_PORT_ID(port_id);
  108. arg_list.count = 3;
  109. arg_list.pointer = arg;
  110. s = acpi_evaluate_integer(handle, "RST", &arg_list, &data);
  111. if (ACPI_FAILURE(s)) {
  112. dev_err(dev, "No RST method\n");
  113. return -EIO;
  114. }
  115. if (data) {
  116. dev_err(dev, "Failed to Reset\n");
  117. return -EIO;
  118. }
  119. return 0;
  120. }
  121. static int hisi_pcie_port_do_recovery(struct platform_device *dev,
  122. u32 chip_id, u32 port_id)
  123. {
  124. acpi_status s;
  125. struct device *device = &dev->dev;
  126. acpi_handle root_handle = ACPI_HANDLE(device);
  127. struct acpi_pci_root *pci_root;
  128. struct pci_bus *root_bus;
  129. struct pci_dev *pdev;
  130. u32 domain, busnr, devfn;
  131. s = acpi_get_parent(root_handle, &root_handle);
  132. if (ACPI_FAILURE(s))
  133. return -ENODEV;
  134. pci_root = acpi_pci_find_root(root_handle);
  135. if (!pci_root)
  136. return -ENODEV;
  137. root_bus = pci_root->bus;
  138. domain = pci_root->segment;
  139. busnr = root_bus->number;
  140. devfn = PCI_DEVFN(port_id, 0);
  141. pdev = pci_get_domain_bus_and_slot(domain, busnr, devfn);
  142. if (!pdev) {
  143. dev_info(device, "Fail to get root port %04x:%02x:%02x.%d device\n",
  144. domain, busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
  145. return -ENODEV;
  146. }
  147. pci_stop_and_remove_bus_device_locked(pdev);
  148. pci_dev_put(pdev);
  149. if (hisi_pcie_port_reset(dev, chip_id, port_id))
  150. return -EIO;
  151. /*
  152. * The initialization time of subordinate devices after
  153. * hot reset is no more than 1s, which is required by
  154. * the PCI spec v5.0 sec 6.6.1. The time will shorten
  155. * if Readiness Notifications mechanisms are used. But
  156. * wait 1s here to adapt any conditions.
  157. */
  158. ssleep(1UL);
  159. /* add root port and downstream devices */
  160. pci_lock_rescan_remove();
  161. pci_rescan_bus(root_bus);
  162. pci_unlock_rescan_remove();
  163. return 0;
  164. }
  165. static void hisi_pcie_handle_error(struct platform_device *pdev,
  166. const struct hisi_pcie_error_data *edata)
  167. {
  168. struct device *dev = &pdev->dev;
  169. int idx, rc;
  170. const unsigned long valid_bits[] = {BITMAP_FROM_U64(edata->val_bits)};
  171. if (edata->val_bits == 0) {
  172. dev_warn(dev, "%s: no valid error information\n", __func__);
  173. return;
  174. }
  175. dev_info(dev, "\nHISI : HIP : PCIe controller error\n");
  176. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOC_ID)
  177. dev_info(dev, "Table version = %d\n", edata->version);
  178. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOCKET_ID)
  179. dev_info(dev, "Socket ID = %d\n", edata->socket_id);
  180. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_NIMBUS_ID)
  181. dev_info(dev, "Nimbus ID = %d\n", edata->nimbus_id);
  182. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID)
  183. dev_info(dev, "Sub Module = %s\n",
  184. hisi_pcie_get_string(hisi_pcie_sub_module,
  185. ARRAY_SIZE(hisi_pcie_sub_module),
  186. edata->sub_module_id));
  187. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_CORE_ID)
  188. dev_info(dev, "Core ID = core%d\n", edata->core_id);
  189. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_PORT_ID)
  190. dev_info(dev, "Port ID = port%d\n", edata->port_id);
  191. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_SEVERITY)
  192. dev_info(dev, "Error severity = %s\n",
  193. hisi_pcie_get_string(hisi_pcie_error_sev,
  194. ARRAY_SIZE(hisi_pcie_error_sev),
  195. edata->err_severity));
  196. if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_TYPE)
  197. dev_info(dev, "Error type = 0x%x\n", edata->err_type);
  198. dev_info(dev, "Reg Dump:\n");
  199. idx = HISI_PCIE_LOCAL_VALID_ERR_MISC;
  200. for_each_set_bit_from(idx, valid_bits,
  201. HISI_PCIE_LOCAL_VALID_ERR_MISC + HISI_PCIE_ERR_MISC_REGS)
  202. dev_info(dev, "ERR_MISC_%d = 0x%x\n", idx - HISI_PCIE_LOCAL_VALID_ERR_MISC,
  203. edata->err_misc[idx - HISI_PCIE_LOCAL_VALID_ERR_MISC]);
  204. if (edata->err_severity != HISI_PCIE_ERR_SEV_RECOVERABLE)
  205. return;
  206. /* Recovery for the PCIe controller errors, try reset
  207. * PCI port for the error recovery
  208. */
  209. rc = hisi_pcie_port_do_recovery(pdev, edata->socket_id,
  210. HISI_PCIE_PORT_ID(edata->core_id, edata->port_id));
  211. if (rc)
  212. dev_info(dev, "fail to do hisi pcie port reset\n");
  213. }
  214. static int hisi_pcie_notify_error(struct notifier_block *nb,
  215. unsigned long event, void *data)
  216. {
  217. struct acpi_hest_generic_data *gdata = data;
  218. const struct hisi_pcie_error_data *error_data = acpi_hest_get_payload(gdata);
  219. struct hisi_pcie_error_private *priv;
  220. struct device *dev;
  221. struct platform_device *pdev;
  222. guid_t err_sec_guid;
  223. u8 socket;
  224. import_guid(&err_sec_guid, gdata->section_type);
  225. if (!guid_equal(&err_sec_guid, &hisi_pcie_sec_guid))
  226. return NOTIFY_DONE;
  227. priv = container_of(nb, struct hisi_pcie_error_private, nb);
  228. dev = priv->dev;
  229. if (device_property_read_u8(dev, "socket", &socket))
  230. return NOTIFY_DONE;
  231. if (error_data->socket_id != socket)
  232. return NOTIFY_DONE;
  233. pdev = container_of(dev, struct platform_device, dev);
  234. hisi_pcie_handle_error(pdev, error_data);
  235. return NOTIFY_OK;
  236. }
  237. static int hisi_pcie_error_handler_probe(struct platform_device *pdev)
  238. {
  239. struct hisi_pcie_error_private *priv;
  240. int ret;
  241. priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
  242. if (!priv)
  243. return -ENOMEM;
  244. priv->nb.notifier_call = hisi_pcie_notify_error;
  245. priv->dev = &pdev->dev;
  246. ret = ghes_register_vendor_record_notifier(&priv->nb);
  247. if (ret) {
  248. dev_err(&pdev->dev,
  249. "Failed to register hisi pcie controller error handler with apei\n");
  250. return ret;
  251. }
  252. platform_set_drvdata(pdev, priv);
  253. return 0;
  254. }
  255. static int hisi_pcie_error_handler_remove(struct platform_device *pdev)
  256. {
  257. struct hisi_pcie_error_private *priv = platform_get_drvdata(pdev);
  258. ghes_unregister_vendor_record_notifier(&priv->nb);
  259. return 0;
  260. }
  261. static const struct acpi_device_id hisi_pcie_acpi_match[] = {
  262. { "HISI0361", 0 },
  263. { }
  264. };
  265. static struct platform_driver hisi_pcie_error_handler_driver = {
  266. .driver = {
  267. .name = "hisi-pcie-error-handler",
  268. .acpi_match_table = hisi_pcie_acpi_match,
  269. },
  270. .probe = hisi_pcie_error_handler_probe,
  271. .remove = hisi_pcie_error_handler_remove,
  272. };
  273. module_platform_driver(hisi_pcie_error_handler_driver);
  274. MODULE_DESCRIPTION("HiSilicon HIP PCIe controller error handling driver");
  275. MODULE_LICENSE("GPL v2");