pci_event.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright IBM Corp. 2012
  4. *
  5. * Author(s):
  6. * Jan Glauber <[email protected]>
  7. */
  8. #define KMSG_COMPONENT "zpci"
  9. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  10. #include <linux/kernel.h>
  11. #include <linux/pci.h>
  12. #include <asm/pci_debug.h>
  13. #include <asm/pci_dma.h>
  14. #include <asm/sclp.h>
  15. #include "pci_bus.h"
  16. /* Content Code Description for PCI Function Error */
  17. struct zpci_ccdf_err {
  18. u32 reserved1;
  19. u32 fh; /* function handle */
  20. u32 fid; /* function id */
  21. u32 ett : 4; /* expected table type */
  22. u32 mvn : 12; /* MSI vector number */
  23. u32 dmaas : 8; /* DMA address space */
  24. u32 : 6;
  25. u32 q : 1; /* event qualifier */
  26. u32 rw : 1; /* read/write */
  27. u64 faddr; /* failing address */
  28. u32 reserved3;
  29. u16 reserved4;
  30. u16 pec; /* PCI event code */
  31. } __packed;
  32. /* Content Code Description for PCI Function Availability */
  33. struct zpci_ccdf_avail {
  34. u32 reserved1;
  35. u32 fh; /* function handle */
  36. u32 fid; /* function id */
  37. u32 reserved2;
  38. u32 reserved3;
  39. u32 reserved4;
  40. u32 reserved5;
  41. u16 reserved6;
  42. u16 pec; /* PCI event code */
  43. } __packed;
  44. static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
  45. {
  46. switch (ers_res) {
  47. case PCI_ERS_RESULT_CAN_RECOVER:
  48. case PCI_ERS_RESULT_RECOVERED:
  49. case PCI_ERS_RESULT_NEED_RESET:
  50. return false;
  51. default:
  52. return true;
  53. }
  54. }
  55. static bool is_passed_through(struct zpci_dev *zdev)
  56. {
  57. return zdev->s390_domain;
  58. }
  59. static bool is_driver_supported(struct pci_driver *driver)
  60. {
  61. if (!driver || !driver->err_handler)
  62. return false;
  63. if (!driver->err_handler->error_detected)
  64. return false;
  65. if (!driver->err_handler->slot_reset)
  66. return false;
  67. if (!driver->err_handler->resume)
  68. return false;
  69. return true;
  70. }
  71. static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
  72. struct pci_driver *driver)
  73. {
  74. pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
  75. ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
  76. if (ers_result_indicates_abort(ers_res))
  77. pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
  78. else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
  79. pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
  80. return ers_res;
  81. }
  82. static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
  83. struct pci_driver *driver)
  84. {
  85. pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
  86. struct zpci_dev *zdev = to_zpci(pdev);
  87. int rc;
  88. pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
  89. rc = zpci_reset_load_store_blocked(zdev);
  90. if (rc) {
  91. pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
  92. /* Let's try a full reset instead */
  93. return PCI_ERS_RESULT_NEED_RESET;
  94. }
  95. if (driver->err_handler->mmio_enabled) {
  96. ers_res = driver->err_handler->mmio_enabled(pdev);
  97. if (ers_result_indicates_abort(ers_res)) {
  98. pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
  99. pci_name(pdev));
  100. return ers_res;
  101. } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
  102. pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
  103. return ers_res;
  104. }
  105. }
  106. pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
  107. rc = zpci_clear_error_state(zdev);
  108. if (!rc) {
  109. pdev->error_state = pci_channel_io_normal;
  110. } else {
  111. pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
  112. /* Let's try a full reset instead */
  113. return PCI_ERS_RESULT_NEED_RESET;
  114. }
  115. return ers_res;
  116. }
  117. static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
  118. struct pci_driver *driver)
  119. {
  120. pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
  121. pr_info("%s: Initiating reset\n", pci_name(pdev));
  122. if (zpci_hot_reset_device(to_zpci(pdev))) {
  123. pr_err("%s: The reset request failed\n", pci_name(pdev));
  124. return ers_res;
  125. }
  126. pdev->error_state = pci_channel_io_normal;
  127. ers_res = driver->err_handler->slot_reset(pdev);
  128. if (ers_result_indicates_abort(ers_res)) {
  129. pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
  130. return ers_res;
  131. }
  132. return ers_res;
  133. }
  134. /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
  135. * @pdev: PCI function to recover currently in the error state
  136. *
  137. * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
  138. * With the simplification that recovery always happens per function
  139. * and the platform determines which functions are affected for
  140. * multi-function devices.
  141. */
  142. static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
  143. {
  144. pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
  145. struct pci_driver *driver;
  146. /*
  147. * Ensure that the PCI function is not removed concurrently, no driver
  148. * is unbound or probed and that userspace can't access its
  149. * configuration space while we perform recovery.
  150. */
  151. pci_dev_lock(pdev);
  152. if (pdev->error_state == pci_channel_io_perm_failure) {
  153. ers_res = PCI_ERS_RESULT_DISCONNECT;
  154. goto out_unlock;
  155. }
  156. pdev->error_state = pci_channel_io_frozen;
  157. if (is_passed_through(to_zpci(pdev))) {
  158. pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
  159. pci_name(pdev));
  160. goto out_unlock;
  161. }
  162. driver = to_pci_driver(pdev->dev.driver);
  163. if (!is_driver_supported(driver)) {
  164. if (!driver)
  165. pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
  166. pci_name(pdev));
  167. else
  168. pr_info("%s: The %s driver bound to the device does not support error recovery\n",
  169. pci_name(pdev),
  170. driver->name);
  171. goto out_unlock;
  172. }
  173. ers_res = zpci_event_notify_error_detected(pdev, driver);
  174. if (ers_result_indicates_abort(ers_res))
  175. goto out_unlock;
  176. if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
  177. ers_res = zpci_event_do_error_state_clear(pdev, driver);
  178. if (ers_result_indicates_abort(ers_res))
  179. goto out_unlock;
  180. }
  181. if (ers_res == PCI_ERS_RESULT_NEED_RESET)
  182. ers_res = zpci_event_do_reset(pdev, driver);
  183. if (ers_res != PCI_ERS_RESULT_RECOVERED) {
  184. pr_err("%s: Automatic recovery failed; operator intervention is required\n",
  185. pci_name(pdev));
  186. goto out_unlock;
  187. }
  188. pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
  189. if (driver->err_handler->resume)
  190. driver->err_handler->resume(pdev);
  191. out_unlock:
  192. pci_dev_unlock(pdev);
  193. return ers_res;
  194. }
  195. /* zpci_event_io_failure - Report PCI channel failure state to driver
  196. * @pdev: PCI function for which to report
  197. * @es: PCI channel failure state to report
  198. */
  199. static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
  200. {
  201. struct pci_driver *driver;
  202. pci_dev_lock(pdev);
  203. pdev->error_state = es;
  204. /**
  205. * While vfio-pci's error_detected callback notifies user-space QEMU
  206. * reacts to this by freezing the guest. In an s390 environment PCI
  207. * errors are rarely fatal so this is overkill. Instead in the future
  208. * we will inject the error event and let the guest recover the device
  209. * itself.
  210. */
  211. if (is_passed_through(to_zpci(pdev)))
  212. goto out;
  213. driver = to_pci_driver(pdev->dev.driver);
  214. if (driver && driver->err_handler && driver->err_handler->error_detected)
  215. driver->err_handler->error_detected(pdev, pdev->error_state);
  216. out:
  217. pci_dev_unlock(pdev);
  218. }
  219. static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
  220. {
  221. struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
  222. struct pci_dev *pdev = NULL;
  223. pci_ers_result_t ers_res;
  224. zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
  225. ccdf->fid, ccdf->fh, ccdf->pec);
  226. zpci_err("error CCDF:\n");
  227. zpci_err_hex(ccdf, sizeof(*ccdf));
  228. if (zdev) {
  229. zpci_update_fh(zdev, ccdf->fh);
  230. if (zdev->zbus->bus)
  231. pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
  232. }
  233. pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
  234. pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
  235. if (!pdev)
  236. goto no_pdev;
  237. switch (ccdf->pec) {
  238. case 0x003a: /* Service Action or Error Recovery Successful */
  239. ers_res = zpci_event_attempt_error_recovery(pdev);
  240. if (ers_res != PCI_ERS_RESULT_RECOVERED)
  241. zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
  242. break;
  243. default:
  244. /*
  245. * Mark as frozen not permanently failed because the device
  246. * could be subsequently recovered by the platform.
  247. */
  248. zpci_event_io_failure(pdev, pci_channel_io_frozen);
  249. break;
  250. }
  251. pci_dev_put(pdev);
  252. no_pdev:
  253. zpci_zdev_put(zdev);
  254. }
  255. void zpci_event_error(void *data)
  256. {
  257. if (zpci_is_enabled())
  258. __zpci_event_error(data);
  259. }
  260. static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
  261. {
  262. zpci_update_fh(zdev, fh);
  263. /* Give the driver a hint that the function is
  264. * already unusable.
  265. */
  266. zpci_bus_remove_device(zdev, true);
  267. /* Even though the device is already gone we still
  268. * need to free zPCI resources as part of the disable.
  269. */
  270. if (zdev->dma_table)
  271. zpci_dma_exit_device(zdev);
  272. if (zdev_enabled(zdev))
  273. zpci_disable_device(zdev);
  274. zdev->state = ZPCI_FN_STATE_STANDBY;
  275. }
  276. static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
  277. {
  278. struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
  279. bool existing_zdev = !!zdev;
  280. enum zpci_state state;
  281. zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
  282. ccdf->fid, ccdf->fh, ccdf->pec);
  283. switch (ccdf->pec) {
  284. case 0x0301: /* Reserved|Standby -> Configured */
  285. if (!zdev) {
  286. zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
  287. if (IS_ERR(zdev))
  288. break;
  289. } else {
  290. /* the configuration request may be stale */
  291. if (zdev->state != ZPCI_FN_STATE_STANDBY)
  292. break;
  293. zdev->state = ZPCI_FN_STATE_CONFIGURED;
  294. }
  295. zpci_scan_configured_device(zdev, ccdf->fh);
  296. break;
  297. case 0x0302: /* Reserved -> Standby */
  298. if (!zdev)
  299. zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
  300. else
  301. zpci_update_fh(zdev, ccdf->fh);
  302. break;
  303. case 0x0303: /* Deconfiguration requested */
  304. if (zdev) {
  305. /* The event may have been queued before we confirgured
  306. * the device.
  307. */
  308. if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
  309. break;
  310. zpci_update_fh(zdev, ccdf->fh);
  311. zpci_deconfigure_device(zdev);
  312. }
  313. break;
  314. case 0x0304: /* Configured -> Standby|Reserved */
  315. if (zdev) {
  316. /* The event may have been queued before we confirgured
  317. * the device.:
  318. */
  319. if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
  320. zpci_event_hard_deconfigured(zdev, ccdf->fh);
  321. /* The 0x0304 event may immediately reserve the device */
  322. if (!clp_get_state(zdev->fid, &state) &&
  323. state == ZPCI_FN_STATE_RESERVED) {
  324. zpci_device_reserved(zdev);
  325. }
  326. }
  327. break;
  328. case 0x0306: /* 0x308 or 0x302 for multiple devices */
  329. zpci_remove_reserved_devices();
  330. clp_scan_pci_devices();
  331. break;
  332. case 0x0308: /* Standby -> Reserved */
  333. if (!zdev)
  334. break;
  335. zpci_device_reserved(zdev);
  336. break;
  337. default:
  338. break;
  339. }
  340. if (existing_zdev)
  341. zpci_zdev_put(zdev);
  342. }
  343. void zpci_event_availability(void *data)
  344. {
  345. if (zpci_is_enabled())
  346. __zpci_event_availability(data);
  347. }