123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <[email protected]>
- */
- #define KMSG_COMPONENT "zpci"
- #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
- #include <linux/kernel.h>
- #include <linux/pci.h>
- #include <asm/pci_debug.h>
- #include <asm/pci_dma.h>
- #include <asm/sclp.h>
- #include "pci_bus.h"
- /* Content Code Description for PCI Function Error */
- struct zpci_ccdf_err {
- u32 reserved1;
- u32 fh; /* function handle */
- u32 fid; /* function id */
- u32 ett : 4; /* expected table type */
- u32 mvn : 12; /* MSI vector number */
- u32 dmaas : 8; /* DMA address space */
- u32 : 6;
- u32 q : 1; /* event qualifier */
- u32 rw : 1; /* read/write */
- u64 faddr; /* failing address */
- u32 reserved3;
- u16 reserved4;
- u16 pec; /* PCI event code */
- } __packed;
- /* Content Code Description for PCI Function Availability */
- struct zpci_ccdf_avail {
- u32 reserved1;
- u32 fh; /* function handle */
- u32 fid; /* function id */
- u32 reserved2;
- u32 reserved3;
- u32 reserved4;
- u32 reserved5;
- u16 reserved6;
- u16 pec; /* PCI event code */
- } __packed;
- static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
- {
- switch (ers_res) {
- case PCI_ERS_RESULT_CAN_RECOVER:
- case PCI_ERS_RESULT_RECOVERED:
- case PCI_ERS_RESULT_NEED_RESET:
- return false;
- default:
- return true;
- }
- }
- static bool is_passed_through(struct zpci_dev *zdev)
- {
- return zdev->s390_domain;
- }
- static bool is_driver_supported(struct pci_driver *driver)
- {
- if (!driver || !driver->err_handler)
- return false;
- if (!driver->err_handler->error_detected)
- return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
- return true;
- }
- static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
- struct pci_driver *driver)
- {
- pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
- ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
- if (ers_result_indicates_abort(ers_res))
- pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
- else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
- static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
- struct pci_driver *driver)
- {
- pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
- struct zpci_dev *zdev = to_zpci(pdev);
- int rc;
- pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
- rc = zpci_reset_load_store_blocked(zdev);
- if (rc) {
- pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
- /* Let's try a full reset instead */
- return PCI_ERS_RESULT_NEED_RESET;
- }
- if (driver->err_handler->mmio_enabled) {
- ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
- }
- pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
- rc = zpci_clear_error_state(zdev);
- if (!rc) {
- pdev->error_state = pci_channel_io_normal;
- } else {
- pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
- /* Let's try a full reset instead */
- return PCI_ERS_RESULT_NEED_RESET;
- }
- return ers_res;
- }
- static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
- struct pci_driver *driver)
- {
- pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
- pr_info("%s: Initiating reset\n", pci_name(pdev));
- if (zpci_hot_reset_device(to_zpci(pdev))) {
- pr_err("%s: The reset request failed\n", pci_name(pdev));
- return ers_res;
- }
- pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
- return ers_res;
- }
- return ers_res;
- }
- /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
- * @pdev: PCI function to recover currently in the error state
- *
- * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
- * With the simplification that recovery always happens per function
- * and the platform determines which functions are affected for
- * multi-function devices.
- */
- static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
- {
- pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
- struct pci_driver *driver;
- /*
- * Ensure that the PCI function is not removed concurrently, no driver
- * is unbound or probed and that userspace can't access its
- * configuration space while we perform recovery.
- */
- pci_dev_lock(pdev);
- if (pdev->error_state == pci_channel_io_perm_failure) {
- ers_res = PCI_ERS_RESULT_DISCONNECT;
- goto out_unlock;
- }
- pdev->error_state = pci_channel_io_frozen;
- if (is_passed_through(to_zpci(pdev))) {
- pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
- pci_name(pdev));
- goto out_unlock;
- }
- driver = to_pci_driver(pdev->dev.driver);
- if (!is_driver_supported(driver)) {
- if (!driver)
- pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
- pci_name(pdev));
- else
- pr_info("%s: The %s driver bound to the device does not support error recovery\n",
- pci_name(pdev),
- driver->name);
- goto out_unlock;
- }
- ers_res = zpci_event_notify_error_detected(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
- goto out_unlock;
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
- ers_res = zpci_event_do_error_state_clear(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
- goto out_unlock;
- }
- if (ers_res == PCI_ERS_RESULT_NEED_RESET)
- ers_res = zpci_event_do_reset(pdev, driver);
- if (ers_res != PCI_ERS_RESULT_RECOVERED) {
- pr_err("%s: Automatic recovery failed; operator intervention is required\n",
- pci_name(pdev));
- goto out_unlock;
- }
- pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
- if (driver->err_handler->resume)
- driver->err_handler->resume(pdev);
- out_unlock:
- pci_dev_unlock(pdev);
- return ers_res;
- }
- /* zpci_event_io_failure - Report PCI channel failure state to driver
- * @pdev: PCI function for which to report
- * @es: PCI channel failure state to report
- */
- static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
- {
- struct pci_driver *driver;
- pci_dev_lock(pdev);
- pdev->error_state = es;
- /**
- * While vfio-pci's error_detected callback notifies user-space QEMU
- * reacts to this by freezing the guest. In an s390 environment PCI
- * errors are rarely fatal so this is overkill. Instead in the future
- * we will inject the error event and let the guest recover the device
- * itself.
- */
- if (is_passed_through(to_zpci(pdev)))
- goto out;
- driver = to_pci_driver(pdev->dev.driver);
- if (driver && driver->err_handler && driver->err_handler->error_detected)
- driver->err_handler->error_detected(pdev, pdev->error_state);
- out:
- pci_dev_unlock(pdev);
- }
- static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
- {
- struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = NULL;
- pci_ers_result_t ers_res;
- zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
- ccdf->fid, ccdf->fh, ccdf->pec);
- zpci_err("error CCDF:\n");
- zpci_err_hex(ccdf, sizeof(*ccdf));
- if (zdev) {
- zpci_update_fh(zdev, ccdf->fh);
- if (zdev->zbus->bus)
- pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
- }
- pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
- pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
- if (!pdev)
- goto no_pdev;
- switch (ccdf->pec) {
- case 0x003a: /* Service Action or Error Recovery Successful */
- ers_res = zpci_event_attempt_error_recovery(pdev);
- if (ers_res != PCI_ERS_RESULT_RECOVERED)
- zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
- break;
- default:
- /*
- * Mark as frozen not permanently failed because the device
- * could be subsequently recovered by the platform.
- */
- zpci_event_io_failure(pdev, pci_channel_io_frozen);
- break;
- }
- pci_dev_put(pdev);
- no_pdev:
- zpci_zdev_put(zdev);
- }
/* zpci_event_error - Entry point for PCI function error events
 * @data: event data, interpreted as a struct zpci_ccdf_err
 *
 * Events are dropped while zPCI is not enabled.
 */
void zpci_event_error(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_error(data);
}
- static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
- {
- zpci_update_fh(zdev, fh);
- /* Give the driver a hint that the function is
- * already unusable.
- */
- zpci_bus_remove_device(zdev, true);
- /* Even though the device is already gone we still
- * need to free zPCI resources as part of the disable.
- */
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
- zdev->state = ZPCI_FN_STATE_STANDBY;
- }
- static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
- {
- struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- bool existing_zdev = !!zdev;
- enum zpci_state state;
- zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
- ccdf->fid, ccdf->fh, ccdf->pec);
- switch (ccdf->pec) {
- case 0x0301: /* Reserved|Standby -> Configured */
- if (!zdev) {
- zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
- if (IS_ERR(zdev))
- break;
- } else {
- /* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
- break;
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
- }
- zpci_scan_configured_device(zdev, ccdf->fh);
- break;
- case 0x0302: /* Reserved -> Standby */
- if (!zdev)
- zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
- else
- zpci_update_fh(zdev, ccdf->fh);
- break;
- case 0x0303: /* Deconfiguration requested */
- if (zdev) {
- /* The event may have been queued before we confirgured
- * the device.
- */
- if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
- break;
- zpci_update_fh(zdev, ccdf->fh);
- zpci_deconfigure_device(zdev);
- }
- break;
- case 0x0304: /* Configured -> Standby|Reserved */
- if (zdev) {
- /* The event may have been queued before we confirgured
- * the device.:
- */
- if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
- zpci_event_hard_deconfigured(zdev, ccdf->fh);
- /* The 0x0304 event may immediately reserve the device */
- if (!clp_get_state(zdev->fid, &state) &&
- state == ZPCI_FN_STATE_RESERVED) {
- zpci_device_reserved(zdev);
- }
- }
- break;
- case 0x0306: /* 0x308 or 0x302 for multiple devices */
- zpci_remove_reserved_devices();
- clp_scan_pci_devices();
- break;
- case 0x0308: /* Standby -> Reserved */
- if (!zdev)
- break;
- zpci_device_reserved(zdev);
- break;
- default:
- break;
- }
- if (existing_zdev)
- zpci_zdev_put(zdev);
- }
/* zpci_event_availability - Entry point for PCI function availability events
 * @data: event data, interpreted as a struct zpci_ccdf_avail
 *
 * Events are dropped while zPCI is not enabled.
 */
void zpci_event_availability(void *data)
{
	if (!zpci_is_enabled())
		return;

	__zpci_event_availability(data);
}
|