Merge branch 'pci/error'

- Use pci_channel_state_t instead of enum pci_channel_state (Luc Van
  Oostenryck)

- Simplify __aer_print_error() (Bjorn Helgaas)

- Log AER correctable errors as warning, not error (Matt Jolly)

- Rename pci_aer_clear_device_status() to pcie_clear_device_status() (Bjorn
  Helgaas)

- Clear PCIe Device Status errors only if OS owns AER (Jonathan Cameron)

* pci/error:
  PCI/ERR: Clear PCIe Device Status errors only if OS owns AER
  PCI/ERR: Rename pci_aer_clear_device_status() to pcie_clear_device_status()
  PCI/AER: Log correctable errors as warning, not error
  PCI/AER: Simplify __aer_print_error()
  PCI: Use 'pci_channel_state_t' instead of 'enum pci_channel_state'
This commit is contained in:
Bjorn Helgaas
2020-08-05 18:24:15 -05:00
20 changed files with 83 additions and 57 deletions

View File

@@ -2173,6 +2173,14 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
}
EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
void pcie_clear_device_status(struct pci_dev *dev)
{
u16 sta;
pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}
/**
* pcie_clear_root_pme_status - Clear root port PME interrupt status.
* @dev: PCIe root port or event collector.

View File

@@ -92,6 +92,7 @@ void pci_refresh_power_state(struct pci_dev *dev);
int pci_power_up(struct pci_dev *dev);
void pci_disable_enabled_device(struct pci_dev *dev);
int pci_finish_runtime_suspend(struct pci_dev *dev);
void pcie_clear_device_status(struct pci_dev *dev);
void pcie_clear_root_pme_status(struct pci_dev *dev);
bool pci_check_pme_status(struct pci_dev *dev);
void pci_pme_wakeup_bus(struct pci_bus *bus);
@@ -555,7 +556,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
/* PCI error reporting and recovery */
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
@@ -658,7 +659,6 @@ void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
extern const struct attribute_group aer_stats_attr_group;
void pci_aer_clear_fatal_status(struct pci_dev *dev);
void pci_aer_clear_device_status(struct pci_dev *dev);
int pci_aer_clear_status(struct pci_dev *dev);
int pci_aer_raw_clear_status(struct pci_dev *dev);
#else
@@ -666,7 +666,6 @@ static inline void pci_no_aer(void) { }
static inline void pci_aer_init(struct pci_dev *d) { }
static inline void pci_aer_exit(struct pci_dev *d) { }
static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
static inline void pci_aer_clear_device_status(struct pci_dev *dev) { }
static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; }
static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; }
#endif

View File

@@ -241,14 +241,6 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
void pci_aer_clear_device_status(struct pci_dev *dev)
{
u16 sta;
pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}
int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
int aer = dev->aer_cap;
@@ -447,7 +439,7 @@ static const char *aer_error_layer[] = {
"Transaction Layer"
};
static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
static const char *aer_correctable_error_string[] = {
"RxErr", /* Bit Position 0 */
NULL,
NULL,
@@ -464,9 +456,25 @@ static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
"NonFatalErr", /* Bit Position 13 */
"CorrIntErr", /* Bit Position 14 */
"HeaderOF", /* Bit Position 15 */
NULL, /* Bit Position 16 */
NULL, /* Bit Position 17 */
NULL, /* Bit Position 18 */
NULL, /* Bit Position 19 */
NULL, /* Bit Position 20 */
NULL, /* Bit Position 21 */
NULL, /* Bit Position 22 */
NULL, /* Bit Position 23 */
NULL, /* Bit Position 24 */
NULL, /* Bit Position 25 */
NULL, /* Bit Position 26 */
NULL, /* Bit Position 27 */
NULL, /* Bit Position 28 */
NULL, /* Bit Position 29 */
NULL, /* Bit Position 30 */
NULL, /* Bit Position 31 */
};
static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
static const char *aer_uncorrectable_error_string[] = {
"Undefined", /* Bit Position 0 */
NULL,
NULL,
@@ -494,6 +502,11 @@ static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
"AtomicOpBlocked", /* Bit Position 24 */
"TLPBlockedErr", /* Bit Position 25 */
"PoisonTLPBlocked", /* Bit Position 26 */
NULL, /* Bit Position 27 */
NULL, /* Bit Position 28 */
NULL, /* Bit Position 29 */
NULL, /* Bit Position 30 */
NULL, /* Bit Position 31 */
};
static const char *aer_agent_string[] = {
@@ -650,24 +663,26 @@ static void __print_tlp_header(struct pci_dev *dev,
static void __aer_print_error(struct pci_dev *dev,
struct aer_err_info *info)
{
const char **strings;
unsigned long status = info->status & ~info->mask;
const char *errmsg = NULL;
const char *level, *errmsg;
int i;
for_each_set_bit(i, &status, 32) {
if (info->severity == AER_CORRECTABLE)
errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
aer_correctable_error_string[i] : NULL;
else
errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
aer_uncorrectable_error_string[i] : NULL;
if (info->severity == AER_CORRECTABLE) {
strings = aer_correctable_error_string;
level = KERN_WARNING;
} else {
strings = aer_uncorrectable_error_string;
level = KERN_ERR;
}
if (errmsg)
pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
if (!errmsg)
errmsg = "Unknown Error Bit";
pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
else
pci_err(dev, " [%2d] Unknown Error Bit%s\n",
i, info->first_error == i ? " (First)" : "");
}
pci_dev_aer_stats_incr(dev, info);
}
@@ -676,6 +691,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
int layer, agent;
int id = ((dev->bus->number << 8) | dev->devfn);
const char *level;
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@@ -686,13 +702,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device,
info->status, info->mask);
pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device, info->status, info->mask);
__aer_print_error(dev, info);
@@ -922,7 +939,8 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (aer)
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
pci_aer_clear_device_status(dev);
if (pcie_aer_is_native(dev))
pcie_clear_device_status(dev);
} else if (info->severity == AER_NONFATAL)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)

View File

@@ -46,7 +46,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig,
}
static int report_error_detected(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
enum pci_ers_result *result)
{
pci_ers_result_t vote;
@@ -147,7 +147,7 @@ out:
}
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
{
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
@@ -197,7 +197,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast resume message\n");
pci_walk_bus(bus, report_resume, &status);
pci_aer_clear_device_status(dev);
if (pcie_aer_is_native(dev))
pcie_clear_device_status(dev);
pci_aer_clear_nonfatal_status(dev);
pci_info(dev, "device recovery successful\n");
return status;

View File

@@ -146,7 +146,7 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
}
static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
enum pci_channel_state error)
pci_channel_state_t error)
{
/* Root Port has no impact. Always recovers. */
return PCI_ERS_RESULT_CAN_RECOVER;