net/mlx5_core: Add pci error handlers to mlx5_core driver
This patch implement the pci_error_handlers for mlx5_core which allow the driver to recover from PCI error. Once an error is detected in the PCI, the mlx5_pci_err_detected is called and it: 1) Marks the device to be in 'Internal Error' state. 2) Dispatches an event to the mlx5_ib to flush all the outstanding cqes with error. 3) Returns all the on going commands with error. 4) Unloads the driver. Afterwards, the FW is reset and mlx5_pci_slot_reset is called and it enables the device and restore it's pci state. If the later succeeds, mlx5_pci_resume is called, and it loads the SW stack. Signed-off-by: Majd Dibbiny <majd@mellanox.com> Signed-off-by: Eli Cohen <eli@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

parent
fd76ee4da5
commit
89d44f0a6c
@@ -487,8 +487,26 @@ struct mlx5_priv {
|
||||
spinlock_t ctx_lock;
|
||||
};
|
||||
|
||||
enum mlx5_device_state {
|
||||
MLX5_DEVICE_STATE_UP,
|
||||
MLX5_DEVICE_STATE_INTERNAL_ERROR,
|
||||
};
|
||||
|
||||
enum mlx5_interface_state {
|
||||
MLX5_INTERFACE_STATE_DOWN,
|
||||
MLX5_INTERFACE_STATE_UP,
|
||||
};
|
||||
|
||||
enum mlx5_pci_status {
|
||||
MLX5_PCI_STATUS_DISABLED,
|
||||
MLX5_PCI_STATUS_ENABLED,
|
||||
};
|
||||
|
||||
struct mlx5_core_dev {
|
||||
struct pci_dev *pdev;
|
||||
/* sync pci state */
|
||||
struct mutex pci_status_mutex;
|
||||
enum mlx5_pci_status pci_status;
|
||||
u8 rev_id;
|
||||
char board_id[MLX5_BOARD_ID_LEN];
|
||||
struct mlx5_cmd cmd;
|
||||
@@ -497,6 +515,10 @@ struct mlx5_core_dev {
|
||||
u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
|
||||
phys_addr_t iseg_base;
|
||||
struct mlx5_init_seg __iomem *iseg;
|
||||
enum mlx5_device_state state;
|
||||
/* sync interface state */
|
||||
struct mutex intf_state_mutex;
|
||||
enum mlx5_interface_state interface_state;
|
||||
void (*event) (struct mlx5_core_dev *dev,
|
||||
enum mlx5_dev_event event,
|
||||
unsigned long param);
|
||||
|
Reference in New Issue
Block a user