RDMA/ocrdma: Depend on async link events from CNA
Recently Doug Ledford reported a deadlock between the ocrdma load sequence and the NetworkManager service issuing "open" on a be2net interface. The deadlock happens when any be2net hook (e.g. open/close) is called in parallel with insmod ocrdma.ko.

A. be2net sends administrative open/close events to ocrdma while holding device_list_mutex. It does this from the ndo_open/ndo_stop hooks of be2net, so the lock sequence in this path is rtnl_lock ---> device_list lock.

B. When a new ocrdma RoCE device gets registered, the InfiniBand stack now takes rtnl_lock in ib_register_device(), in the GID initialization routines. So the lock sequence in this path is device_list lock ---> rtnl_lock.

This inconsistent lock ordering causes the deadlock.

With this patch we stop using the administrative open and close events injected by the be2net driver. These events were used to dispatch PORT_ACTIVE and PORT_ERROR events to the IB stack. This patch implements logic to receive the async link events that the CNA generates whenever a link-state change is detected. From now on, these async events are used to dispatch PORT_ACTIVE and PORT_ERROR events to the IB stack. Depending on async events from the CNA removes the need to hold device_list_mutex and thus breaks the deadlock scenario.

Reported-by: Doug Ledford <dledford@redhat.com>
CC: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Signed-off-by: Selvin Xavier <selvin.xavier@avagotech.com>
Signed-off-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:

committed by
Doug Ledford

parent
36ac0db0db
commit
10a214dc99
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
|
||||
u32 valid_ae_event;
|
||||
};
|
||||
|
||||
#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
|
||||
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
|
||||
/*
 * Event codes that classify an asynchronous event entry.
 *
 * GRP5 and RDMA keep the values (0x5 and 0x14) that the same-named
 * #define constants carried; LINK (0x01) is the newly added code.
 * Per the commit description, link events are raised by the CNA when a
 * link-state change is detected.
 */
enum ocrdma_async_event_code {
|
||||
OCRDMA_ASYNC_LINK_EVE_CODE = 0x01,
|
||||
OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05,
|
||||
OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14
|
||||
};
|
||||
|
||||
enum ocrdma_async_grp5_events {
|
||||
OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
|
||||
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
|
||||
OCRDMA_MAX_ASYNC_ERRORS
|
||||
};
|
||||
|
||||
/*
 * Async link-state event entry: four 32-bit words, ending in the same
 * valid_ae_event word that closes struct ocrdma_ae_qp_mcqe.
 *
 * NOTE(review): the first two words are presumably decoded with the
 * OCRDMA_AE_LSC_* masks/shifts (port number, port type, link state,
 * ... in word 0; fault, reason, QoS in word 1) -- confirm against the
 * event handler.
 * NOTE(review): "qos_reason_falut" looks like a misspelling of
 * "fault"; renaming it would require updating every user of the field.
 */
struct ocrdma_ae_lnkst_mcqe {
|
||||
u32 speed_state_ptn;
|
||||
u32 qos_reason_falut;
|
||||
u32 evt_tag;
|
||||
u32 valid_ae_event;
|
||||
};
|
||||
|
||||
/*
 * Bit-field masks and shifts for link-state-change (LSC) async events.
 *
 * The constants form two groups, each tiling one full 32-bit word:
 *   group 1: PORT_NUM bits 0-5, PT bits 6-7, LS bits 8-15,
 *            LD bits 16-23, PPS bits 24-31
 *   group 2: PPF bits 0-7, ER bits 8-15, QOS bits 16-31
 * Each *_MASK is already shifted into position, so a field is read as
 * (word & X_MASK) >> X_SHIFT; PORT_NUM and PPF start at bit 0 and have
 * no shift constant.
 *
 * NOTE(review): presumably group 1 applies to speed_state_ptn and
 * group 2 to qos_reason_falut of struct ocrdma_ae_lnkst_mcqe -- confirm.
 * NOTE(review): (0xFF << 0x18) and (0xFFFF << 0x10) left-shift a signed
 * int into the sign bit, which ISO C leaves undefined; the result relies
 * on compiler behavior. Unsigned literals would avoid that -- confirm
 * before changing, since it may alter the enum's underlying type.
 */
enum {
|
||||
OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F,
|
||||
OCRDMA_AE_LSC_PT_SHIFT = 0x06,
|
||||
OCRDMA_AE_LSC_PT_MASK = (0x03 <<
|
||||
OCRDMA_AE_LSC_PT_SHIFT),
|
||||
OCRDMA_AE_LSC_LS_SHIFT = 0x08,
|
||||
OCRDMA_AE_LSC_LS_MASK = (0xFF <<
|
||||
OCRDMA_AE_LSC_LS_SHIFT),
|
||||
OCRDMA_AE_LSC_LD_SHIFT = 0x10,
|
||||
OCRDMA_AE_LSC_LD_MASK = (0xFF <<
|
||||
OCRDMA_AE_LSC_LD_SHIFT),
|
||||
OCRDMA_AE_LSC_PPS_SHIFT = 0x18,
|
||||
OCRDMA_AE_LSC_PPS_MASK = (0xFF <<
|
||||
OCRDMA_AE_LSC_PPS_SHIFT),
|
||||
OCRDMA_AE_LSC_PPF_MASK = 0xFF,
|
||||
OCRDMA_AE_LSC_ER_SHIFT = 0x08,
|
||||
OCRDMA_AE_LSC_ER_MASK = (0xFF <<
|
||||
OCRDMA_AE_LSC_ER_SHIFT),
|
||||
OCRDMA_AE_LSC_QOS_SHIFT = 0x10,
|
||||
OCRDMA_AE_LSC_QOS_MASK = (0xFFFF <<
|
||||
OCRDMA_AE_LSC_QOS_SHIFT)
|
||||
};
|
||||
|
||||
/*
 * Link-state values for link-state-change (LSC) async events.
 *
 * Bit 1 (OCRDMA_AE_LSC_LLINK_MASK, 0x02) separates the LLINK values
 * (0x02, 0x03) from the PLINK values (0x00, 0x01); bit 0 is set for the
 * *_UP values and clear for the *_DOWN values. Note that LLINK_MASK and
 * LLINK_DOWN share the value 0x02.
 *
 * NOTE(review): "P" and "L" presumably stand for physical and logical
 * link -- confirm against the adapter documentation.
 */
enum {
|
||||
OCRDMA_AE_LSC_PLINK_DOWN = 0x00,
|
||||
OCRDMA_AE_LSC_PLINK_UP = 0x01,
|
||||
OCRDMA_AE_LSC_LLINK_DOWN = 0x02,
|
||||
OCRDMA_AE_LSC_LLINK_MASK = 0x02,
|
||||
OCRDMA_AE_LSC_LLINK_UP = 0x03
|
||||
};
|
||||
|
||||
/* mailbox command request and responses */
|
||||
enum {
|
||||
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
|
||||
@@ -676,7 +717,7 @@ enum {
|
||||
OCRDMA_PHY_PFLT_SHIFT = 0x18,
|
||||
OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
|
||||
OCRDMA_QOS_LNKSP_SHIFT = 0x10,
|
||||
OCRDMA_LLST_MASK = 0xFF,
|
||||
OCRDMA_LINK_ST_MASK = 0x01,
|
||||
OCRDMA_PLFC_MASK = 0x00000400,
|
||||
OCRDMA_PLFC_SHIFT = 0x8,
|
||||
OCRDMA_PLRFC_MASK = 0x00000200,
|
||||
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {
|
||||
|
||||
u32 pflt_pps_ld_pnum;
|
||||
u32 qos_lsp;
|
||||
u32 res_lls;
|
||||
u32 res_lnk_st;
|
||||
};
|
||||
|
||||
enum {
|
||||
|
Reference in New Issue
Block a user