RDMA/ocrdma: Depend on async link events from CNA

Recently Dough Ledford reported a deadlock happening
between ocrdma-load sequence and NetworkManager service
issuing "open" on be2net interface.

The deadlock happens when any be2net hook (e.g. open/close) is called
in parallel to insmod ocrdma.ko.

A. be2net is sending administrative open/close event to ocrdma holding
   device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net.
   So sequence of locks is rtnl_lock---> device_list lock

B.  When new ocrdma roce device gets registered, infiniband stack now
    takes rtnl_lock in ib_register_device() in GID initialization routines.
    So sequence of locks in this path is device_list lock ---> rtnl_lock.

This improper locking sequence causes deadlock.

With this patch we stop using administrative open and close events
injected by be2net driver. These events were used to dispatch PORT_ACTIVE
and PORT_ERROR events to the IB-stack. This patch implements a logic
to receive async-link-events generated from CNA whenever link-state-change
is detected. Now on, these async-events will be used to dispatch
PORT_ACTIVE and PORT_ERROR events to IB-stack.

Depending on async-events from CNA removes the need to hold device-list-mutex
and thus breaks the busy-wait scenario.

Reported-by: Doug Ledford <dledford@redhat.com>
CC: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Signed-off-by: Selvin Xavier <selvin.xavier@avagotech.com>
Signed-off-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
Devesh Sharma
2015-12-24 13:14:07 -05:00
committed by Doug Ledford
szülő 36ac0db0db
commit 10a214dc99
6 fájl változott, egészen pontosan 119 új sor hozzáadva és 22 régi sor törölve

Fájl megtekintése

@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
/* Request link events on this MQ. */
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
}
}
static void ocrdma_process_link_state(struct ocrdma_dev *dev,
struct ocrdma_ae_mcqe *cqe)
{
struct ocrdma_ae_lnkst_mcqe *evt;
u8 lstate;
evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
return;
if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
}
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
struct ocrdma_ae_mcqe *cqe = ae_cqe;
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
switch (evt_code) {
case OCRDMA_ASYNC_LINK_EVE_CODE:
ocrdma_process_link_state(dev, cqe);
break;
case OCRDMA_ASYNC_RDMA_EVE_CODE:
ocrdma_dispatch_ibevent(dev, cqe);
else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
break;
case OCRDMA_ASYNC_GRP5_EVE_CODE:
ocrdma_process_grp5_aync(dev, cqe);
else
break;
default:
pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
dev->id, evt_code);
}
}
static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -1363,7 +1387,8 @@ mbx_err:
return status;
}
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
u8 *lnk_state)
{
int status = -ENOMEM;
struct ocrdma_get_link_speed_rsp *rsp;
@@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
>> OCRDMA_PHY_PS_SHIFT;
if (lnk_speed)
*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
>> OCRDMA_PHY_PS_SHIFT;
if (lnk_state)
*lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);
mbx_err:
kfree(cmd);