IB/ipath: Fix many locking issues when switching to error state
The send DMA hardware queue voided a number of prior assumptions about when a send is complete, which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Committed by: Roland Dreier
Parent: 53dc1ca194
Commit: e509be898d
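The recurring locking pattern in the diff below is visible in ipath_insert_rnr_queue(): it is now documented as being called with the QP's s_lock already held via spin_lock_irqsave() (interrupts disabled), so the nested dev->pending_lock is taken with plain spin_lock()/spin_unlock() instead of a second irqsave. A minimal kernel-style sketch of that nesting, using hypothetical types and names (demo_dev, demo_qp, demo_insert_rnr_queue) rather than the driver's own, and a plain list_add_tail() where the real code keeps the RNR list sorted by remaining timeout:

/* Illustrative sketch only -- hypothetical structures, not the ipath driver's. */
#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_dev {
    spinlock_t pending_lock;        /* protects rnrwait */
    struct list_head rnrwait;       /* QPs waiting for an RNR timeout */
};

struct demo_qp {
    spinlock_t s_lock;              /* per-QP send-state lock */
    struct list_head timerwait;
    struct demo_dev *dev;
};

/* Caller must hold qp->s_lock, taken with spin_lock_irqsave(). */
static void demo_insert_rnr_queue(struct demo_qp *qp)
{
    struct demo_dev *dev = qp->dev;

    /* Interrupts are already off, so a plain spin_lock() is enough. */
    spin_lock(&dev->pending_lock);
    list_add_tail(&qp->timerwait, &dev->rnrwait);
    spin_unlock(&dev->pending_lock);
}

static void demo_rnr_nak(struct demo_qp *qp)
{
    unsigned long flags;

    spin_lock_irqsave(&qp->s_lock, flags);  /* outer lock saves the irq state once */
    demo_insert_rnr_queue(qp);              /* nested lock: no second irqsave */
    spin_unlock_irqrestore(&qp->s_lock, flags);
}

The point of the convention is that the interrupt state is saved exactly once, at the outermost lock; every lock nested inside it can use the cheaper non-irqsave variants.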
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
 * @qp: the QP
 *
 * Called with the QP s_lock held and interrupts disabled.
 * XXX Use a simple list for now. We might need a priority
 * queue if we have lots of QPs waiting for RNR timeouts
 * but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
    struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
    unsigned long flags;

    spin_lock_irqsave(&dev->pending_lock, flags);
    /* We already did a spin_lock_irqsave(), so just use spin_lock */
    spin_lock(&dev->pending_lock);
    if (list_empty(&dev->rnrwait))
        list_add(&qp->timerwait, &dev->rnrwait);
    else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
        nqp->s_rnr_timeout -= qp->s_rnr_timeout;
        list_add(&qp->timerwait, l);
    }
    spin_unlock_irqrestore(&dev->pending_lock, flags);
    spin_unlock(&dev->pending_lock);
}

/**
@@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
    }

    spin_lock_irqsave(&rq->lock, flags);
    if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
        ret = 0;
        goto unlock;
    }

    wq = rq->wq;
    tail = wq->tail;
    /* Validate tail before using it since it is user writable. */
@@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
        tail = 0;
    do {
        if (unlikely(tail == wq->head)) {
            spin_unlock_irqrestore(&rq->lock, flags);
            ret = 0;
            goto bail;
            goto unlock;
        }
        /* Make sure entry is read after head index is read. */
        smp_rmb();
@@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
    wq->tail = tail;

    ret = 1;
    qp->r_wrid_valid = 1;
    set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
    if (handler) {
        u32 n;

@@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
            goto bail;
        }
    }
unlock:
    spin_unlock_irqrestore(&rq->lock, flags);

bail:
    return ret;
}
@@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
    atomic64_t *maddr;
    enum ib_wc_status send_status;

    /*
     * Note that we check the responder QP state after
     * checking the requester's state.
     */
    qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
    if (!qp) {
        dev->n_pkt_drops++;
        return;
    }

again:
    spin_lock_irqsave(&sqp->s_lock, flags);

    if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
        sqp->s_rnr_timeout) {
        spin_unlock_irqrestore(&sqp->s_lock, flags);
        goto done;
    }
    /* Return if we are already busy processing a work request. */
    if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
        !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
        goto unlock;

    /* Get the next send request. */
    if (sqp->s_last == sqp->s_head) {
        /* Send work queue is empty. */
        spin_unlock_irqrestore(&sqp->s_lock, flags);
        goto done;
    sqp->s_flags |= IPATH_S_BUSY;

again:
    if (sqp->s_last == sqp->s_head)
        goto clr_busy;
    wqe = get_swqe_ptr(sqp, sqp->s_last);

    /* Return if it is not OK to start a new work reqeust. */
    if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
        if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
            goto clr_busy;
        /* We are in the error state, flush the work request. */
        send_status = IB_WC_WR_FLUSH_ERR;
        goto flush_send;
    }

    /*
     * We can rely on the entry not changing without the s_lock
     * being held until we update s_last.
     * We increment s_cur to indicate s_last is in progress.
     */
    wqe = get_swqe_ptr(sqp, sqp->s_last);
    if (sqp->s_last == sqp->s_cur) {
        if (++sqp->s_cur >= sqp->s_size)
            sqp->s_cur = 0;
    }
    spin_unlock_irqrestore(&sqp->s_lock, flags);

    if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
        dev->n_pkt_drops++;
        /*
         * For RC, the requester would timeout and retry so
         * shortcut the timeouts and just signal too many retries.
         */
        if (sqp->ibqp.qp_type == IB_QPT_RC)
            send_status = IB_WC_RETRY_EXC_ERR;
        else
            send_status = IB_WC_SUCCESS;
        goto serr;
    }

    memset(&wc, 0, sizeof wc);
    send_status = IB_WC_SUCCESS;

@@ -396,8 +425,7 @@ again:
        sqp->s_len -= len;
    }

    if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
        wqe->wr.opcode == IB_WR_RDMA_READ)
    if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
        goto send_comp;

    if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -417,6 +445,8 @@ again:
                   wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
    spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
    sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
    ipath_send_complete(sqp, wqe, send_status);
    goto again;
@@ -437,11 +467,12 @@ rnr_nak:
    sqp->s_rnr_retry--;
    spin_lock_irqsave(&sqp->s_lock, flags);
    if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
        goto unlock;
        goto clr_busy;
    sqp->s_flags |= IPATH_S_WAITING;
    dev->n_rnr_naks++;
    sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
    ipath_insert_rnr_queue(sqp);
    goto unlock;
    goto clr_busy;

inv_err:
    send_status = IB_WC_REM_INV_REQ_ERR;
@@ -473,17 +504,19 @@ serr:
        }
        goto done;
    }
clr_busy:
    sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
    spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
    if (atomic_dec_and_test(&qp->refcount))
    if (qp && atomic_dec_and_test(&qp->refcount))
        wake_up(&qp->wait);
}

static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
    if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
        qp->ibqp.qp_type == IB_QPT_SMI) {
        unsigned long flags;

        spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
 * @dev: the device we ran out of buffers on
 *
 * Called when we run out of PIO buffers.
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static void ipath_no_bufs_available(struct ipath_qp *qp,
static int ipath_no_bufs_available(struct ipath_qp *qp,
                                   struct ipath_ibdev *dev)
{
    unsigned long flags;
    int ret = 1;

    /*
     * Note that as soon as want_buffer() is called and
     * possibly before it returns, ipath_ib_piobufavail()
     * could be called. If we are still in the tasklet function,
     * tasklet_hi_schedule() will not call us until the next time
     * tasklet_hi_schedule() is called.
     * We leave the busy flag set so that another post send doesn't
     * try to put the same QP on the piowait list again.
     * could be called. Therefore, put QP on the piowait list before
     * enabling the PIO avail interrupt.
     */
    spin_lock_irqsave(&dev->pending_lock, flags);
    list_add_tail(&qp->piowait, &dev->piowait);
    spin_unlock_irqrestore(&dev->pending_lock, flags);
    want_buffer(dev->dd, qp);
    dev->n_piowait++;
    spin_lock_irqsave(&qp->s_lock, flags);
    if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
        dev->n_piowait++;
        qp->s_flags |= IPATH_S_WAITING;
        qp->s_flags &= ~IPATH_S_BUSY;
        spin_lock(&dev->pending_lock);
        if (list_empty(&qp->piowait))
            list_add_tail(&qp->piowait, &dev->piowait);
        spin_unlock(&dev->pending_lock);
    } else
        ret = 0;
    spin_unlock_irqrestore(&qp->s_lock, flags);
    if (ret)
        want_buffer(dev->dd, qp);
    return ret;
}

/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
    struct ipath_qp *qp = (struct ipath_qp *)data;
    struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
    int (*make_req)(struct ipath_qp *qp);

    if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
        goto bail;
    unsigned long flags;

    if ((qp->ibqp.qp_type == IB_QPT_RC ||
         qp->ibqp.qp_type == IB_QPT_UC) &&
        qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
        ipath_ruc_loopback(qp);
        goto clear;
        goto bail;
    }

    if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
    else
        make_req = ipath_make_ud_req;

    spin_lock_irqsave(&qp->s_lock, flags);

    /* Return if we are already busy processing a work request. */
    if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
        !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
        spin_unlock_irqrestore(&qp->s_lock, flags);
        goto bail;
    }

    qp->s_flags |= IPATH_S_BUSY;

    spin_unlock_irqrestore(&qp->s_lock, flags);

again:
    /* Check for a constructed packet to be sent. */
    if (qp->s_hdrwords != 0) {
@@ -623,8 +677,8 @@ again:
         */
        if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
                             qp->s_cur_sge, qp->s_cur_size)) {
            ipath_no_bufs_available(qp, dev);
            goto bail;
            if (ipath_no_bufs_available(qp, dev))
                goto bail;
        }
        dev->n_unicast_xmit++;
        /* Record that we sent the packet and s_hdr is empty. */
@@ -633,16 +687,20 @@ again:

    if (make_req(qp))
        goto again;
clear:
    clear_bit(IPATH_S_BUSY, &qp->s_busy);

bail:;
}

/*
 * This should be called with s_lock held.
 */
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
                         enum ib_wc_status status)
{
    unsigned long flags;
    u32 last;
    u32 old_last, last;

    if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
        return;

    /* See ch. 11.2.4.1 and 10.7.3.1 */
    if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
                   status != IB_WC_SUCCESS);
    }

    spin_lock_irqsave(&qp->s_lock, flags);
    last = qp->s_last;
    old_last = last = qp->s_last;
    if (++last >= qp->s_size)
        last = 0;
    qp->s_last = last;
    spin_unlock_irqrestore(&qp->s_lock, flags);
    if (qp->s_cur == old_last)
        qp->s_cur = last;
    if (qp->s_tail == old_last)
        qp->s_tail = last;
    if (qp->state == IB_QPS_SQD && last == qp->s_cur)
        qp->s_draining = 0;
}
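The hunk above is where the new IB_QPS_SQD support surfaces: per the added comment, ipath_send_complete() is now called with qp->s_lock already held, advances s_last, drags s_cur and s_tail along when they still referenced the completed slot, and clears s_draining once completions catch up with s_cur. A simplified, self-contained sketch of that ring-index bookkeeping (hypothetical names, plain C, locking omitted), not the driver's code:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified view of the send-queue indices. */
struct send_queue {
    uint32_t s_size;     /* number of slots in the ring */
    uint32_t s_last;     /* oldest not-yet-completed entry */
    uint32_t s_cur;      /* entry currently being processed */
    uint32_t s_tail;     /* next entry to hand to the hardware */
    bool     sqd;        /* queue pair is in the SQD (drain) state */
    bool     s_draining; /* drain still in progress */
};

/* Complete the entry at s_last and keep the other indices consistent. */
void complete_one(struct send_queue *sq)
{
    uint32_t old_last = sq->s_last;
    uint32_t last = old_last + 1;

    if (last >= sq->s_size)
        last = 0;
    sq->s_last = last;

    /* If s_cur or s_tail still pointed at the completed slot, advance them too. */
    if (sq->s_cur == old_last)
        sq->s_cur = last;
    if (sq->s_tail == old_last)
        sq->s_tail = last;

    /* In the SQD state, the drain is over once completions reach s_cur. */
    if (sq->sqd && last == sq->s_cur)
        sq->s_draining = false;
}

Usage is one call per retired work request, after the completion has been reported, so the indices only ever move forward together.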