RDMA/cxgb4: DB Drop Recovery for RDMA and LLD queues
Add a module option, db_fc_threshold, which is the count of active QPs that triggers automatic DB flow control mode. Automatically transition to/from flow control mode when the active QP count crosses db_fc_threshold.

Add more DB debugfs stats.

On a DB DROP event from the LLD, recover all the iWARP queues.

Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Committed by: Roland Dreier
Parent: 4984037bef
Commit: 422eea0a8c
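To make the flow-control transition described in the commit message concrete, the sketch below collects the two state changes into a single hypothetical helper. check_db_flow_control() does not exist in the driver; it only restates what c4iw_create_qp() and c4iw_destroy_qp() do in the diff that follows, assuming the driver's existing c4iw_dev fields and the enable_qp_db()/disable_qp_db() callbacks added by this patch:

        /*
         * Hypothetical helper for illustration only -- the real transitions
         * are open-coded in c4iw_create_qp() and c4iw_destroy_qp() below.
         * Caller holds rhp->lock and has already updated rhp->qpcnt.
         */
        static void check_db_flow_control(struct c4iw_dev *rhp)
        {
                if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
                        /* Too many active QPs: disable every QP's user doorbell. */
                        rhp->rdev.stats.db_state_transitions++;
                        rhp->db_state = FLOW_CONTROL;
                        idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
                } else if (rhp->qpcnt <= db_fc_threshold &&
                           rhp->db_state == FLOW_CONTROL) {
                        /* Dropped back under the threshold: re-enable doorbells. */
                        rhp->rdev.stats.db_state_transitions++;
                        rhp->db_state = NORMAL;
                        idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
                }
        }

Because db_fc_threshold is registered with module_param(..., 0644), it can be set at module load time and should also be adjustable at runtime, e.g. via /sys/module/iw_cxgb4/parameters/db_fc_threshold.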
@@ -42,6 +42,11 @@ static int ocqp_support = 1;
 module_param(ocqp_support, int, 0644);
 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
 
+int db_fc_threshold = 2000;
+module_param(db_fc_threshold, int, 0644);
+MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic "
+                 "db flow control mode (default = 2000)");
+
 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
 {
         unsigned long flag;
@@ -1143,13 +1148,19 @@ static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
 
         mutex_lock(&qhp->rhp->db_mutex);
         do {
-                if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) {
+
+                /*
+                 * The interrupt threshold is dbfifo_int_thresh << 6. So
+                 * make sure we don't cross that and generate an interrupt.
+                 */
+                if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
+                    (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
                         writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
                         break;
                 }
                 set_current_state(TASK_UNINTERRUPTIBLE);
                 schedule_timeout(usecs_to_jiffies(delay));
-                delay = min(delay << 1, 200000);
+                delay = min(delay << 1, 2000);
         } while (1);
         mutex_unlock(&qhp->rhp->db_mutex);
         return 0;
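A note on the arithmetic in this hunk: the hardware interrupt fires at dbfifo_int_thresh << 6 outstanding doorbells, so comparing against dbfifo_int_thresh << 5 keeps kernel doorbell rings below half the interrupt level instead of the previous hard-coded limit of 768. The retry back-off is also now capped at 2000 us (2 ms) rather than 200000 us (200 ms).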
@@ -1388,6 +1399,14 @@ out:
         return ret;
 }
 
+static int enable_qp_db(int id, void *p, void *data)
+{
+        struct c4iw_qp *qp = p;
+
+        t4_enable_wq_db(&qp->wq);
+        return 0;
+}
+
 int c4iw_destroy_qp(struct ib_qp *ib_qp)
 {
         struct c4iw_dev *rhp;
@@ -1405,7 +1424,16 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
         c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
         wait_event(qhp->wait, !qhp->ep);
 
-        remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+        spin_lock_irq(&rhp->lock);
+        remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+        rhp->qpcnt--;
+        BUG_ON(rhp->qpcnt < 0);
+        if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
+                rhp->rdev.stats.db_state_transitions++;
+                rhp->db_state = NORMAL;
+                idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
+        }
+        spin_unlock_irq(&rhp->lock);
         atomic_dec(&qhp->refcnt);
         wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
 
@@ -1419,6 +1447,14 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
         return 0;
 }
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+        struct c4iw_qp *qp = p;
+
+        t4_disable_wq_db(&qp->wq);
+        return 0;
+}
+
 struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                              struct ib_udata *udata)
 {
@@ -1508,6 +1544,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
         spin_lock_irq(&rhp->lock);
         if (rhp->db_state != NORMAL)
                 t4_disable_wq_db(&qhp->wq);
+        if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
+                rhp->rdev.stats.db_state_transitions++;
+                rhp->db_state = FLOW_CONTROL;
+                idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
+        }
         ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
         spin_unlock_irq(&rhp->lock);
         if (ret)