RDMA/nes: CM connection setup/teardown rework

Major rework of CM connection setup/teardown.  We had a number of issues
with MPI applications not starting/terminating properly over time.
With these changes we were able to run longer on larger clusters.

* Remove memory allocation from nes_connect() and nes_cm_connect().
* Fix mini_cm_dec_refcnt_listen() when destroying listener.
* Remove unnecessary code from schedule_nes_timer() and nes_cm_timer_tick().
* Functionalize mini_cm_recv_pkt() and process_packet().
* Clean up cm_node->ref_count usage.
* Reuse skbs if available.

Signed-off-by: Faisal Latif <flatif@neteffect.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
这个提交包含在:
Faisal Latif
2008-07-24 20:50:45 -07:00
提交者 Roland Dreier
父节点 fb2e405fc1
当前提交 6492cdf3a2
修改 5 个文件,包含 1166 行新增939 行删除

查看文件

@@ -2814,7 +2814,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp = *((struct nes_qp **)&context);
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
nesqp->cm_id->add_ref(nesqp->cm_id);
nes_add_ref(&nesqp->ibqp);
schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
NES_TIMER_TYPE_CLOSE, 1, 0);
nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
@@ -2838,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
tcp_state = NES_AEQE_TCP_STATE_CLOSED;
}
nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
@@ -2876,7 +2874,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
}
spin_unlock_irqrestore(&nesqp->lock, flags);
if (next_iwarp_state) {
nes_add_ref(&nesqp->ibqp);
nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
" also added another reference\n",
nesqp->hwqp.qp_id, next_iwarp_state);
@@ -2888,7 +2885,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
/* FIN Received but ib state not RTS,
close complete will be on its way */
spin_unlock_irqrestore(&nesqp->lock, flags);
nes_rem_ref(&nesqp->ibqp);
return;
}
spin_unlock_irqrestore(&nesqp->lock, flags);
@@ -2922,7 +2918,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
((nesqp->ibqp_state == IB_QPS_RTS)&&
(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
} else {
nesqp->in_disconnect = 0;
@@ -2931,7 +2926,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
break;
case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
nesqp = *((struct nes_qp **)&context);
nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
@@ -3042,7 +3036,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
@@ -3062,7 +3055,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
@@ -3082,7 +3074,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
/* TODO: additional AEs need to be here */