1
0

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  mlx4_core: Increase command timeout for INIT_HCA to 10 seconds
  IPoIB/cm: Use common CQ for CM send completions
  IB/uverbs: Fix checking of userspace object ownership
  IB/mlx4: Sanity check userspace send queue sizes
  IPoIB: Rewrite "if (!likely(...))" as "if (unlikely(!(...)))"
  IB/ehca: Enable large page MRs by default
  IB/ehca: Change meaning of hca_cap_mr_pgsize
  IB/ehca: Fix ehca_encode_hwpage_size() and alloc_fmr()
  IB/ehca: Fix masking error in {,re}reg_phys_mr()
  IB/ehca: Supply QP token for SRQ base QPs
  IPoIB: Use round_jiffies() for ah_reap_task
  RDMA/cma: Fix deadlock destroying listen requests
  RDMA/cma: Add locking around QP accesses
  IB/mthca: Avoid alignment traps when writing doorbells
  mlx4_core: Kill mlx4_write64_raw()
This commit is contained in:
Linus Torvalds
2007-10-23 09:56:11 -07:00
parent 0d6810091c 77109cc282
commit 0b776eb542
19 changed files with 287 additions and 327 deletions

View file

@@ -84,9 +84,8 @@ enum {
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_MCAST_STARTED = 8,
IPOIB_FLAG_NETIF_STOPPED = 9,
IPOIB_FLAG_ADMIN_CM = 10,
IPOIB_FLAG_UMCAST = 11,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -98,9 +97,9 @@ enum {
#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_CM_OP_SRQ (1ul << 30)
#define IPOIB_OP_CM (1ul << 30)
#else
#define IPOIB_CM_OP_SRQ (0)
#define IPOIB_OP_CM (0)
#endif
/* structs */
@@ -197,7 +196,6 @@ struct ipoib_cm_rx {
struct ipoib_cm_tx {
struct ib_cm_id *id;
struct ib_cq *cq;
struct ib_qp *qp;
struct list_head list;
struct net_device *dev;
@@ -294,6 +292,7 @@ struct ipoib_dev_priv {
unsigned tx_tail;
struct ib_sge tx_sge;
struct ib_send_wr tx_wr;
unsigned tx_outstanding;
struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -504,6 +503,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
unsigned int mtu);
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
#else
struct ipoib_cm_tx;
@@ -592,6 +592,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *w
{
}
static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
}
#endif
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

View file

@@ -87,7 +87,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
struct ib_recv_wr *bad_wr;
int i, ret;
priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -401,7 +401,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
struct ipoib_cm_rx *p;
unsigned long flags;
@@ -412,7 +412,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
@@ -434,7 +434,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
goto repost;
}
if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
p = wc->qp->qp_context;
if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -498,7 +498,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
priv->tx_sge.addr = addr;
priv->tx_sge.length = len;
priv->tx_wr.wr_id = wr_id;
priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
}
@@ -549,20 +549,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
dev->trans_start = jiffies;
++tx->tx_head;
if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
netif_stop_queue(dev);
set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
}
}
}
static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
struct ib_wc *wc)
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
unsigned long flags;
@@ -587,11 +586,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
spin_lock_irqsave(&priv->tx_lock, flags);
++tx->tx_tail;
if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
}
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -614,11 +612,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
tx->neigh = NULL;
}
/* queue would be re-started anyway when TX is destroyed,
* but it makes sense to do it ASAP here. */
if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
netif_wake_queue(dev);
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -632,19 +625,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
spin_unlock_irqrestore(&priv->tx_lock, flags);
}
static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
{
struct ipoib_cm_tx *tx = tx_ptr;
int n, i;
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
do {
n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
for (i = 0; i < n; ++i)
ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
} while (n == IPOIB_NUM_WC);
}
int ipoib_cm_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -807,17 +787,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
return 0;
}
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = cq,
.send_cq = priv->cq,
.recv_cq = priv->cq,
.srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size,
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
.qp_context = tx
};
return ib_create_qp(priv->pd, &attr);
@@ -899,21 +880,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
goto err_tx;
}
p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
ipoib_sendq_size + 1, 0);
if (IS_ERR(p->cq)) {
ret = PTR_ERR(p->cq);
ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
goto err_cq;
}
ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
if (ret) {
ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
goto err_req_notify;
}
p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
if (IS_ERR(p->qp)) {
ret = PTR_ERR(p->qp);
ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -950,12 +917,8 @@ err_modify:
err_id:
p->id = NULL;
ib_destroy_qp(p->qp);
err_req_notify:
err_qp:
p->qp = NULL;
ib_destroy_cq(p->cq);
err_cq:
p->cq = NULL;
err_tx:
return ret;
}
@@ -964,6 +927,8 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long flags;
unsigned long begin;
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -971,27 +936,40 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
if (p->id)
ib_destroy_cm_id(p->id);
if (p->tx_ring) {
/* Wait for all sends to complete */
begin = jiffies;
while ((int) p->tx_tail - (int) p->tx_head < 0) {
if (time_after(jiffies, begin + 5 * HZ)) {
ipoib_warn(priv, "timing out; %d sends not completed\n",
p->tx_head - p->tx_tail);
goto timeout;
}
msleep(1);
}
}
timeout:
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
spin_lock_irqsave(&priv->tx_lock, flags);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
spin_unlock_irqrestore(&priv->tx_lock, flags);
}
if (p->qp)
ib_destroy_qp(p->qp);
if (p->cq)
ib_destroy_cq(p->cq);
if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
netif_wake_queue(p->dev);
if (p->tx_ring) {
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
}
kfree(p->tx_ring);
}
kfree(p->tx_ring);
kfree(p);
}

View file

@@ -267,11 +267,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail;
if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
}
spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
@@ -301,14 +300,18 @@ poll_more:
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
if (wc->wr_id & IPOIB_CM_OP_SRQ) {
if (wc->wr_id & IPOIB_OP_RECV) {
++done;
ipoib_cm_handle_rx_wc(dev, wc);
} else if (wc->wr_id & IPOIB_OP_RECV) {
++done;
ipoib_ib_handle_rx_wc(dev, wc);
} else
ipoib_ib_handle_tx_wc(dev, wc);
if (wc->wr_id & IPOIB_OP_CM)
ipoib_cm_handle_rx_wc(dev, wc);
else
ipoib_ib_handle_rx_wc(dev, wc);
} else {
if (wc->wr_id & IPOIB_OP_CM)
ipoib_cm_handle_tx_wc(dev, wc);
else
ipoib_ib_handle_tx_wc(dev, wc);
}
}
if (n != t)
@@ -401,10 +404,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
address->last_send = priv->tx_head;
++priv->tx_head;
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
}
}
}
@@ -436,7 +438,8 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
int ipoib_ib_dev_open(struct net_device *dev)
@@ -472,7 +475,8 @@ int ipoib_ib_dev_open(struct net_device *dev)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
round_jiffies_relative(HZ));
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -561,12 +565,17 @@ void ipoib_drain_cq(struct net_device *dev)
if (priv->ibwc[i].status == IB_WC_SUCCESS)
priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
else
ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
else
ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
} else {
if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
else
ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
}
}
} while (n == IPOIB_NUM_WC);
}
@@ -612,6 +621,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
--priv->tx_outstanding;
}
for (i = 0; i < ipoib_recvq_size; ++i) {

Ver ficheiro

@@ -148,8 +148,6 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
/*
* Now flush workqueue to make sure a scheduled task doesn't
* bring our internal state back up.
@@ -902,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out_rx_ring_cleanup;
}
/* priv->tx_head & tx_tail are already 0 */
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;