Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a quiet cycle for RDMA, the big bulk is the usual
  smallish driver updates and bug fixes. About four new uAPI related
  things. Not as much Szykaller patches this time, the bugs it finds are
  getting harder to fix.

  Summary:

   - More work cleaning up the RDMA CM code

   - Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
     i40iw, iw_cxgb4, mlx5, rxe

   - Driver specific resource tracking and reporting via netlink

   - Continued work for name space support from Parav

   - MPLS support for the verbs flow steering uAPI

   - A few tricky IPoIB fixes improving robustness

   - HFI1 driver support for the '16B' management packet format

   - Some auditing to not print kernel pointers via %llx or similar

   - Mark the entire 'UCM' user-space interface as BROKEN with the
     intent to remove it entirely. The user space side of this was long
     ago replaced with RDMA-CM and syzkaller is finding bugs in the
     residual UCM interface nobody wishes to fix because nobody uses it.

   - Purge more bogus BUG_ON's from Leon

   - 'flow counters' verbs uAPI

   - T10 fixups for iser/isert, these are Acked by Martin but going
     through the RDMA tree due to dependencies"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
  RDMA/mlx5: Update SPDX tags to show proper license
  RDMA/restrack: Change SPDX tag to properly reflect license
  IB/hfi1: Fix comment on default hdr entry size
  IB/hfi1: Rename exp_lock to exp_mutex
  IB/hfi1: Add bypass register defines and replace blind constants
  IB/hfi1: Remove unused variable
  IB/hfi1: Ensure VL index is within bounds
  IB/hfi1: Fix user context tail allocation for DMA_RTAIL
  IB/hns: Use zeroing memory allocator instead of allocator/memset
  infiniband: fix a possible use-after-free bug
  iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
  IB/isert: use T10-PI check mask definitions from core layer
  IB/iser: use T10-PI check mask definitions from core layer
  RDMA/core: introduce check masks for T10-PI offload
  IB/isert: fix T10-pi check mask setting
  IB/mlx5: Add counters read support
  IB/mlx5: Add flow counters read support
  IB/mlx5: Add flow counters binding support
  IB/mlx5: Add counters create and destroy support
  IB/uverbs: Add support for flow counters
  ...
This commit is contained in:
Linus Torvalds
2018-06-07 13:04:07 -07:00
144 changed files with 4480 additions and 1792 deletions

View File

@@ -415,6 +415,7 @@ struct ipoib_ah {
struct list_head list;
struct kref ref;
unsigned last_send;
int valid;
};
struct ipoib_path {
@@ -431,7 +432,6 @@ struct ipoib_path {
struct rb_node rb_node;
struct list_head list;
int valid;
};
struct ipoib_neigh {

View File

@@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
path->pathrec.dgid.raw);
path->valid = 0;
if (path->ah)
path->ah->valid = 0;
}
spin_unlock_irq(&priv->lock);
@@ -833,7 +834,7 @@ static void path_rec_completion(int status,
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
path->valid = 1;
path->ah->valid = 1;
}
path->query = NULL;
@@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev,
return 0;
}
static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
unsigned long flags;
spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, daddr + 4);
if (!path)
goto out;
if (!path->query)
path_rec_start(dev, path);
out:
spin_unlock_irqrestore(&priv->lock, flags);
}
static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
@@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
list_add_tail(&neigh->list, &path->neigh_list);
if (path->ah) {
if (path->ah && path->ah->valid) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
@@ -1034,63 +1053,43 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
goto drop_and_unlock;
path = __path_find(dev, phdr->hwaddr + 4);
if (!path || !path->valid) {
int new_path = 0;
if (!path || !path->ah || !path->ah->valid) {
if (!path) {
path = path_rec_create(dev, phdr->hwaddr + 4);
new_path = 1;
}
if (path) {
if (!new_path)
/* make sure there is no changes in the existing path record */
init_path_rec(priv, path, phdr->hwaddr + 4);
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (!path->query && path_rec_start(dev, path)) {
spin_unlock_irqrestore(&priv->lock, flags);
if (new_path)
path_free(dev, path);
return;
} else
__path_add(dev, path);
if (!path)
goto drop_and_unlock;
__path_add(dev, path);
} else {
/*
* make sure there are no changes in the existing
* path record
*/
init_path_rec(priv, path, phdr->hwaddr + 4);
}
if (!path->query && path_rec_start(dev, path)) {
goto drop_and_unlock;
}
spin_unlock_irqrestore(&priv->lock, flags);
return;
}
if (path->ah) {
ipoib_dbg(priv, "Send unicast ARP to %08x\n",
be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
spin_unlock_irqrestore(&priv->lock, flags);
path->ah->last_send = rn->send(dev, skb, path->ah->ah,
IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
goto drop_and_unlock;
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
goto unlock;
} else {
goto drop_and_unlock;
}
}
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_dbg(priv, "Send unicast ARP to %08x\n",
be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
path->ah->last_send = rn->send(dev, skb, path->ah->ah,
IPOIB_QPN(phdr->hwaddr));
return;
drop_and_unlock:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
unlock:
spin_unlock_irqrestore(&priv->lock, flags);
}
@@ -1161,10 +1160,12 @@ send_using_neigh:
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
} else if (neigh->ah) {
} else if (neigh->ah && neigh->ah->valid) {
neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
IPOIB_QPN(phdr->hwaddr));
goto unref;
} else if (neigh->ah) {
neigh_refresh_path(neigh, phdr->hwaddr, dev);
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {

View File

@@ -886,7 +886,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
struct netdev_hw_addr *ha;
struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list);
unsigned long flags;
struct ib_sa_mcmember_rec rec;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
@@ -898,9 +897,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)
ipoib_dbg_mcast(priv, "restarting multicast task\n");
local_irq_save(flags);
netif_addr_lock(dev);
spin_lock(&priv->lock);
netif_addr_lock_bh(dev);
spin_lock_irq(&priv->lock);
/*
* Unfortunately, the networking core only gives us a list of all of
@@ -978,9 +976,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)
}
}
spin_unlock(&priv->lock);
netif_addr_unlock(dev);
local_irq_restore(flags);
spin_unlock_irq(&priv->lock);
netif_addr_unlock_bh(dev);
ipoib_mcast_remove_list(&remove_list);
@@ -988,9 +985,9 @@ void ipoib_mcast_restart_task(struct work_struct *work)
* Double check that we are still up
*/
if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
spin_lock_irqsave(&priv->lock, flags);
spin_lock_irq(&priv->lock);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
spin_unlock_irqrestore(&priv->lock, flags);
spin_unlock_irq(&priv->lock);
}
}

View File

@@ -665,19 +665,17 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
goto free_host;
}
/*
* FRs or FMRs can only map up to a (device) page per entry, but if the
* first entry is misaligned we'll end up using using two entries
* (head and tail) for a single page worth data, so we have to drop
* one segment from the calculation.
*/
max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9;
max_fr_sectors = (shost->sg_tablesize * PAGE_SIZE) >> 9;
shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
iser_conn, shost->sg_tablesize,
shost->max_sectors);
if (shost->max_sectors < iser_max_sectors)
iser_warn("max_sectors was reduced from %u to %u\n",
iser_max_sectors, shost->max_sectors);
if (cmds_max > max_cmds) {
iser_info("cmds_max changed from %u to %u\n",
cmds_max, max_cmds);

View File

@@ -383,10 +383,6 @@ struct iser_device {
bool remote_inv_sup;
};
#define ISER_CHECK_GUARD 0xc0
#define ISER_CHECK_REFTAG 0x0f
#define ISER_CHECK_APPTAG 0x30
/**
* struct iser_reg_resources - Fast registration recources
*
@@ -498,6 +494,7 @@ struct ib_conn {
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
* @scsi_sg_tablesize: scsi host sg_tablesize
* @pages_per_mr: maximum pages available for registration
*/
struct iser_conn {
struct ib_conn ib_conn;
@@ -520,6 +517,7 @@ struct iser_conn {
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
unsigned short pages_per_mr;
bool snd_w_inv;
};

View File

@@ -251,7 +251,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max,
iser_conn->scsi_sg_tablesize))
iser_conn->pages_per_mr))
goto create_rdma_reg_res_failed;
if (iser_alloc_login_buf(iser_conn))

View File

@@ -362,9 +362,9 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
*mask = 0;
if (sc->prot_flags & SCSI_PROT_REF_CHECK)
*mask |= ISER_CHECK_REFTAG;
*mask |= IB_SIG_CHECK_REFTAG;
if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
*mask |= ISER_CHECK_GUARD;
*mask |= IB_SIG_CHECK_GUARD;
}
static inline void

View File

@@ -703,19 +703,34 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
unsigned int max_sectors)
{
struct iser_device *device = iser_conn->ib_conn.device;
struct ib_device_attr *attr = &device->ib_device->attrs;
unsigned short sg_tablesize, sup_sg_tablesize;
unsigned short reserved_mr_pages;
/*
* FRs without SG_GAPS or FMRs can only map up to a (device) page per
* entry, but if the first entry is misaligned we'll end up using two
* entries (head and tail) for a single page worth data, so one
* additional entry is required.
*/
if ((attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
(attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG))
reserved_mr_pages = 0;
else
reserved_mr_pages = 1;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
if (device->ib_device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS)
if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
sup_sg_tablesize =
min_t(
uint, ISCSI_ISER_MAX_SG_TABLESIZE,
device->ib_device->attrs.max_fast_reg_page_list_len);
attr->max_fast_reg_page_list_len - reserved_mr_pages);
else
sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
iser_conn->pages_per_mr =
iser_conn->scsi_sg_tablesize + reserved_mr_pages;
}
/**

View File

@@ -886,15 +886,9 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des
}
static void
isert_create_send_desc(struct isert_conn *isert_conn,
struct isert_cmd *isert_cmd,
struct iser_tx_desc *tx_desc)
__isert_create_send_desc(struct isert_device *device,
struct iser_tx_desc *tx_desc)
{
struct isert_device *device = isert_conn->device;
struct ib_device *ib_dev = device->ib_device;
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISCSI_CTRL;
@@ -907,6 +901,20 @@ isert_create_send_desc(struct isert_conn *isert_conn,
}
}
static void
isert_create_send_desc(struct isert_conn *isert_conn,
struct isert_cmd *isert_cmd,
struct iser_tx_desc *tx_desc)
{
struct isert_device *device = isert_conn->device;
struct ib_device *ib_dev = device->ib_device;
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
__isert_create_send_desc(device, tx_desc);
}
static int
isert_init_tx_hdrs(struct isert_conn *isert_conn,
struct iser_tx_desc *tx_desc)
@@ -994,7 +1002,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
int ret;
isert_create_send_desc(isert_conn, NULL, tx_desc);
__isert_create_send_desc(device, tx_desc);
memcpy(&tx_desc->iscsi_header, &login->rsp[0],
sizeof(struct iscsi_hdr));
@@ -2106,10 +2114,13 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
return -EINVAL;
}
sig_attrs->check_mask =
(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
(se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
(se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
if (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD)
sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
if (se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG)
sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
if (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG)
sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
return 0;
}