Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "Primary 4.7 merge window changes

   - Updates to the new Intel X722 iWARP driver
   - Updates to the hfi1 driver
   - Fixes for the iw_cxgb4 driver
   - Misc core fixes
   - Generic RDMA READ/WRITE API addition
   - SRP updates
   - Misc ipoib updates
   - Minor mlx5 updates"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (148 commits)
  IB/mlx5: Fire the CQ completion handler from tasklet
  net/mlx5_core: Use tasklet for user-space CQ completion events
  IB/core: Do not require CAP_NET_ADMIN for packet sniffing
  IB/mlx4: Fix unaligned access in send_reply_to_slave
  IB/mlx5: Report Scatter FCS device capability when supported
  IB/mlx5: Add Scatter FCS support for Raw Packet QP
  IB/core: Add Scatter FCS create flag
  IB/core: Add Raw Scatter FCS device capability
  IB/core: Add extended device capability flags
  i40iw: pass hw_stats by reference rather than by value
  i40iw: Remove unnecessary synchronize_irq() before free_irq()
  i40iw: constify i40iw_vf_cqp_ops structure
  IB/mlx5: Add UARs write-combining and non-cached mapping
  IB/mlx5: Allow mapping the free running counter on PROT_EXEC
  IB/mlx4: Use list_for_each_entry_safe
  IB/SA: Use correct free function
  IB/core: Fix a potential array overrun in CMA and SA agent
  IB/core: Remove unnecessary check in ibnl_rcv_msg
  IB/IWPM: Fix a potential skb leak
  RDMA/nes: replace custom print_hex_dump()
  ...
此提交包含在:
Linus Torvalds
2016-05-20 14:35:07 -07:00
當前提交 76b584d312
共有 111 個檔案被更改,包括 4116 行新增2605 行删除

查看文件

@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");
static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
@@ -316,7 +320,7 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
struct ib_fmr_pool_param fmr_param;
memset(&fmr_param, 0, sizeof(fmr_param));
fmr_param.pool_size = target->scsi_host->can_queue;
fmr_param.pool_size = target->mr_pool_size;
fmr_param.dirty_watermark = fmr_param.pool_size / 4;
fmr_param.cache = 1;
fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
@@ -441,23 +445,22 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
return srp_create_fr_pool(dev->dev, dev->pd,
target->scsi_host->can_queue,
return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
dev->max_pages_per_mr);
}
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
* @qp: RDMA queue pair.
*
* Drain the qp before destroying it. This avoids that the receive
* completion handler can access the queue pair while it is
* being destroyed.
*/
static void srp_destroy_qp(struct srp_rdma_ch *ch)
static void srp_destroy_qp(struct ib_qp *qp)
{
ib_drain_rq(ch->qp);
ib_destroy_qp(ch->qp);
ib_drain_rq(qp);
ib_destroy_qp(qp);
}
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
@@ -469,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = dev->use_fast_reg ? 3 : 1;
const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -530,7 +533,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
}
if (ch->qp)
srp_destroy_qp(ch);
srp_destroy_qp(ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
@@ -554,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
srp_destroy_qp(ch);
srp_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -597,7 +600,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
ib_destroy_fmr_pool(ch->fmr_pool);
}
srp_destroy_qp(ch);
srp_destroy_qp(ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
@@ -850,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
for (i = 0; i < target->req_ring_size; ++i) {
req = &ch->req_ring[i];
mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
GFP_KERNEL);
if (!mr_list)
goto out;
@@ -1112,7 +1115,7 @@ static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
}
/**
* srp_free_req() - Unmap data and add request to the free request list.
* srp_free_req() - Unmap data and adjust ch->req_lim.
* @ch: SRP RDMA channel.
* @req: Request to be freed.
* @scmnd: SCSI command associated with @req.
@@ -1299,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
srp_handle_qp_err(cq, wc, "FAST REG");
}
/*
* Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
* where to start in the first element. If sg_offset_p != NULL then
* *sg_offset_p is updated to the offset in state->sg[retval] of the first
* byte that has not yet been mapped.
*/
static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_request *req,
struct srp_rdma_ch *ch, int sg_nents)
struct srp_rdma_ch *ch, int sg_nents,
unsigned int *sg_offset_p)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,13 +1326,14 @@ static int srp_map_finish_fr(struct srp_map_state *state,
WARN_ON_ONCE(!dev->use_fast_reg);
if (sg_nents == 0)
return 0;
if (sg_nents == 1 && target->global_mr) {
srp_map_desc(state, sg_dma_address(state->sg),
sg_dma_len(state->sg),
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
sg_dma_len(state->sg) - sg_offset,
target->global_mr->rkey);
if (sg_offset_p)
*sg_offset_p = 0;
return 1;
}
@@ -1333,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
if (unlikely(n < 0))
n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
dev->mr_page_size);
if (unlikely(n < 0)) {
srp_fr_pool_put(ch->fr_pool, &desc, 1);
pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
sg_offset_p ? *sg_offset_p : -1, n);
return n;
}
WARN_ON_ONCE(desc->mr->length == 0);
req->reg_cqe.done = srp_reg_mr_err_done;
@@ -1357,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
desc->mr->length, desc->mr->rkey);
err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
if (unlikely(err))
if (unlikely(err)) {
WARN_ON_ONCE(err == -ENOMEM);
return err;
}
return n;
}
@@ -1398,7 +1419,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
/*
* If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
* boundries.
* boundaries.
*/
ret = 0;
if (len != dev->mr_page_size)
@@ -1413,10 +1434,9 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct scatterlist *sg;
int i, ret;
state->desc = req->indirect_desc;
state->pages = req->map_page;
state->fmr.next = req->fmr_list;
state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
for_each_sg(scat, sg, count, i) {
ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1428,8 +1448,6 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
if (ret)
return ret;
req->nmdesc = state->nmdesc;
return 0;
}
@@ -1437,15 +1455,20 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct srp_request *req, struct scatterlist *scat,
int count)
{
unsigned int sg_offset = 0;
state->desc = req->indirect_desc;
state->fr.next = req->fr_list;
state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
state->fr.end = req->fr_list + ch->target->mr_per_cmd;
state->sg = scat;
if (count == 0)
return 0;
while (count) {
int i, n;
n = srp_map_finish_fr(state, req, ch, count);
n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
if (unlikely(n < 0))
return n;
@@ -1454,8 +1477,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
state->sg = sg_next(state->sg);
}
req->nmdesc = state->nmdesc;
return 0;
}
@@ -1475,8 +1496,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
target->global_mr->rkey);
}
req->nmdesc = state->nmdesc;
return 0;
}
@@ -1509,14 +1528,15 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
if (dev->use_fast_reg) {
state.sg = idb_sg;
sg_set_buf(idb_sg, req->indirect_desc, idb_len);
sg_init_one(idb_sg, req->indirect_desc, idb_len);
idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
ret = srp_map_finish_fr(&state, req, ch, 1);
ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
if (ret < 0)
return ret;
WARN_ON_ONCE(ret < 1);
} else if (dev->use_fmr) {
state.pages = idb_pages;
state.pages[0] = (req->indirect_dma_addr &
@@ -1534,6 +1554,41 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
return 0;
}
#if defined(DYNAMIC_DATA_DEBUG)
static void srp_check_mapping(struct srp_map_state *state,
struct srp_rdma_ch *ch, struct srp_request *req,
struct scatterlist *scat, int count)
{
struct srp_device *dev = ch->target->srp_host->srp_dev;
struct srp_fr_desc **pfr;
u64 desc_len = 0, mr_len = 0;
int i;
for (i = 0; i < state->ndesc; i++)
desc_len += be32_to_cpu(req->indirect_desc[i].len);
if (dev->use_fast_reg)
for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
mr_len += (*pfr)->mr->length;
else if (dev->use_fmr)
for (i = 0; i < state->nmdesc; i++)
mr_len += be32_to_cpu(req->indirect_desc[i].len);
if (desc_len != scsi_bufflen(req->scmnd) ||
mr_len > scsi_bufflen(req->scmnd))
pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
scsi_bufflen(req->scmnd), desc_len, mr_len,
state->ndesc, state->nmdesc);
}
#endif
/**
* srp_map_data() - map SCSI data buffer onto an SRP request
* @scmnd: SCSI command to map
* @ch: SRP RDMA channel
* @req: SRP request
*
* Returns the length in bytes of the SRP_CMD IU or a negative value if
* mapping failed.
*/
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
@@ -1601,11 +1656,23 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
memset(&state, 0, sizeof(state));
if (dev->use_fast_reg)
srp_map_sg_fr(&state, ch, req, scat, count);
ret = srp_map_sg_fr(&state, ch, req, scat, count);
else if (dev->use_fmr)
srp_map_sg_fmr(&state, ch, req, scat, count);
ret = srp_map_sg_fmr(&state, ch, req, scat, count);
else
srp_map_sg_dma(&state, ch, req, scat, count);
ret = srp_map_sg_dma(&state, ch, req, scat, count);
req->nmdesc = state.nmdesc;
if (ret < 0)
goto unmap;
#if defined(DYNAMIC_DEBUG)
{
DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
"Memory mapping consistency check");
if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
srp_check_mapping(&state, ch, req, scat, count);
}
#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -1628,7 +1695,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
!target->allow_ext_sg)) {
shost_printk(KERN_ERR, target->scsi_host,
"Could not fit S/G list into SRP_CMD\n");
return -EIO;
ret = -EIO;
goto unmap;
}
count = min(state.ndesc, target->cmd_sg_cnt);
@@ -1646,7 +1714,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
idb_len, &idb_rkey);
if (ret < 0)
return ret;
goto unmap;
req->nmdesc++;
} else {
idb_rkey = cpu_to_be32(target->global_mr->rkey);
@@ -1672,6 +1740,12 @@ map_complete:
cmd->buf_fmt = fmt;
return len;
unmap:
srp_unmap_data(scmnd, ch, req);
if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
ret = -E2BIG;
return ret;
}
/*
@@ -2564,6 +2638,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
static int srp_slave_alloc(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
return 0;
}
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -2755,6 +2843,7 @@ static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
.slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
@@ -2829,7 +2918,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
goto out;
}
pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
dev_name(&target->scsi_host->shost_gendev),
srp_sdev_count(target->scsi_host));
@@ -3161,6 +3250,7 @@ static ssize_t srp_create_target(struct device *dev,
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
int ret, node_idx, node, cpu, i;
unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
target_host = scsi_host_alloc(&srp_template,
@@ -3217,7 +3307,33 @@ static ssize_t srp_create_target(struct device *dev,
target->sg_tablesize = target->cmd_sg_cnt;
}
if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
/*
* FR and FMR can only map one HCA page per entry. If the
* start address is not aligned on a HCA page boundary two
* entries will be used for the head and the tail although
* these two entries combined contain at most one HCA page of
* data. Hence the "+ 1" in the calculation below.
*
* The indirect data buffer descriptor is contiguous so the
* memory for that buffer will only be registered if
* register_always is true. Hence add one to mr_per_cmd if
* register_always has been set.
*/
max_sectors_per_mr = srp_dev->max_pages_per_mr <<
(ilog2(srp_dev->mr_page_size) - 9);
mr_per_cmd = register_always +
(target->scsi_host->max_sectors + 1 +
max_sectors_per_mr - 1) / max_sectors_per_mr;
pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
target->scsi_host->max_sectors,
srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
max_sectors_per_mr, mr_per_cmd);
}
target_host->sg_tablesize = target->sg_tablesize;
target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
target->max_iu_len = sizeof (struct srp_cmd) +
@@ -3414,17 +3530,6 @@ static void srp_add_one(struct ib_device *device)
if (!srp_dev)
return;
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3435,8 +3540,25 @@ static void srp_add_one(struct ib_device *device)
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
device->attrs.max_mr_size, srp_dev->mr_page_size,
max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
} else if (!never_register &&
device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
}
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
@@ -3456,7 +3578,8 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->pd))
goto free_dev;
if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
if (never_register || !register_always ||
(!srp_dev->has_fmr && !srp_dev->has_fr)) {
srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |

查看文件

@@ -202,6 +202,8 @@ struct srp_target_port {
char target_name[32];
unsigned int scsi_id;
unsigned int sg_tablesize;
int mr_pool_size;
int mr_per_cmd;
int queue_size;
int req_ring_size;
int comp_vector;