IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover

Using this new API reduces iSER code complexity.
It also reduces the maximum number of work requests per task and the need
to deal with multiple MRs (and their registrations and invalidations)
per task. This is done by using a single WR and a special MR type
(IB_MR_TYPE_INTEGRITY) for PI operations.

The setup of the tested benchmark:
 - 2 servers with 24 cores (1 initiator and 1 target)
 - 24 target sessions with 1 LUN each
 - ramdisk backstore
 - PI active

Performance results running fio (24 jobs, 128 iodepth) using
write_generate=0 and read_verify=0 (with/without patch):

bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512     1236.6K/1164.3K   1357.2K/1332.8K
1k      1196.5K/1163.8K   1348.4K/1262.7K
2k      1016.7K/921950    1003.7K/931230
4k      662728/600545     595423/501513
8k      385954/384345     333775/277090
16k     222864/222820     170317/170671
32k     116869/114896     82331/82244
64k     55205/54931       40264/40021

Using write_generate=1 and read_verify=1 (with/without patch):

bs      IOPS(read)        IOPS(write)
----    ----------        ----------
512     1090.1K/1030.9K   1303.9K/1101.4K
1k      1057.7K/904583    1318.4K/988085
2k      965226/638799     1008.6K/692514
4k      555479/410151     542414/414517
8k      298675/224964     264729/237508
16k     133485/122481     164625/138647
32k     74329/67615       80143/78743
64k     35716/35519       39294/37334

We get a performance improvement at all block sizes.
The most significant improvement is for writes with a 4k block size
(almost 30% more IOPS).

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Israel Rukshin
2019-06-11 18:52:47 +03:00
committed by Jason Gunthorpe
parent 38ca87c6f1
commit b76a439982
4 changed files with 96 additions and 194 deletions

View File

@@ -233,85 +233,6 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
kfree(desc);
}
static int
iser_alloc_reg_res(struct iser_device *device,
struct ib_pd *pd,
struct iser_reg_resources *res,
unsigned int size)
{
struct ib_device *ib_dev = device->ib_device;
enum ib_mr_type mr_type;
int ret;
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
res->mr = ib_alloc_mr(pd, mr_type, size);
if (IS_ERR(res->mr)) {
ret = PTR_ERR(res->mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
return ret;
}
res->mr_valid = 0;
return 0;
}
/*
 * iser_free_reg_res - release the memory region of a registration resource
 * @rsc: registration resource whose ->mr is passed to ib_dereg_mr()
 */
static void
iser_free_reg_res(struct iser_reg_resources *rsc)
{
	ib_dereg_mr(rsc->mr);
}
/*
 * iser_alloc_pi_ctx - allocate the protection-information context of a
 * fast registration descriptor
 * @device: iSER device, passed through to iser_alloc_reg_res()
 * @pd:     protection domain the memory regions are allocated from
 * @desc:   descriptor that receives the new context in desc->pi_ctx
 * @size:   size passed to iser_alloc_reg_res() for the context's MR
 *
 * Allocates a zeroed context, its registration resource and an
 * IB_MR_TYPE_SIGNATURE memory region, then clears the sig_mr_valid and
 * sig_protected flags.
 *
 * Return: 0 on success; -ENOMEM when the context itself cannot be
 * allocated, otherwise the error reported by iser_alloc_reg_res() or
 * ib_alloc_mr(). On failure everything allocated here is released and
 * desc->pi_ctx is left NULL, so the descriptor never carries a pointer
 * to freed memory.
 */
static int
iser_alloc_pi_ctx(struct iser_device *device,
		  struct ib_pd *pd,
		  struct iser_fr_desc *desc,
		  unsigned int size)
{
	struct iser_pi_context *pi_ctx;
	int ret;

	desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
	if (!desc->pi_ctx)
		return -ENOMEM;

	pi_ctx = desc->pi_ctx;

	ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size);
	if (ret) {
		iser_err("failed to allocate reg_resources\n");
		goto alloc_reg_res_err;
	}

	pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
	if (IS_ERR(pi_ctx->sig_mr)) {
		ret = PTR_ERR(pi_ctx->sig_mr);
		goto sig_mr_failure;
	}
	pi_ctx->sig_mr_valid = 0;
	pi_ctx->sig_protected = 0;

	return 0;

sig_mr_failure:
	iser_free_reg_res(&pi_ctx->rsc);
alloc_reg_res_err:
	kfree(pi_ctx);
	/* Do not leave a dangling pointer behind in the descriptor. */
	desc->pi_ctx = NULL;

	return ret;
}
/*
 * iser_free_pi_ctx - tear down a protection-information context
 * @pi_ctx: context to release
 *
 * Releases the context's registration resource, deregisters its
 * signature memory region and finally frees the context itself.
 */
static void
iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
{
	struct iser_reg_resources *rsc = &pi_ctx->rsc;

	iser_free_reg_res(rsc);
	ib_dereg_mr(pi_ctx->sig_mr);

	kfree(pi_ctx);
}
static struct iser_fr_desc *
iser_create_fastreg_desc(struct iser_device *device,
struct ib_pd *pd,
@@ -319,32 +240,58 @@ iser_create_fastreg_desc(struct iser_device *device,
unsigned int size)
{
struct iser_fr_desc *desc;
struct ib_device *ib_dev = device->ib_device;
enum ib_mr_type mr_type;
int ret;
desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return ERR_PTR(-ENOMEM);
ret = iser_alloc_reg_res(device, pd, &desc->rsc, size);
if (ret)
goto reg_res_alloc_failure;
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
desc->rsc.mr = ib_alloc_mr(pd, mr_type, size);
if (IS_ERR(desc->rsc.mr)) {
ret = PTR_ERR(desc->rsc.mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
goto err_alloc_mr;
}
if (pi_enable) {
ret = iser_alloc_pi_ctx(device, pd, desc, size);
if (ret)
goto pi_ctx_alloc_failure;
desc->rsc.sig_mr = ib_alloc_mr_integrity(pd, size, size);
if (IS_ERR(desc->rsc.sig_mr)) {
ret = PTR_ERR(desc->rsc.sig_mr);
iser_err("Failed to allocate sig_mr err=%d\n", ret);
goto err_alloc_mr_integrity;
}
}
desc->rsc.mr_valid = 0;
return desc;
pi_ctx_alloc_failure:
iser_free_reg_res(&desc->rsc);
reg_res_alloc_failure:
err_alloc_mr_integrity:
ib_dereg_mr(desc->rsc.mr);
err_alloc_mr:
kfree(desc);
return ERR_PTR(ret);
}
/*
 * iser_destroy_fastreg_desc - release a fast registration descriptor
 * @desc: descriptor to destroy
 *
 * Deregisters the descriptor's data memory region, deregisters the
 * signature memory region when one is present, and frees the descriptor.
 */
static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc)
{
	ib_dereg_mr(desc->rsc.mr);

	/* sig_mr is only present on some descriptors, hence the check. */
	if (desc->rsc.sig_mr) {
		ib_dereg_mr(desc->rsc.sig_mr);
		desc->rsc.sig_mr = NULL;
	}

	kfree(desc);
}
/**
* iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors
* for fast registration work requests.
@@ -399,10 +346,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
list_del(&desc->all_list);
iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
kfree(desc);
iser_destroy_fastreg_desc(desc);
++i;
}
@@ -707,6 +651,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
struct ib_device_attr *attr = &device->ib_device->attrs;
unsigned short sg_tablesize, sup_sg_tablesize;
unsigned short reserved_mr_pages;
u32 max_num_sg;
/*
* FRs without SG_GAPS or FMRs can only map up to a (device) page per
@@ -720,12 +665,17 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
else
reserved_mr_pages = 1;
if (iser_conn->ib_conn.pi_support)
max_num_sg = attr->max_pi_fast_reg_page_list_len;
else
max_num_sg = attr->max_fast_reg_page_list_len;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
sup_sg_tablesize =
min_t(
uint, ISCSI_ISER_MAX_SG_TABLESIZE,
attr->max_fast_reg_page_list_len - reserved_mr_pages);
max_num_sg - reserved_mr_pages);
else
sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
@@ -1118,9 +1068,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
struct ib_mr_status mr_status;
int ret;
if (desc && desc->pi_ctx->sig_protected) {
desc->pi_ctx->sig_protected = 0;
ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
if (desc && desc->sig_protected) {
desc->sig_protected = 0;
ret = ib_check_mr_status(desc->rsc.sig_mr,
IB_MR_CHECK_SIG_STATUS, &mr_status);
if (ret) {
pr_err("ib_check_mr_status failed, ret %d\n", ret);