Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "First pull request for this merge window, there will also be a
  followup request with some stragglers. This pull request contains:

  - Fix for a thundering herd issue in the wbt block code (Anchal Agarwal)

  - A few NVMe pull requests:
      * Improved tracepoints (Keith)
      * Larger inline data support for RDMA (Steve Wise)
      * RDMA setup/teardown fixes (Sagi)
      * Effects log support for NVMe target (Chaitanya Kulkarni)
      * Buffered IO support for NVMe target (Chaitanya Kulkarni)
      * TP4004 (ANA) support (Christoph)
      * Various NVMe fixes

  - Block io-latency controller support. Much needed support for
    properly containing block devices. (Josef)

  - Series improving how we handle sense information on the stack (Kees)

  - Lightnvm fixes and updates/improvements (Mathias/Javier et al)

  - Zoned device support for null_blk (Matias)

  - AIX partition fixes (Mauricio Faria de Oliveira)

  - DIF checksum code made generic (Max Gurtovoy)

  - Add support for discard in iostats (Michael Callahan / Tejun)

  - Set of updates for BFQ (Paolo)

  - Removal of async write support for bsg (Christoph)

  - Bio page dirtying and clone fixups (Christoph)

  - Set of bcache fix/changes (via Coly)

  - Series improving blk-mq queue setup/teardown speed (Ming)

  - Series improving merging performance on blk-mq (Ming)

  - Lots of other fixes and cleanups from a slew of folks"

* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
  blkcg: Make blkg_root_lookup() work for queues in bypass mode
  bcache: fix error setting writeback_rate through sysfs interface
  null_blk: add lock drop/acquire annotation
  Blk-throttle: reduce tail io latency when iops limit is enforced
  block: paride: pd: mark expected switch fall-throughs
  block: Ensure that a request queue is dissociated from the cgroup controller
  block: Introduce blk_exit_queue()
  blkcg: Introduce blkg_root_lookup()
  block: Remove two superfluous #include directives
  blk-mq: count the hctx as active before allocating tag
  block: bvec_nr_vecs() returns value for wrong slab
  bcache: trivial - remove tailing backslash in macro BTREE_FLAG
  bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
  bcache: set max writeback rate when I/O request is idle
  bcache: add code comments for bset.c
  bcache: fix mistaken comments in request.c
  bcache: fix mistaken code comments in bcache.h
  bcache: add a comment in super.c
  bcache: avoid unncessary cache prefetch bch_btree_node_get()
  bcache: display rate debug parameters to 0 when writeback is not running
  ...
@@ -252,7 +252,8 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);

if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
if (nvme_req_needs_failover(req, status)) {
if ((req->cmd_flags & REQ_NVME_MPATH) &&
blk_path_error(status)) {
nvme_failover_req(req);
return;
}
@@ -617,6 +618,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
} else if (req_op(req) == REQ_OP_WRITE) {
t10_pi_prepare(req, ns->pi_type);
}

switch (ns->pi_type) {
@@ -627,8 +630,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
cmnd->rw.reftag = cpu_to_le32(
nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
}
}
@@ -638,6 +640,22 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
return 0;
}

void nvme_cleanup_cmd(struct request *req)
{
if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
nvme_req(req)->status == 0) {
struct nvme_ns *ns = req->rq_disk->private_data;

t10_pi_complete(req, ns->pi_type,
blk_rq_bytes(req) >> ns->lba_shift);
}
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
kfree(page_address(req->special_vec.bv_page) +
req->special_vec.bv_offset);
}
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);

blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
@@ -668,10 +686,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
}

cmd->common.command_id = req->tag;
if (ns)
trace_nvme_setup_nvm_cmd(req->q->id, cmd);
else
trace_nvme_setup_admin_cmd(cmd);
trace_nvme_setup_cmd(req, cmd);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -864,9 +879,6 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;

INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

@@ -1056,7 +1068,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count);

#define NVME_AEN_SUPPORTED \
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)

static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
@@ -1472,6 +1484,12 @@ static void nvme_update_disk_info(struct gendisk *disk,

set_capacity(disk, capacity);
nvme_config_discard(ns);

if (id->nsattr & (1 << 0))
set_disk_ro(disk, true);
else
set_disk_ro(disk, false);

blk_mq_unfreeze_queue(disk->queue);
}

@@ -2270,21 +2288,16 @@ out_unlock:
return ret;
}

int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 log_page, void *log,
size_t size, u64 offset)
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
unsigned long dwlen = size / 4 - 1;

c.get_log_page.opcode = nvme_admin_get_log_page;

if (ns)
c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
else
c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);

c.get_log_page.nsid = cpu_to_le32(nsid);
c.get_log_page.lid = log_page;
c.get_log_page.lsp = lsp;
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
@@ -2293,12 +2306,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}

static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
size_t size)
{
return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
}

static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
{
int ret;
@@ -2309,8 +2316,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
if (!ctrl->effects)
return 0;

ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
sizeof(*ctrl->effects));
ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
ctrl->effects, sizeof(*ctrl->effects), 0);
if (ret) {
kfree(ctrl->effects);
ctrl->effects = NULL;
@@ -2401,6 +2408,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);

if (id->rtd3e) {
/* us -> s */
@@ -2460,8 +2468,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}

ret = nvme_mpath_init(ctrl, id);
kfree(id);

if (ret < 0)
return ret;

if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
@@ -2680,6 +2692,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
#endif
NULL,
};

@@ -2702,6 +2718,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
#endif
return a->mode;
}

@@ -3075,8 +3099,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)

nvme_get_ctrl(ctrl);

kfree(id);

device_add_disk(ctrl->device, ns->disk);
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group))
@@ -3086,8 +3108,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);

nvme_mpath_add_disk(ns->head);
nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns);
kfree(id);

return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@@ -3229,7 +3253,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
log_size, 0);
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
@@ -3346,9 +3371,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;

if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
dev_warn(ctrl->device,
"Get FW SLOT INFO log error\n");
if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
}

@@ -3394,6 +3419,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -3426,6 +3458,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);

void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
@@ -3463,6 +3496,7 @@ static void nvme_free_ctrl(struct device *dev)

ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);

if (subsys) {
mutex_lock(&subsys->lock);
@@ -3499,6 +3533,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);

INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;

ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out;

@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);

bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
if (ctrl->opts->max_reconnects != -1 &&
if (ctrl->opts->max_reconnects == -1 ||
ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;

@@ -1737,6 +1737,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];

nvme_req(rq)->ctrl = &ctrl->ctrl;
return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}

@@ -414,12 +414,6 @@ static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
/* Set compacted version for upper layers */
geo->version = NVM_OCSSD_SPEC_20;

if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
geo->major_ver_id, geo->minor_ver_id);
return -EINVAL;
}

geo->num_ch = le16_to_cpu(id->num_grp);
geo->num_lun = le16_to_cpu(id->num_pu);
geo->all_luns = geo->num_ch * geo->num_lun;
@@ -583,7 +577,13 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
struct ppa_addr ppa;
size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
size_t log_pos, offset, len;
int ret, i;
int ret, i, max_len;

/*
* limit requests to maximum 256K to avoid issuing arbitrary large
* requests when the device does not specific a maximum transfer size.
*/
max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);

/* Normalize lba address space to obtain log offset */
ppa.ppa = slba;
@@ -596,10 +596,11 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
offset = log_pos * sizeof(struct nvme_nvm_chk_meta);

while (left) {
len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);
len = min_t(unsigned int, left, max_len);

ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
dev_meta, len, offset);
ret = nvme_get_log(ctrl, ns->head->ns_id,
NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
offset);
if (ret) {
dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
break;
@@ -662,12 +663,10 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,

rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

if (rqd->bio) {
if (rqd->bio)
blk_init_request_from_bio(rq, rqd->bio);
} else {
else
rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
rq->__data_len = 0;
}

return rq;
}

@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Christoph Hellwig.
|
||||
* Copyright (c) 2017-2018 Christoph Hellwig.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -20,6 +20,11 @@ module_param(multipath, bool, 0444);
|
||||
MODULE_PARM_DESC(multipath,
|
||||
"turn on native support for multiple controllers per subsystem");
|
||||
|
||||
inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3));
|
||||
}
|
||||
|
||||
/*
|
||||
* If multipathing is enabled we need to always use the subsystem instance
|
||||
* number for numbering our devices to avoid conflicts between subsystems that
|
||||
@@ -45,6 +50,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
|
||||
void nvme_failover_req(struct request *req)
|
||||
{
|
||||
struct nvme_ns *ns = req->q->queuedata;
|
||||
u16 status = nvme_req(req)->status;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ns->head->requeue_lock, flags);
|
||||
@@ -52,15 +58,35 @@ void nvme_failover_req(struct request *req)
|
||||
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
|
||||
blk_mq_end_request(req, 0);
|
||||
|
||||
nvme_reset_ctrl(ns->ctrl);
|
||||
kblockd_schedule_work(&ns->head->requeue_work);
|
||||
}
|
||||
switch (status & 0x7ff) {
|
||||
case NVME_SC_ANA_TRANSITION:
|
||||
case NVME_SC_ANA_INACCESSIBLE:
|
||||
case NVME_SC_ANA_PERSISTENT_LOSS:
|
||||
/*
|
||||
* If we got back an ANA error we know the controller is alive,
|
||||
* but not ready to serve this namespaces. The spec suggests
|
||||
* we should update our general state here, but due to the fact
|
||||
* that the admin and I/O queues are not serialized that is
|
||||
* fundamentally racy. So instead just clear the current path,
|
||||
* mark the the path as pending and kick of a re-read of the ANA
|
||||
* log page ASAP.
|
||||
*/
|
||||
nvme_mpath_clear_current_path(ns);
|
||||
if (ns->ctrl->ana_log_buf) {
|
||||
set_bit(NVME_NS_ANA_PENDING, &ns->flags);
|
||||
queue_work(nvme_wq, &ns->ctrl->ana_work);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Reset the controller for any non-ANA error as we don't know
|
||||
* what caused the error.
|
||||
*/
|
||||
nvme_reset_ctrl(ns->ctrl);
|
||||
break;
|
||||
}
|
||||
|
||||
bool nvme_req_needs_failover(struct request *req, blk_status_t error)
|
||||
{
|
||||
if (!(req->cmd_flags & REQ_NVME_MPATH))
|
||||
return false;
|
||||
return blk_path_error(error);
|
||||
kblockd_schedule_work(&ns->head->requeue_work);
|
||||
}
|
||||
|
||||
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
|
||||
@@ -75,25 +101,51 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
|
||||
static const char *nvme_ana_state_names[] = {
|
||||
[0] = "invalid state",
|
||||
[NVME_ANA_OPTIMIZED] = "optimized",
|
||||
[NVME_ANA_NONOPTIMIZED] = "non-optimized",
|
||||
[NVME_ANA_INACCESSIBLE] = "inaccessible",
|
||||
[NVME_ANA_PERSISTENT_LOSS] = "persistent-loss",
|
||||
[NVME_ANA_CHANGE] = "change",
|
||||
};
|
||||
|
||||
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
|
||||
{
|
||||
struct nvme_ns *ns;
|
||||
struct nvme_ns *ns, *fallback = NULL;
|
||||
|
||||
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||
if (ns->ctrl->state == NVME_CTRL_LIVE) {
|
||||
if (ns->ctrl->state != NVME_CTRL_LIVE ||
|
||||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
||||
continue;
|
||||
switch (ns->ana_state) {
|
||||
case NVME_ANA_OPTIMIZED:
|
||||
rcu_assign_pointer(head->current_path, ns);
|
||||
return ns;
|
||||
case NVME_ANA_NONOPTIMIZED:
|
||||
fallback = ns;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if (fallback)
|
||||
rcu_assign_pointer(head->current_path, fallback);
|
||||
return fallback;
|
||||
}
|
||||
|
||||
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
|
||||
{
|
||||
return ns->ctrl->state == NVME_CTRL_LIVE &&
|
||||
ns->ana_state == NVME_ANA_OPTIMIZED;
|
||||
}
|
||||
|
||||
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
|
||||
{
|
||||
struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
|
||||
|
||||
if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
|
||||
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
|
||||
ns = __nvme_find_path(head);
|
||||
return ns;
|
||||
}
|
||||
@@ -142,7 +194,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
|
||||
|
||||
srcu_idx = srcu_read_lock(&head->srcu);
|
||||
ns = srcu_dereference(head->current_path, &head->srcu);
|
||||
if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
|
||||
if (likely(ns && nvme_path_is_optimized(ns)))
|
||||
found = ns->queue->poll_fn(q, qc);
|
||||
srcu_read_unlock(&head->srcu, srcu_idx);
|
||||
return found;
|
||||
@@ -176,6 +228,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
|
||||
struct request_queue *q;
|
||||
bool vwc = false;
|
||||
|
||||
mutex_init(&head->lock);
|
||||
bio_list_init(&head->requeue_list);
|
||||
spin_lock_init(&head->requeue_lock);
|
||||
INIT_WORK(&head->requeue_work, nvme_requeue_work);
|
||||
@@ -220,29 +273,232 @@ out:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void nvme_mpath_add_disk(struct nvme_ns_head *head)
|
||||
static void nvme_mpath_set_live(struct nvme_ns *ns)
|
||||
{
|
||||
struct nvme_ns_head *head = ns->head;
|
||||
|
||||
lockdep_assert_held(&ns->head->lock);
|
||||
|
||||
if (!head->disk)
|
||||
return;
|
||||
|
||||
mutex_lock(&head->subsys->lock);
|
||||
if (!(head->disk->flags & GENHD_FL_UP)) {
|
||||
device_add_disk(&head->subsys->dev, head->disk);
|
||||
if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
|
||||
&nvme_ns_id_attr_group))
|
||||
pr_warn("%s: failed to create sysfs group for identification\n",
|
||||
head->disk->disk_name);
|
||||
dev_warn(&head->subsys->dev,
|
||||
"failed to create id group.\n");
|
||||
}
|
||||
|
||||
kblockd_schedule_work(&ns->head->requeue_work);
|
||||
}
|
||||
|
||||
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
|
||||
int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
|
||||
void *))
|
||||
{
|
||||
void *base = ctrl->ana_log_buf;
|
||||
size_t offset = sizeof(struct nvme_ana_rsp_hdr);
|
||||
int error, i;
|
||||
|
||||
lockdep_assert_held(&ctrl->ana_lock);
|
||||
|
||||
for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
|
||||
struct nvme_ana_group_desc *desc = base + offset;
|
||||
u32 nr_nsids = le32_to_cpu(desc->nnsids);
|
||||
size_t nsid_buf_size = nr_nsids * sizeof(__le32);
|
||||
|
||||
if (WARN_ON_ONCE(desc->grpid == 0))
|
||||
return -EINVAL;
|
||||
if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
|
||||
return -EINVAL;
|
||||
if (WARN_ON_ONCE(desc->state == 0))
|
||||
return -EINVAL;
|
||||
if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
|
||||
return -EINVAL;
|
||||
|
||||
offset += sizeof(*desc);
|
||||
if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
|
||||
return -EINVAL;
|
||||
|
||||
error = cb(ctrl, desc, data);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
offset += nsid_buf_size;
|
||||
if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool nvme_state_is_live(enum nvme_ana_state state)
|
||||
{
|
||||
return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
|
||||
}
|
||||
|
||||
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
|
||||
struct nvme_ns *ns)
|
||||
{
|
||||
enum nvme_ana_state old;
|
||||
|
||||
mutex_lock(&ns->head->lock);
|
||||
old = ns->ana_state;
|
||||
ns->ana_grpid = le32_to_cpu(desc->grpid);
|
||||
ns->ana_state = desc->state;
|
||||
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
|
||||
|
||||
if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
|
||||
nvme_mpath_set_live(ns);
|
||||
mutex_unlock(&ns->head->lock);
|
||||
}
|
||||
|
||||
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ana_group_desc *desc, void *data)
|
||||
{
|
||||
u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
|
||||
unsigned *nr_change_groups = data;
|
||||
struct nvme_ns *ns;
|
||||
|
||||
dev_info(ctrl->device, "ANA group %d: %s.\n",
|
||||
le32_to_cpu(desc->grpid),
|
||||
nvme_ana_state_names[desc->state]);
|
||||
|
||||
if (desc->state == NVME_ANA_CHANGE)
|
||||
(*nr_change_groups)++;
|
||||
|
||||
if (!nr_nsids)
|
||||
return 0;
|
||||
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list) {
|
||||
if (ns->head->ns_id != le32_to_cpu(desc->nsids[n]))
|
||||
continue;
|
||||
nvme_update_ns_ana_state(desc, ns);
|
||||
if (++n == nr_nsids)
|
||||
break;
|
||||
}
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
WARN_ON_ONCE(n < nr_nsids);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
|
||||
{
|
||||
u32 nr_change_groups = 0;
|
||||
int error;
|
||||
|
||||
mutex_lock(&ctrl->ana_lock);
|
||||
error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
|
||||
groups_only ? NVME_ANA_LOG_RGO : 0,
|
||||
ctrl->ana_log_buf, ctrl->ana_log_size, 0);
|
||||
if (error) {
|
||||
dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
error = nvme_parse_ana_log(ctrl, &nr_change_groups,
|
||||
nvme_update_ana_state);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* In theory we should have an ANATT timer per group as they might enter
|
||||
* the change state at different times. But that is a lot of overhead
|
||||
* just to protect against a target that keeps entering new changes
|
||||
* states while never finishing previous ones. But we'll still
|
||||
* eventually time out once all groups are in change state, so this
|
||||
* isn't a big deal.
|
||||
*
|
||||
* We also double the ANATT value to provide some slack for transports
|
||||
* or AEN processing overhead.
|
||||
*/
|
||||
if (nr_change_groups)
|
||||
mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
|
||||
else
|
||||
del_timer_sync(&ctrl->anatt_timer);
|
||||
out_unlock:
|
||||
mutex_unlock(&ctrl->ana_lock);
|
||||
return error;
|
||||
}
|
||||
|
||||
static void nvme_ana_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
|
||||
|
||||
nvme_read_ana_log(ctrl, false);
|
||||
}
|
||||
|
||||
static void nvme_anatt_timeout(struct timer_list *t)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
|
||||
|
||||
dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
|
||||
nvme_reset_ctrl(ctrl);
|
||||
}
|
||||
|
||||
void nvme_mpath_stop(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
if (!nvme_ctrl_use_ana(ctrl))
|
||||
return;
|
||||
del_timer_sync(&ctrl->anatt_timer);
|
||||
cancel_work_sync(&ctrl->ana_work);
|
||||
}
|
||||
|
||||
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
|
||||
}
|
||||
DEVICE_ATTR_RO(ana_grpid);
|
||||
|
||||
static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
|
||||
|
||||
return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
|
||||
}
|
||||
DEVICE_ATTR_RO(ana_state);
|
||||
|
||||
static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ana_group_desc *desc, void *data)
|
||||
{
|
||||
struct nvme_ns *ns = data;
|
||||
|
||||
if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
|
||||
nvme_update_ns_ana_state(desc, ns);
|
||||
return -ENXIO; /* just break out of the loop */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
{
|
||||
if (nvme_ctrl_use_ana(ns->ctrl)) {
|
||||
mutex_lock(&ns->ctrl->ana_lock);
|
||||
ns->ana_grpid = le32_to_cpu(id->anagrpid);
|
||||
nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
|
||||
mutex_unlock(&ns->ctrl->ana_lock);
|
||||
} else {
|
||||
mutex_lock(&ns->head->lock);
|
||||
ns->ana_state = NVME_ANA_OPTIMIZED;
|
||||
nvme_mpath_set_live(ns);
|
||||
mutex_unlock(&ns->head->lock);
|
||||
}
|
||||
mutex_unlock(&head->subsys->lock);
|
||||
}
|
||||
|
||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
{
|
||||
if (!head->disk)
|
||||
return;
|
||||
sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
|
||||
&nvme_ns_id_attr_group);
|
||||
del_gendisk(head->disk);
|
||||
if (head->disk->flags & GENHD_FL_UP) {
|
||||
sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
|
||||
&nvme_ns_id_attr_group);
|
||||
del_gendisk(head->disk);
|
||||
}
|
||||
blk_set_queue_dying(head->disk->queue);
|
||||
/* make sure all pending bios are cleaned up */
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
@@ -250,3 +506,52 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
blk_cleanup_queue(head->disk->queue);
|
||||
put_disk(head->disk);
|
||||
}
|
||||
|
||||
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!nvme_ctrl_use_ana(ctrl))
|
||||
return 0;
|
||||
|
||||
ctrl->anacap = id->anacap;
|
||||
ctrl->anatt = id->anatt;
|
||||
ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
|
||||
ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
|
||||
|
||||
mutex_init(&ctrl->ana_lock);
|
||||
timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
|
||||
ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
|
||||
ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
|
||||
if (!(ctrl->anacap & (1 << 6)))
|
||||
ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
|
||||
|
||||
if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
|
||||
dev_err(ctrl->device,
|
||||
"ANA log page size (%zd) larger than MDTS (%d).\n",
|
||||
ctrl->ana_log_size,
|
||||
ctrl->max_hw_sectors << SECTOR_SHIFT);
|
||||
dev_err(ctrl->device, "disabling ANA support.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
INIT_WORK(&ctrl->ana_work, nvme_ana_work);
|
||||
ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
|
||||
if (!ctrl->ana_log_buf)
|
||||
goto out;
|
||||
|
||||
error = nvme_read_ana_log(ctrl, true);
|
||||
if (error)
|
||||
goto out_free_ana_log_buf;
|
||||
return 0;
|
||||
out_free_ana_log_buf:
|
||||
kfree(ctrl->ana_log_buf);
|
||||
out:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
kfree(ctrl->ana_log_buf);
|
||||
}
|
||||
|
||||
|
@@ -102,6 +102,7 @@ struct nvme_request {
|
||||
u8 retries;
|
||||
u8 flags;
|
||||
u16 status;
|
||||
struct nvme_ctrl *ctrl;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -119,6 +120,13 @@ static inline struct nvme_request *nvme_req(struct request *req)
|
||||
return blk_mq_rq_to_pdu(req);
|
||||
}
|
||||
|
||||
static inline u16 nvme_req_qid(struct request *req)
|
||||
{
|
||||
if (!req->rq_disk)
|
||||
return 0;
|
||||
return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
|
||||
}
|
||||
|
||||
/* The below value is the specific amount of delay needed before checking
|
||||
* readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
|
||||
* NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
|
||||
@@ -175,6 +183,7 @@ struct nvme_ctrl {
|
||||
u16 oacs;
|
||||
u16 nssa;
|
||||
u16 nr_streams;
|
||||
u32 max_namespaces;
|
||||
atomic_t abort_limit;
|
||||
u8 vwc;
|
||||
u32 vs;
|
||||
@@ -197,6 +206,19 @@ struct nvme_ctrl {
|
||||
struct work_struct fw_act_work;
|
||||
unsigned long events;
|
||||
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
/* asymmetric namespace access: */
|
||||
u8 anacap;
|
||||
u8 anatt;
|
||||
u32 anagrpmax;
|
||||
u32 nanagrpid;
|
||||
struct mutex ana_lock;
|
||||
struct nvme_ana_rsp_hdr *ana_log_buf;
|
||||
size_t ana_log_size;
|
||||
struct timer_list anatt_timer;
|
||||
struct work_struct ana_work;
|
||||
#endif
|
||||
|
||||
/* Power saving configuration */
|
||||
u64 ps_max_latency_us;
|
||||
bool apst_enabled;
|
||||
@@ -261,6 +283,7 @@ struct nvme_ns_head {
|
||||
struct bio_list requeue_list;
|
||||
spinlock_t requeue_lock;
|
||||
struct work_struct requeue_work;
|
||||
struct mutex lock;
|
||||
#endif
|
||||
struct list_head list;
|
||||
struct srcu_struct srcu;
|
||||
@@ -287,6 +310,10 @@ struct nvme_ns {
|
||||
struct nvme_ctrl *ctrl;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *disk;
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
enum nvme_ana_state ana_state;
|
||||
u32 ana_grpid;
|
||||
#endif
|
||||
struct list_head siblings;
|
||||
struct nvm_dev *ndev;
|
||||
struct kref kref;
|
||||
@@ -299,8 +326,9 @@ struct nvme_ns {
|
||||
bool ext;
|
||||
u8 pi_type;
|
||||
unsigned long flags;
|
||||
#define NVME_NS_REMOVING 0
|
||||
#define NVME_NS_DEAD 1
|
||||
#define NVME_NS_REMOVING 0
|
||||
#define NVME_NS_DEAD 1
|
||||
#define NVME_NS_ANA_PENDING 2
|
||||
u16 noiob;
|
||||
|
||||
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
|
||||
@@ -356,14 +384,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
|
||||
return (sector >> (ns->lba_shift - 9));
|
||||
}
|
||||
|
||||
static inline void nvme_cleanup_cmd(struct request *req)
|
||||
{
|
||||
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
|
||||
kfree(page_address(req->special_vec.bv_page) +
|
||||
req->special_vec.bv_offset);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nvme_end_request(struct request *req, __le16 status,
|
||||
union nvme_result result)
|
||||
{
|
||||
@@ -420,6 +440,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
|
||||
#define NVME_QID_ANY -1
|
||||
struct request *nvme_alloc_request(struct request_queue *q,
|
||||
struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
|
||||
void nvme_cleanup_cmd(struct request *req);
|
||||
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
|
||||
struct nvme_command *cmd);
|
||||
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
@@ -435,21 +456,24 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
|
||||
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
|
||||
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
|
||||
|
||||
int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
u8 log_page, void *log, size_t size, u64 offset);
|
||||
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
|
||||
void *log, size_t size, u64 offset);
|
||||
|
||||
extern const struct attribute_group nvme_ns_id_attr_group;
|
||||
extern const struct block_device_operations nvme_ns_head_ops;
|
||||
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
|
||||
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
|
||||
struct nvme_ctrl *ctrl, int *flags);
|
||||
void nvme_failover_req(struct request *req);
|
||||
bool nvme_req_needs_failover(struct request *req, blk_status_t error);
|
||||
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
|
||||
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
|
||||
void nvme_mpath_add_disk(struct nvme_ns_head *head);
|
||||
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
|
||||
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
|
||||
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
|
||||
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
|
||||
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
|
||||
|
||||
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
|
||||
{
|
||||
@@ -468,7 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
|
||||
kblockd_schedule_work(&head->requeue_work);
|
||||
}
|
||||
|
||||
extern struct device_attribute dev_attr_ana_grpid;
|
||||
extern struct device_attribute dev_attr_ana_state;
|
||||
|
||||
#else
|
||||
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
* Without the multipath code enabled, multiple controller per subsystems are
|
||||
* visible as devices and thus we cannot use the subsystem instance.
|
||||
@@ -482,11 +513,6 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
|
||||
static inline void nvme_failover_req(struct request *req)
|
||||
{
|
||||
}
|
||||
static inline bool nvme_req_needs_failover(struct request *req,
|
||||
blk_status_t error)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
}
|
||||
@@ -495,7 +521,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
|
||||
static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
|
||||
struct nvme_id_ns *id)
|
||||
{
|
||||
}
|
||||
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
|
||||
@@ -507,6 +534,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
|
||||
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
|
||||
{
|
||||
}
|
||||
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
|
||||
struct nvme_id_ctrl *id)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
}
|
||||
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_NVME_MULTIPATH */
|
||||
|
||||
#ifdef CONFIG_NVM
|
||||
|
@@ -418,6 +418,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
|
||||
|
||||
BUG_ON(!nvmeq);
|
||||
iod->nvmeq = nvmeq;
|
||||
|
||||
nvme_req(req)->ctrl = &dev->ctrl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -535,73 +537,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
|
||||
mempool_free(iod->sg, dev->iod_mempool);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
|
||||
{
|
||||
if (be32_to_cpu(pi->ref_tag) == v)
|
||||
pi->ref_tag = cpu_to_be32(p);
|
||||
}
|
||||
|
||||
static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
|
||||
{
|
||||
if (be32_to_cpu(pi->ref_tag) == p)
|
||||
pi->ref_tag = cpu_to_be32(v);
|
||||
}
|
||||
|
||||
/**
|
||||
* nvme_dif_remap - remaps ref tags to bip seed and physical lba
|
||||
*
|
||||
* The virtual start sector is the one that was originally submitted by the
|
||||
* block layer. Due to partitioning, MD/DM cloning, etc. the actual physical
|
||||
* start sector may be different. Remap protection information to match the
|
||||
* physical LBA on writes, and back to the original seed on reads.
|
||||
*
|
||||
* Type 0 and 3 do not have a ref tag, so no remapping required.
|
||||
*/
|
||||
static void nvme_dif_remap(struct request *req,
|
||||
void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
|
||||
{
|
||||
struct nvme_ns *ns = req->rq_disk->private_data;
|
||||
struct bio_integrity_payload *bip;
|
||||
struct t10_pi_tuple *pi;
|
||||
void *p, *pmap;
|
||||
u32 i, nlb, ts, phys, virt;
|
||||
|
||||
if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
|
||||
return;
|
||||
|
||||
bip = bio_integrity(req->bio);
|
||||
if (!bip)
|
||||
return;
|
||||
|
||||
pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
|
||||
|
||||
p = pmap;
|
||||
virt = bip_get_seed(bip);
|
||||
phys = nvme_block_nr(ns, blk_rq_pos(req));
|
||||
nlb = (blk_rq_bytes(req) >> ns->lba_shift);
|
||||
ts = ns->disk->queue->integrity.tuple_size;
|
||||
|
||||
for (i = 0; i < nlb; i++, virt++, phys++) {
|
||||
pi = (struct t10_pi_tuple *)p;
|
||||
dif_swap(phys, virt, pi);
|
||||
p += ts;
|
||||
}
|
||||
kunmap_atomic(pmap);
|
||||
}
|
||||
#else /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
static void nvme_dif_remap(struct request *req,
|
||||
void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
|
||||
{
|
||||
}
|
||||
static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
|
||||
{
|
||||
}
|
||||
static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
|
||||
{
|
||||
int i;
|
||||
@@ -827,9 +762,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
|
||||
if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
|
||||
goto out_unmap;
|
||||
|
||||
if (req_op(req) == REQ_OP_WRITE)
|
||||
nvme_dif_remap(req, nvme_dif_prep);
|
||||
|
||||
if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
|
||||
goto out_unmap;
|
||||
}
|
||||
@@ -852,11 +784,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
|
||||
|
||||
if (iod->nents) {
|
||||
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
|
||||
if (blk_integrity_rq(req)) {
|
||||
if (req_op(req) == REQ_OP_READ)
|
||||
nvme_dif_remap(req, nvme_dif_complete);
|
||||
if (blk_integrity_rq(req))
|
||||
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
|
||||
}
|
||||
}
|
||||
|
||||
nvme_cleanup_cmd(req);
|
||||
|
@@ -40,13 +40,14 @@
|
||||
|
||||
#define NVME_RDMA_MAX_SEGMENTS 256
|
||||
|
||||
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
|
||||
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
|
||||
|
||||
struct nvme_rdma_device {
|
||||
struct ib_device *dev;
|
||||
struct ib_pd *pd;
|
||||
struct kref ref;
|
||||
struct list_head entry;
|
||||
unsigned int num_inline_segments;
|
||||
};
|
||||
|
||||
struct nvme_rdma_qe {
|
||||
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
|
||||
struct sockaddr_storage src_addr;
|
||||
|
||||
struct nvme_ctrl ctrl;
|
||||
bool use_inline_data;
|
||||
};
|
||||
|
||||
static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
|
||||
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
|
||||
/* +1 for drain */
|
||||
init_attr.cap.max_recv_wr = queue->queue_size + 1;
|
||||
init_attr.cap.max_recv_sge = 1;
|
||||
init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
|
||||
init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
|
||||
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
|
||||
init_attr.qp_type = IB_QPT_RC;
|
||||
init_attr.send_cq = queue->ib_cq;
|
||||
@@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
|
||||
struct ib_device *ibdev = dev->dev;
|
||||
int ret;
|
||||
|
||||
nvme_req(rq)->ctrl = &ctrl->ctrl;
|
||||
ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
|
||||
DMA_TO_DEVICE);
|
||||
if (ret)
|
||||
@@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
|
||||
goto out_free_pd;
|
||||
}
|
||||
|
||||
ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
|
||||
ndev->dev->attrs.max_sge - 1);
|
||||
list_add(&ndev->entry, &device_list);
|
||||
out_unlock:
|
||||
mutex_unlock(&device_list_mutex);
|
||||
@@ -868,6 +873,31 @@ out_free_io_queues:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
|
||||
&ctrl->ctrl);
|
||||
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, remove);
|
||||
}
|
||||
|
||||
static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
|
||||
bool remove)
|
||||
{
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
nvme_rdma_stop_io_queues(ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
|
||||
&ctrl->ctrl);
|
||||
if (remove)
|
||||
nvme_start_queues(&ctrl->ctrl);
|
||||
nvme_rdma_destroy_io_queues(ctrl, remove);
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
|
||||
@@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
|
||||
struct nvme_rdma_ctrl, reconnect_work);
|
||||
int ret = -EINVAL;
|
||||
bool changed;
|
||||
int ret;
|
||||
|
||||
++ctrl->ctrl.nr_reconnects;
|
||||
|
||||
ret = nvme_rdma_configure_admin_queue(ctrl, false);
|
||||
ret = nvme_rdma_configure_admin_queue(ctrl, new);
|
||||
if (ret)
|
||||
goto requeue;
|
||||
return ret;
|
||||
|
||||
if (ctrl->ctrl.icdoff) {
|
||||
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
|
||||
goto destroy_admin;
|
||||
}
|
||||
|
||||
if (!(ctrl->ctrl.sgls & (1 << 2))) {
|
||||
dev_err(ctrl->ctrl.device,
|
||||
"Mandatory keyed sgls are not supported!\n");
|
||||
goto destroy_admin;
|
||||
}
|
||||
|
||||
if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"queue_size %zu > ctrl sqsize %u, clamping down\n",
|
||||
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
|
||||
}
|
||||
|
||||
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"sqsize %u > ctrl maxcmd %u, clamping down\n",
|
||||
ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
|
||||
ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
|
||||
}
|
||||
|
||||
if (ctrl->ctrl.sgls & (1 << 20))
|
||||
ctrl->use_inline_data = true;
|
||||
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
ret = nvme_rdma_configure_io_queues(ctrl, false);
|
||||
ret = nvme_rdma_configure_io_queues(ctrl, new);
|
||||
if (ret)
|
||||
goto destroy_admin;
|
||||
}
|
||||
@@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
if (!changed) {
|
||||
/* state change failure is ok if we're in DELETING state */
|
||||
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
|
||||
return;
|
||||
ret = -EINVAL;
|
||||
goto destroy_io;
|
||||
}
|
||||
|
||||
nvme_start_ctrl(&ctrl->ctrl);
|
||||
return 0;
|
||||
|
||||
destroy_io:
|
||||
if (ctrl->ctrl.queue_count > 1)
|
||||
nvme_rdma_destroy_io_queues(ctrl, new);
|
||||
destroy_admin:
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, new);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
|
||||
struct nvme_rdma_ctrl, reconnect_work);
|
||||
|
||||
++ctrl->ctrl.nr_reconnects;
|
||||
|
||||
if (nvme_rdma_setup_ctrl(ctrl, false))
|
||||
goto requeue;
|
||||
|
||||
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
|
||||
ctrl->ctrl.nr_reconnects);
|
||||
@@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
|
||||
return;
|
||||
|
||||
destroy_admin:
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, false);
|
||||
requeue:
|
||||
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
|
||||
ctrl->ctrl.nr_reconnects);
|
||||
@@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
|
||||
struct nvme_rdma_ctrl, err_work);
|
||||
|
||||
nvme_stop_keep_alive(&ctrl->ctrl);
|
||||
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
nvme_rdma_stop_io_queues(ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
nvme_rdma_destroy_io_queues(ctrl, false);
|
||||
}
|
||||
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, false);
|
||||
|
||||
/*
|
||||
* queues are not a live anymore, so restart the queues to fail fast
|
||||
* new IO
|
||||
*/
|
||||
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_teardown_io_queues(ctrl, false);
|
||||
nvme_start_queues(&ctrl->ctrl);
|
||||
nvme_rdma_teardown_admin_queue(ctrl, false);
|
||||
|
||||
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
|
||||
/* state change failure is ok if we're in DELETING state */
|
||||
@@ -1090,19 +1143,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
|
||||
}
|
||||
|
||||
static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
|
||||
struct nvme_rdma_request *req, struct nvme_command *c)
|
||||
struct nvme_rdma_request *req, struct nvme_command *c,
|
||||
int count)
|
||||
{
|
||||
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
|
||||
struct scatterlist *sgl = req->sg_table.sgl;
|
||||
struct ib_sge *sge = &req->sge[1];
|
||||
u32 len = 0;
|
||||
int i;
|
||||
|
||||
req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
|
||||
req->sge[1].length = sg_dma_len(req->sg_table.sgl);
|
||||
req->sge[1].lkey = queue->device->pd->local_dma_lkey;
|
||||
for (i = 0; i < count; i++, sgl++, sge++) {
|
||||
sge->addr = sg_dma_address(sgl);
|
||||
sge->length = sg_dma_len(sgl);
|
||||
sge->lkey = queue->device->pd->local_dma_lkey;
|
||||
len += sge->length;
|
||||
}
|
||||
|
||||
sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
|
||||
sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
|
||||
sg->length = cpu_to_le32(len);
|
||||
sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
|
||||
|
||||
req->num_sge++;
|
||||
req->num_sge += count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1195,15 +1256,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
|
||||
goto out_free_table;
|
||||
}
|
||||
|
||||
if (count == 1) {
|
||||
if (count <= dev->num_inline_segments) {
|
||||
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
|
||||
queue->ctrl->use_inline_data &&
|
||||
blk_rq_payload_bytes(rq) <=
|
||||
nvme_rdma_inline_data_size(queue)) {
|
||||
ret = nvme_rdma_map_sg_inline(queue, req, c);
|
||||
ret = nvme_rdma_map_sg_inline(queue, req, c, count);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
|
||||
if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
|
||||
ret = nvme_rdma_map_sg_single(queue, req, c);
|
||||
goto out;
|
||||
}
|
||||
@@ -1574,6 +1636,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
|
||||
case RDMA_CM_EVENT_CONNECT_ERROR:
|
||||
case RDMA_CM_EVENT_UNREACHABLE:
|
||||
nvme_rdma_destroy_queue_ib(queue);
|
||||
/* fall through */
|
||||
case RDMA_CM_EVENT_ADDR_ERROR:
|
||||
dev_dbg(queue->ctrl->ctrl.device,
|
||||
"CM error event %d\n", ev->event);
|
||||
@@ -1736,25 +1799,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
|
||||
|
||||
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
|
||||
{
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
nvme_stop_queues(&ctrl->ctrl);
|
||||
nvme_rdma_stop_io_queues(ctrl);
|
||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
nvme_rdma_destroy_io_queues(ctrl, shutdown);
|
||||
}
|
||||
|
||||
nvme_rdma_teardown_io_queues(ctrl, shutdown);
|
||||
if (shutdown)
|
||||
nvme_shutdown_ctrl(&ctrl->ctrl);
|
||||
else
|
||||
nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
|
||||
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, shutdown);
|
||||
nvme_rdma_teardown_admin_queue(ctrl, shutdown);
|
||||
}
|
||||
|
||||
static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
|
||||
@@ -1766,8 +1816,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl =
|
||||
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
|
||||
int ret;
|
||||
bool changed;
|
||||
|
||||
nvme_stop_ctrl(&ctrl->ctrl);
|
||||
nvme_rdma_shutdown_ctrl(ctrl, false);
|
||||
@@ -1778,25 +1826,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
|
||||
return;
|
||||
}
|
||||
|
||||
ret = nvme_rdma_configure_admin_queue(ctrl, false);
|
||||
if (ret)
|
||||
if (nvme_rdma_setup_ctrl(ctrl, false))
|
||||
goto out_fail;
|
||||
|
||||
if (ctrl->ctrl.queue_count > 1) {
|
||||
ret = nvme_rdma_configure_io_queues(ctrl, false);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
|
||||
if (!changed) {
|
||||
/* state change failure is ok if we're in DELETING state */
|
||||
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
|
||||
return;
|
||||
}
|
||||
|
||||
nvme_start_ctrl(&ctrl->ctrl);
|
||||
|
||||
return;
|
||||
|
||||
out_fail:
|
||||
@@ -1959,49 +1991,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
|
||||
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
|
||||
WARN_ON_ONCE(!changed);
|
||||
|
||||
ret = nvme_rdma_configure_admin_queue(ctrl, true);
|
||||
ret = nvme_rdma_setup_ctrl(ctrl, true);
|
||||
if (ret)
|
||||
goto out_uninit_ctrl;
|
||||
|
||||
/* sanity check icdoff */
|
||||
if (ctrl->ctrl.icdoff) {
|
||||
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
|
||||
ret = -EINVAL;
|
||||
goto out_remove_admin_queue;
|
||||
}
|
||||
|
||||
/* sanity check keyed sgls */
|
||||
if (!(ctrl->ctrl.sgls & (1 << 2))) {
|
||||
dev_err(ctrl->ctrl.device,
|
||||
"Mandatory keyed sgls are not supported!\n");
|
||||
ret = -EINVAL;
|
||||
goto out_remove_admin_queue;
|
||||
}
|
||||
|
||||
/* only warn if argument is too large here, will clamp later */
|
||||
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"queue_size %zu > ctrl sqsize %u, clamping down\n",
|
||||
opts->queue_size, ctrl->ctrl.sqsize + 1);
|
||||
}
|
||||
|
||||
/* warn if maxcmd is lower than sqsize+1 */
|
||||
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"sqsize %u > ctrl maxcmd %u, clamping down\n",
|
||||
ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
|
||||
ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
|
||||
}
|
||||
|
||||
if (opts->nr_io_queues) {
|
||||
ret = nvme_rdma_configure_io_queues(ctrl, true);
|
||||
if (ret)
|
||||
goto out_remove_admin_queue;
|
||||
}
|
||||
|
||||
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
|
||||
WARN_ON_ONCE(!changed);
|
||||
|
||||
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
|
||||
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
|
||||
|
||||
@@ -2011,13 +2004,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
|
||||
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
|
||||
mutex_unlock(&nvme_rdma_ctrl_mutex);
|
||||
|
||||
nvme_start_ctrl(&ctrl->ctrl);
|
||||
|
||||
return &ctrl->ctrl;
|
||||
|
||||
out_remove_admin_queue:
|
||||
nvme_rdma_stop_queue(&ctrl->queues[0]);
|
||||
nvme_rdma_destroy_admin_queue(ctrl, true);
|
||||
out_uninit_ctrl:
|
||||
nvme_uninit_ctrl(&ctrl->ctrl);
|
||||
nvme_put_ctrl(&ctrl->ctrl);
|
||||
|
@@ -128,3 +128,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
|
||||
return nvme_trace_common(p, cdw10);
|
||||
}
|
||||
}
|
||||
|
||||
const char *nvme_trace_disk_name(struct trace_seq *p, char *name)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
|
||||
if (*name)
|
||||
trace_seq_printf(p, "disk=%s, ", name);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -50,13 +50,8 @@
|
||||
nvme_admin_opcode_name(nvme_admin_security_recv), \
|
||||
nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
|
||||
|
||||
const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
|
||||
u8 *cdw10);
|
||||
#define __parse_nvme_admin_cmd(opcode, cdw10) \
|
||||
nvme_trace_parse_admin_cmd(p, opcode, cdw10)
|
||||
|
||||
#define nvme_opcode_name(opcode) { opcode, #opcode }
|
||||
#define show_opcode_name(val) \
|
||||
#define show_nvm_opcode_name(val) \
|
||||
__print_symbolic(val, \
|
||||
nvme_opcode_name(nvme_cmd_flush), \
|
||||
nvme_opcode_name(nvme_cmd_write), \
|
||||
@@ -70,85 +65,92 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
|
||||
nvme_opcode_name(nvme_cmd_resv_acquire), \
|
||||
nvme_opcode_name(nvme_cmd_resv_release))
|
||||
|
||||
#define show_opcode_name(qid, opcode) \
|
||||
(qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode))
|
||||
|
||||
const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
|
||||
u8 *cdw10);
|
||||
const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
|
||||
u8 *cdw10);
|
||||
#define __parse_nvme_cmd(opcode, cdw10) \
|
||||
nvme_trace_parse_nvm_cmd(p, opcode, cdw10)
|
||||
u8 *cdw10);
|
||||
|
||||
TRACE_EVENT(nvme_setup_admin_cmd,
|
||||
TP_PROTO(struct nvme_command *cmd),
|
||||
TP_ARGS(cmd),
|
||||
#define parse_nvme_cmd(qid, opcode, cdw10) \
|
||||
(qid ? \
|
||||
nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \
|
||||
nvme_trace_parse_admin_cmd(p, opcode, cdw10))
|
||||
|
||||
const char *nvme_trace_disk_name(struct trace_seq *p, char *name);
|
||||
#define __print_disk_name(name) \
|
||||
nvme_trace_disk_name(p, name)
|
||||
|
||||
#ifndef TRACE_HEADER_MULTI_READ
|
||||
static inline void __assign_disk_name(char *name, struct gendisk *disk)
|
||||
{
|
||||
if (disk)
|
||||
memcpy(name, disk->disk_name, DISK_NAME_LEN);
|
||||
else
|
||||
memset(name, 0, DISK_NAME_LEN);
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACE_EVENT(nvme_setup_cmd,
|
||||
TP_PROTO(struct request *req, struct nvme_command *cmd),
|
||||
TP_ARGS(req, cmd),
|
||||
TP_STRUCT__entry(
|
||||
__field(u8, opcode)
|
||||
__field(u8, flags)
|
||||
__field(u16, cid)
|
||||
__field(u64, metadata)
|
||||
__array(u8, cdw10, 24)
|
||||
__array(char, disk, DISK_NAME_LEN)
|
||||
__field(int, ctrl_id)
|
||||
__field(int, qid)
|
||||
__field(u8, opcode)
|
||||
__field(u8, flags)
|
||||
__field(u16, cid)
|
||||
__field(u32, nsid)
|
||||
__field(u64, metadata)
|
||||
__array(u8, cdw10, 24)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->opcode = cmd->common.opcode;
|
||||
__entry->flags = cmd->common.flags;
|
||||
__entry->cid = cmd->common.command_id;
|
||||
__entry->metadata = le64_to_cpu(cmd->common.metadata);
|
||||
memcpy(__entry->cdw10, cmd->common.cdw10,
|
||||
sizeof(__entry->cdw10));
|
||||
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
|
||||
__entry->qid = nvme_req_qid(req);
|
||||
__entry->opcode = cmd->common.opcode;
|
||||
__entry->flags = cmd->common.flags;
|
||||
__entry->cid = cmd->common.command_id;
|
||||
__entry->nsid = le32_to_cpu(cmd->common.nsid);
|
||||
__entry->metadata = le64_to_cpu(cmd->common.metadata);
|
||||
__assign_disk_name(__entry->disk, req->rq_disk);
|
||||
memcpy(__entry->cdw10, cmd->common.cdw10,
|
||||
sizeof(__entry->cdw10));
|
||||
),
|
||||
TP_printk(" cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
|
||||
__entry->cid, __entry->flags, __entry->metadata,
|
||||
show_admin_opcode_name(__entry->opcode),
|
||||
__parse_nvme_admin_cmd(__entry->opcode, __entry->cdw10))
|
||||
);
|
||||
|
||||
|
||||
TRACE_EVENT(nvme_setup_nvm_cmd,
|
||||
TP_PROTO(int qid, struct nvme_command *cmd),
|
||||
TP_ARGS(qid, cmd),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, qid)
|
||||
__field(u8, opcode)
|
||||
__field(u8, flags)
|
||||
__field(u16, cid)
|
||||
__field(u32, nsid)
|
||||
__field(u64, metadata)
|
||||
__array(u8, cdw10, 24)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->qid = qid;
|
||||
__entry->opcode = cmd->common.opcode;
|
||||
__entry->flags = cmd->common.flags;
|
||||
__entry->cid = cmd->common.command_id;
|
||||
__entry->nsid = le32_to_cpu(cmd->common.nsid);
|
||||
__entry->metadata = le64_to_cpu(cmd->common.metadata);
|
||||
memcpy(__entry->cdw10, cmd->common.cdw10,
|
||||
sizeof(__entry->cdw10));
|
||||
),
|
||||
TP_printk("qid=%d, nsid=%u, cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
|
||||
__entry->qid, __entry->nsid, __entry->cid,
|
||||
TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
|
||||
__entry->ctrl_id, __print_disk_name(__entry->disk),
|
||||
__entry->qid, __entry->cid, __entry->nsid,
|
||||
__entry->flags, __entry->metadata,
|
||||
show_opcode_name(__entry->opcode),
|
||||
__parse_nvme_cmd(__entry->opcode, __entry->cdw10))
|
||||
show_opcode_name(__entry->qid, __entry->opcode),
|
||||
parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10))
|
||||
);
|
||||
|
||||
TRACE_EVENT(nvme_complete_rq,
|
||||
TP_PROTO(struct request *req),
|
||||
TP_ARGS(req),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, qid)
|
||||
__field(int, cid)
|
||||
__field(u64, result)
|
||||
__field(u8, retries)
|
||||
__field(u8, flags)
|
||||
__field(u16, status)
|
||||
__array(char, disk, DISK_NAME_LEN)
|
||||
__field(int, ctrl_id)
|
||||
__field(int, qid)
|
||||
__field(int, cid)
|
||||
__field(u64, result)
|
||||
__field(u8, retries)
|
||||
__field(u8, flags)
|
||||
__field(u16, status)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->qid = req->q->id;
|
||||
__entry->cid = req->tag;
|
||||
__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
__entry->retries = nvme_req(req)->retries;
|
||||
__entry->flags = nvme_req(req)->flags;
|
||||
__entry->status = nvme_req(req)->status;
|
||||
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
|
||||
__entry->qid = nvme_req_qid(req);
|
||||
__entry->cid = req->tag;
|
||||
__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
|
||||
__entry->retries = nvme_req(req)->retries;
|
||||
__entry->flags = nvme_req(req)->flags;
|
||||
__entry->status = nvme_req(req)->status;
|
||||
__assign_disk_name(__entry->disk, req->rq_disk);
|
||||
),
|
||||
TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
|
||||
TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
|
||||
__entry->ctrl_id, __print_disk_name(__entry->disk),
|
||||
__entry->qid, __entry->cid, __entry->result,
|
||||
__entry->retries, __entry->flags, __entry->status)
|
||||