NVMe: Move error handling to failed reset handler

This moves failed queue handling out of the namespace removal path and
into the reset failure path, fixing a hanging condition if the controller
fails or link down during del_gendisk. Previously the driver had to see
the controller as degraded prior to calling del_gendisk to setup the
queues to fail. But, if the controller happened to fail after this,
there was no task to end outstanding requests.

On failure, all namespace states are set to dead. This has capacity
revalidate to 0, and ends all new requests with error status.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Keith Busch
2016-02-24 09:15:56 -07:00
committed by Jens Axboe
parent f58944e265
commit 69d9a99c25
3 changed files with 50 additions and 18 deletions

View File

@@ -690,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
ret = BLK_MQ_RQ_QUEUE_BUSY;
if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
ret = BLK_MQ_RQ_QUEUE_BUSY;
else
ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
goto out;
}
@@ -1261,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = {
static void nvme_dev_remove_admin(struct nvme_dev *dev)
{
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
/*
* If the controller was reset during removal, it's possible
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1901,6 +1910,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
kref_get(&dev->ctrl.kref);
nvme_dev_disable(dev, false);
if (!schedule_work(&dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
@@ -1973,6 +1983,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);
nvme_kill_queues(&dev->ctrl);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
nvme_put_ctrl(&dev->ctrl);