Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "This is the main pull request for block/storage for 4.21.

  Larger than usual, it was a busy round with lots of goodies queued
  up. Most notable is the removal of the old IO stack, which has been a
  long time coming. No new features for a while, everything coming in
  this week has all been fixes for things that were previously merged.

  This contains:

   - Use atomic counters instead of semaphores for mtip32xx (Arnd)

   - Cleanup of the mtip32xx request setup (Christoph)

   - Fix for circular locking dependency in loop (Jan, Tetsuo)

   - bcache (Coly, Guoju, Shenghui)
      * Optimizations for writeback caching
      * Various fixes and improvements

   - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
      * host and target support for NVMe over TCP
      * Error log page support
      * Support for separate read/write/poll queues
      * Much improved polling
      * discard OOM fallback
      * Tracepoint improvements

   - lightnvm (Hans, Hua, Igor, Matias, Javier)
      * Igor added packed metadata to pblk. Now drives without metadata
        per LBA can be used as well.
      * Fix from Geert on uninitialized value on chunk metadata reads.
      * Fixes from Hans and Javier to pblk recovery and write path.
      * Fix from Hua Su to fix a race condition in the pblk recovery code.
      * Scan optimization added to pblk recovery from Zhoujie.
      * Small geometry cleanup from me.

   - Conversion of the last few drivers that used the legacy path to
     blk-mq (me)

   - Removal of legacy IO path in SCSI (me, Christoph)

   - Removal of legacy IO stack and schedulers (me)

   - Support for much better polling, now without interrupts at all.
     blk-mq adds support for multiple queue maps, which enables us to
     have a map per type. This in turn enables nvme to have separate
     completion queues for polling, which can then be interrupt-less.
     Also means we're ready for async polled IO, which is hopefully
     coming in the next release.

   - Killing of (now) unused block exports (Christoph)

   - Unification of the blk-rq-qos and blk-wbt wait handling (Josef)

   - Support for zoned testing with null_blk (Masato)

   - sx8 conversion to per-host tag sets (Christoph)

   - IO priority improvements (Damien)

   - mq-deadline zoned fix (Damien)

   - Ref count blkcg series (Dennis)

   - Lots of blk-mq improvements and speedups (me)

   - sbitmap scalability improvements (me)

   - Make core inflight IO accounting per-cpu (Mikulas)

   - Export timeout setting in sysfs (Weiping)

   - Cleanup the direct issue path (Jianchao)

   - Export blk-wbt internals in block debugfs for easier debugging (Ming)

   - Lots of other fixes and improvements"

* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
  kyber: use sbitmap add_wait_queue/list_del wait helpers
  sbitmap: add helpers for add/del wait queue handling
  block: save irq state in blkg_lookup_create()
  dm: don't reuse bio for flushes
  nvme-pci: trace SQ status on completions
  nvme-rdma: implement polling queue map
  nvme-fabrics: allow user to pass in nr_poll_queues
  nvme-fabrics: allow nvmf_connect_io_queue to poll
  nvme-core: optionally poll sync commands
  block: make request_to_qc_t public
  nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
  nvme-tcp: fix endianess annotations
  nvmet-tcp: fix endianess annotations
  nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
  nvme-pci: only set nr_maps to 2 if poll queues are supported
  nvmet: use a macro for default error location
  nvmet: fix comparison of a u16 with -1
  blk-mq: enable IO poll if .nr_queues of type poll > 0
  blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
  blk-mq: skip zero-queue maps in blk_mq_map_swqueue
  ...
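Editor's note: the "multiple queue maps" item above is the one with architectural weight: blk-mq now carries several queue maps per tag set, so a driver can group its hardware queues by type and keep some of them interrupt-free for polling. As a rough, hedged illustration only (a sketch against the blk-mq API of this release, not code from any driver in this pull; sketch_ctrl, nr_io_queues and nr_poll_queues are invented names), a driver's ->map_queues callback could split its queues like this:

#include <linux/blk-mq.h>

/* Hypothetical driver state; the queue counts are invented for the sketch. */
struct sketch_ctrl {
        unsigned int nr_io_queues;      /* interrupt-driven queues */
        unsigned int nr_poll_queues;    /* queues completed by polling */
};

/*
 * Would be wired up as the driver's blk_mq_ops.map_queues callback, with
 * tag_set.nr_maps sized large enough to cover HCTX_TYPE_POLL before the
 * tag set is allocated.
 */
static int sketch_map_queues(struct blk_mq_tag_set *set)
{
        struct sketch_ctrl *ctrl = set->driver_data;

        /* Regular (interrupt-driven) queues occupy the first hardware queues. */
        set->map[HCTX_TYPE_DEFAULT].nr_queues = ctrl->nr_io_queues;
        set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
        blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);

        /* Poll queues follow and are completed without interrupts. */
        set->map[HCTX_TYPE_POLL].nr_queues = ctrl->nr_poll_queues;
        set->map[HCTX_TYPE_POLL].queue_offset = ctrl->nr_io_queues;
        blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);

        return 0;
}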
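Editor's note: the mmc diff below is one of the many follow-ups to the legacy IO path removal. With the old request_queue spinlock no longer available for borrowing, the driver converts every q->queue_lock user to a lock it owns itself, embedded in struct mmc_queue. A minimal sketch of that before/after pattern, assuming a trimmed, hypothetical stand-in for the real structure (sketch_mmc_queue and sketch_dec_in_flight are illustrative names only):

#include <linux/spinlock.h>

/* Trimmed, hypothetical stand-in for the real struct mmc_queue. */
struct sketch_mmc_queue {
        spinlock_t lock;        /* initialised with spin_lock_init() at queue setup */
        int in_flight;
};

static void sketch_dec_in_flight(struct sketch_mmc_queue *mq)
{
        unsigned long flags;

        /* Previously: spin_lock_irqsave(q->queue_lock, flags); */
        spin_lock_irqsave(&mq->lock, flags);
        mq->in_flight -= 1;
        spin_unlock_irqrestore(&mq->lock, flags);
}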
@@ -100,7 +100,6 @@ static DEFINE_IDA(mmc_rpmb_ida);
  * There is one mmc_blk_data per slot.
  */
 struct mmc_blk_data {
-        spinlock_t lock;
         struct device *parent;
         struct gendisk *disk;
         struct mmc_queue queue;
@@ -1488,7 +1487,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
                 blk_mq_end_request(req, BLK_STS_OK);
         }
 
-        spin_lock_irqsave(q->queue_lock, flags);
+        spin_lock_irqsave(&mq->lock, flags);
 
         mq->in_flight[mmc_issue_type(mq, req)] -= 1;
 
@@ -1496,7 +1495,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
 
         mmc_cqe_check_busy(mq);
 
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irqrestore(&mq->lock, flags);
 
         if (!mq->cqe_busy)
                 blk_mq_run_hw_queues(q, true);
@@ -1993,17 +1992,16 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
 
 static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
 {
-        struct request_queue *q = req->q;
         unsigned long flags;
         bool put_card;
 
-        spin_lock_irqsave(q->queue_lock, flags);
+        spin_lock_irqsave(&mq->lock, flags);
 
         mq->in_flight[mmc_issue_type(mq, req)] -= 1;
 
         put_card = (mmc_tot_in_flight(mq) == 0);
 
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irqrestore(&mq->lock, flags);
 
         if (put_card)
                 mmc_put_card(mq->card, &mq->ctx);
@@ -2099,11 +2097,11 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
                  * request does not need to wait (although it does need to
                  * complete complete_req first).
                  */
-                spin_lock_irqsave(q->queue_lock, flags);
+                spin_lock_irqsave(&mq->lock, flags);
                 mq->complete_req = req;
                 mq->rw_wait = false;
                 waiting = mq->waiting;
-                spin_unlock_irqrestore(q->queue_lock, flags);
+                spin_unlock_irqrestore(&mq->lock, flags);
 
                 /*
                  * If 'waiting' then the waiting task will complete this
@@ -2122,10 +2120,10 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
         /* Take the recovery path for errors or urgent background operations */
         if (mmc_blk_rq_error(&mqrq->brq) ||
             mmc_blk_urgent_bkops_needed(mq, mqrq)) {
-                spin_lock_irqsave(q->queue_lock, flags);
+                spin_lock_irqsave(&mq->lock, flags);
                 mq->recovery_needed = true;
                 mq->recovery_req = req;
-                spin_unlock_irqrestore(q->queue_lock, flags);
+                spin_unlock_irqrestore(&mq->lock, flags);
                 wake_up(&mq->wait);
                 schedule_work(&mq->recovery_work);
                 return;
@@ -2141,7 +2139,6 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
 
 static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
 {
-        struct request_queue *q = mq->queue;
         unsigned long flags;
         bool done;
 
@@ -2149,7 +2146,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
          * Wait while there is another request in progress, but not if recovery
          * is needed. Also indicate whether there is a request waiting to start.
          */
-        spin_lock_irqsave(q->queue_lock, flags);
+        spin_lock_irqsave(&mq->lock, flags);
         if (mq->recovery_needed) {
                 *err = -EBUSY;
                 done = true;
@@ -2157,7 +2154,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
                 done = !mq->rw_wait;
         }
         mq->waiting = !done;
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irqrestore(&mq->lock, flags);
 
         return done;
 }
@@ -2334,12 +2331,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
                 goto err_kfree;
         }
 
-        spin_lock_init(&md->lock);
         INIT_LIST_HEAD(&md->part);
         INIT_LIST_HEAD(&md->rpmbs);
         md->usage = 1;
 
-        ret = mmc_init_queue(&md->queue, card, &md->lock, subname);
+        ret = mmc_init_queue(&md->queue, card);
         if (ret)
                 goto err_putdisk;
 
@@ -89,9 +89,9 @@ void mmc_cqe_recovery_notifier(struct mmc_request *mrq)
         struct mmc_queue *mq = q->queuedata;
         unsigned long flags;
 
-        spin_lock_irqsave(q->queue_lock, flags);
+        spin_lock_irqsave(&mq->lock, flags);
         __mmc_cqe_recovery_notifier(mq);
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irqrestore(&mq->lock, flags);
 }
 
 static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
@@ -128,14 +128,14 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
         unsigned long flags;
         int ret;
 
-        spin_lock_irqsave(q->queue_lock, flags);
+        spin_lock_irqsave(&mq->lock, flags);
 
         if (mq->recovery_needed || !mq->use_cqe)
                 ret = BLK_EH_RESET_TIMER;
         else
                 ret = mmc_cqe_timed_out(req);
 
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irqrestore(&mq->lock, flags);
 
         return ret;
 }
@@ -157,9 +157,9 @@ static void mmc_mq_recovery_handler(struct work_struct *work)
 
         mq->in_recovery = false;
 
-        spin_lock_irq(q->queue_lock);
+        spin_lock_irq(&mq->lock);
         mq->recovery_needed = false;
-        spin_unlock_irq(q->queue_lock);
+        spin_unlock_irq(&mq->lock);
 
         mmc_put_card(mq->card, &mq->ctx);
 
@@ -258,10 +258,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
         issue_type = mmc_issue_type(mq, req);
 
-        spin_lock_irq(q->queue_lock);
+        spin_lock_irq(&mq->lock);
 
         if (mq->recovery_needed || mq->busy) {
-                spin_unlock_irq(q->queue_lock);
+                spin_unlock_irq(&mq->lock);
                 return BLK_STS_RESOURCE;
         }
 
@@ -269,7 +269,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
         case MMC_ISSUE_DCMD:
                 if (mmc_cqe_dcmd_busy(mq)) {
                         mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
-                        spin_unlock_irq(q->queue_lock);
+                        spin_unlock_irq(&mq->lock);
                         return BLK_STS_RESOURCE;
                 }
                 break;
@@ -294,7 +294,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
         get_card = (mmc_tot_in_flight(mq) == 1);
         cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1);
 
-        spin_unlock_irq(q->queue_lock);
+        spin_unlock_irq(&mq->lock);
 
         if (!(req->rq_flags & RQF_DONTPREP)) {
                 req_to_mmc_queue_req(req)->retries = 0;
@@ -328,12 +328,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
         if (issued != MMC_REQ_STARTED) {
                 bool put_card = false;
 
-                spin_lock_irq(q->queue_lock);
+                spin_lock_irq(&mq->lock);
                 mq->in_flight[issue_type] -= 1;
                 if (mmc_tot_in_flight(mq) == 0)
                         put_card = true;
                 mq->busy = false;
-                spin_unlock_irq(q->queue_lock);
+                spin_unlock_irq(&mq->lock);
                 if (put_card)
                         mmc_put_card(card, &mq->ctx);
         } else {
@@ -378,14 +378,37 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
         init_waitqueue_head(&mq->wait);
 }
 
-static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
-                             const struct blk_mq_ops *mq_ops, spinlock_t *lock)
+/* Set queue depth to get a reasonable value for q->nr_requests */
+#define MMC_QUEUE_DEPTH 64
+
+/**
+ * mmc_init_queue - initialise a queue structure.
+ * @mq: mmc queue
+ * @card: mmc card to attach this queue
+ *
+ * Initialise a MMC card request queue.
+ */
+int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 {
+        struct mmc_host *host = card->host;
         int ret;
 
+        mq->card = card;
+        mq->use_cqe = host->cqe_enabled;
+
+        spin_lock_init(&mq->lock);
+
         memset(&mq->tag_set, 0, sizeof(mq->tag_set));
-        mq->tag_set.ops = mq_ops;
-        mq->tag_set.queue_depth = q_depth;
+        mq->tag_set.ops = &mmc_mq_ops;
+        /*
+         * The queue depth for CQE must match the hardware because the request
+         * tag is used to index the hardware queue.
+         */
+        if (mq->use_cqe)
+                mq->tag_set.queue_depth =
+                        min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
+        else
+                mq->tag_set.queue_depth = MMC_QUEUE_DEPTH;
         mq->tag_set.numa_node = NUMA_NO_NODE;
         mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
                             BLK_MQ_F_BLOCKING;
@@ -403,68 +426,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
                 goto free_tag_set;
         }
 
-        mq->queue->queue_lock = lock;
         mq->queue->queuedata = mq;
+        blk_queue_rq_timeout(mq->queue, 60 * HZ);
 
+        mmc_setup_queue(mq, card);
         return 0;
 
 free_tag_set:
         blk_mq_free_tag_set(&mq->tag_set);
-
         return ret;
 }
 
-/* Set queue depth to get a reasonable value for q->nr_requests */
-#define MMC_QUEUE_DEPTH 64
-
-static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card,
-                       spinlock_t *lock)
-{
-        struct mmc_host *host = card->host;
-        int q_depth;
-        int ret;
-
-        /*
-         * The queue depth for CQE must match the hardware because the request
-         * tag is used to index the hardware queue.
-         */
-        if (mq->use_cqe)
-                q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
-        else
-                q_depth = MMC_QUEUE_DEPTH;
-
-        ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock);
-        if (ret)
-                return ret;
-
-        blk_queue_rq_timeout(mq->queue, 60 * HZ);
-
-        mmc_setup_queue(mq, card);
-
-        return 0;
-}
-
-/**
- * mmc_init_queue - initialise a queue structure.
- * @mq: mmc queue
- * @card: mmc card to attach this queue
- * @lock: queue lock
- * @subname: partition subname
- *
- * Initialise a MMC card request queue.
- */
-int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
-                   spinlock_t *lock, const char *subname)
-{
-        struct mmc_host *host = card->host;
-
-        mq->card = card;
-
-        mq->use_cqe = host->cqe_enabled;
-
-        return mmc_mq_init(mq, card, lock);
-}
-
 void mmc_queue_suspend(struct mmc_queue *mq)
 {
         blk_mq_quiesce_queue(mq->queue);
@@ -77,6 +77,7 @@ struct mmc_queue {
         struct blk_mq_tag_set tag_set;
         struct mmc_blk_data *blkdata;
         struct request_queue *queue;
+        spinlock_t lock;
         int in_flight[MMC_ISSUE_MAX];
         unsigned int cqe_busy;
 #define MMC_CQE_DCMD_BUSY BIT(0)
@@ -95,8 +96,7 @@ struct mmc_queue {
         struct work_struct complete_work;
 };
 
-extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
-                          const char *);
+extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *);
 extern void mmc_cleanup_queue(struct mmc_queue *);
 extern void mmc_queue_suspend(struct mmc_queue *);
 extern void mmc_queue_resume(struct mmc_queue *);