Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:
 "This is the main pull request for block/storage for 4.21.

  Larger than usual, it was a busy round with lots of goodies queued up.
  Most notable is the removal of the old IO stack, which has been a long
  time coming. No new features for a while, everything coming in this
  week has all been fixes for things that were previously merged.

  This contains:

   - Use atomic counters instead of semaphores for mtip32xx (Arnd)

   - Cleanup of the mtip32xx request setup (Christoph)

   - Fix for circular locking dependency in loop (Jan, Tetsuo)

   - bcache (Coly, Guoju, Shenghui)
      * Optimizations for writeback caching
      * Various fixes and improvements

   - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
      * host and target support for NVMe over TCP
      * Error log page support
      * Support for separate read/write/poll queues
      * Much improved polling
      * discard OOM fallback
      * Tracepoint improvements

   - lightnvm (Hans, Hua, Igor, Matias, Javier)
      * Igor added packed metadata to pblk. Now drives without metadata
        per LBA can be used as well.
      * Fix from Geert on uninitialized value on chunk metadata reads.
      * Fixes from Hans and Javier to pblk recovery and write path.
      * Fix from Hua Su to fix a race condition in the pblk recovery
        code.
      * Scan optimization added to pblk recovery from Zhoujie.
      * Small geometry cleanup from me.

   - Conversion of the last few drivers that used the legacy path to
     blk-mq (me)

   - Removal of legacy IO path in SCSI (me, Christoph)

   - Removal of legacy IO stack and schedulers (me)

   - Support for much better polling, now without interrupts at all.
     blk-mq adds support for multiple queue maps, which enables us to
     have a map per type. This in turn enables nvme to have separate
     completion queues for polling, which can then be interrupt-less.
     Also means we're ready for async polled IO, which is hopefully
     coming in the next release.

   - Killing of (now) unused block exports (Christoph)

   - Unification of the blk-rq-qos and blk-wbt wait handling (Josef)

   - Support for zoned testing with null_blk (Masato)

   - sx8 conversion to per-host tag sets (Christoph)

   - IO priority improvements (Damien)

   - mq-deadline zoned fix (Damien)

   - Ref count blkcg series (Dennis)

   - Lots of blk-mq improvements and speedups (me)

   - sbitmap scalability improvements (me)

   - Make core inflight IO accounting per-cpu (Mikulas)

   - Export timeout setting in sysfs (Weiping)

   - Cleanup the direct issue path (Jianchao)

   - Export blk-wbt internals in block debugfs for easier debugging
     (Ming)

   - Lots of other fixes and improvements"

* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
  kyber: use sbitmap add_wait_queue/list_del wait helpers
  sbitmap: add helpers for add/del wait queue handling
  block: save irq state in blkg_lookup_create()
  dm: don't reuse bio for flushes
  nvme-pci: trace SQ status on completions
  nvme-rdma: implement polling queue map
  nvme-fabrics: allow user to pass in nr_poll_queues
  nvme-fabrics: allow nvmf_connect_io_queue to poll
  nvme-core: optionally poll sync commands
  block: make request_to_qc_t public
  nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
  nvme-tcp: fix endianess annotations
  nvmet-tcp: fix endianess annotations
  nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
  nvme-pci: only set nr_maps to 2 if poll queues are supported
  nvmet: use a macro for default error location
  nvmet: fix comparison of a u16 with -1
  blk-mq: enable IO poll if .nr_queues of type poll > 0
  blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
  blk-mq: skip zero-queue maps in blk_mq_map_swqueue
  ...
This commit is contained in:
Linus Torvalds
2018-12-28 13:19:59 -08:00
246 changed files with 10469 additions and 14370 deletions

View File

@@ -100,7 +100,6 @@ static DEFINE_IDA(mmc_rpmb_ida);
* There is one mmc_blk_data per slot.
*/
struct mmc_blk_data {
spinlock_t lock;
struct device *parent;
struct gendisk *disk;
struct mmc_queue queue;
@@ -1488,7 +1487,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
blk_mq_end_request(req, BLK_STS_OK);
}
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
mq->in_flight[mmc_issue_type(mq, req)] -= 1;
@@ -1496,7 +1495,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
mmc_cqe_check_busy(mq);
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
if (!mq->cqe_busy)
blk_mq_run_hw_queues(q, true);
@@ -1993,17 +1992,16 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
{
struct request_queue *q = req->q;
unsigned long flags;
bool put_card;
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
mq->in_flight[mmc_issue_type(mq, req)] -= 1;
put_card = (mmc_tot_in_flight(mq) == 0);
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
if (put_card)
mmc_put_card(mq->card, &mq->ctx);
@@ -2099,11 +2097,11 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
* request does not need to wait (although it does need to
* complete complete_req first).
*/
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
mq->complete_req = req;
mq->rw_wait = false;
waiting = mq->waiting;
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
/*
* If 'waiting' then the waiting task will complete this
@@ -2122,10 +2120,10 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
/* Take the recovery path for errors or urgent background operations */
if (mmc_blk_rq_error(&mqrq->brq) ||
mmc_blk_urgent_bkops_needed(mq, mqrq)) {
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
mq->recovery_needed = true;
mq->recovery_req = req;
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
wake_up(&mq->wait);
schedule_work(&mq->recovery_work);
return;
@@ -2141,7 +2139,6 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
{
struct request_queue *q = mq->queue;
unsigned long flags;
bool done;
@@ -2149,7 +2146,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
* Wait while there is another request in progress, but not if recovery
* is needed. Also indicate whether there is a request waiting to start.
*/
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
if (mq->recovery_needed) {
*err = -EBUSY;
done = true;
@@ -2157,7 +2154,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
done = !mq->rw_wait;
}
mq->waiting = !done;
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
return done;
}
@@ -2334,12 +2331,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
goto err_kfree;
}
spin_lock_init(&md->lock);
INIT_LIST_HEAD(&md->part);
INIT_LIST_HEAD(&md->rpmbs);
md->usage = 1;
ret = mmc_init_queue(&md->queue, card, &md->lock, subname);
ret = mmc_init_queue(&md->queue, card);
if (ret)
goto err_putdisk;

View File

@@ -89,9 +89,9 @@ void mmc_cqe_recovery_notifier(struct mmc_request *mrq)
struct mmc_queue *mq = q->queuedata;
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
__mmc_cqe_recovery_notifier(mq);
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
}
static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
@@ -128,14 +128,14 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
unsigned long flags;
int ret;
spin_lock_irqsave(q->queue_lock, flags);
spin_lock_irqsave(&mq->lock, flags);
if (mq->recovery_needed || !mq->use_cqe)
ret = BLK_EH_RESET_TIMER;
else
ret = mmc_cqe_timed_out(req);
spin_unlock_irqrestore(q->queue_lock, flags);
spin_unlock_irqrestore(&mq->lock, flags);
return ret;
}
@@ -157,9 +157,9 @@ static void mmc_mq_recovery_handler(struct work_struct *work)
mq->in_recovery = false;
spin_lock_irq(q->queue_lock);
spin_lock_irq(&mq->lock);
mq->recovery_needed = false;
spin_unlock_irq(q->queue_lock);
spin_unlock_irq(&mq->lock);
mmc_put_card(mq->card, &mq->ctx);
@@ -258,10 +258,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
issue_type = mmc_issue_type(mq, req);
spin_lock_irq(q->queue_lock);
spin_lock_irq(&mq->lock);
if (mq->recovery_needed || mq->busy) {
spin_unlock_irq(q->queue_lock);
spin_unlock_irq(&mq->lock);
return BLK_STS_RESOURCE;
}
@@ -269,7 +269,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
case MMC_ISSUE_DCMD:
if (mmc_cqe_dcmd_busy(mq)) {
mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
spin_unlock_irq(q->queue_lock);
spin_unlock_irq(&mq->lock);
return BLK_STS_RESOURCE;
}
break;
@@ -294,7 +294,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
get_card = (mmc_tot_in_flight(mq) == 1);
cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1);
spin_unlock_irq(q->queue_lock);
spin_unlock_irq(&mq->lock);
if (!(req->rq_flags & RQF_DONTPREP)) {
req_to_mmc_queue_req(req)->retries = 0;
@@ -328,12 +328,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (issued != MMC_REQ_STARTED) {
bool put_card = false;
spin_lock_irq(q->queue_lock);
spin_lock_irq(&mq->lock);
mq->in_flight[issue_type] -= 1;
if (mmc_tot_in_flight(mq) == 0)
put_card = true;
mq->busy = false;
spin_unlock_irq(q->queue_lock);
spin_unlock_irq(&mq->lock);
if (put_card)
mmc_put_card(card, &mq->ctx);
} else {
@@ -378,14 +378,37 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
init_waitqueue_head(&mq->wait);
}
static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
const struct blk_mq_ops *mq_ops, spinlock_t *lock)
/* Set queue depth to get a reasonable value for q->nr_requests */
#define MMC_QUEUE_DEPTH 64
/**
* mmc_init_queue - initialise a queue structure.
* @mq: mmc queue
* @card: mmc card to attach this queue
*
* Initialise a MMC card request queue.
*/
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
{
struct mmc_host *host = card->host;
int ret;
mq->card = card;
mq->use_cqe = host->cqe_enabled;
spin_lock_init(&mq->lock);
memset(&mq->tag_set, 0, sizeof(mq->tag_set));
mq->tag_set.ops = mq_ops;
mq->tag_set.queue_depth = q_depth;
mq->tag_set.ops = &mmc_mq_ops;
/*
* The queue depth for CQE must match the hardware because the request
* tag is used to index the hardware queue.
*/
if (mq->use_cqe)
mq->tag_set.queue_depth =
min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
else
mq->tag_set.queue_depth = MMC_QUEUE_DEPTH;
mq->tag_set.numa_node = NUMA_NO_NODE;
mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
BLK_MQ_F_BLOCKING;
@@ -403,68 +426,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
goto free_tag_set;
}
mq->queue->queue_lock = lock;
mq->queue->queuedata = mq;
blk_queue_rq_timeout(mq->queue, 60 * HZ);
mmc_setup_queue(mq, card);
return 0;
free_tag_set:
blk_mq_free_tag_set(&mq->tag_set);
return ret;
}
/* Set queue depth to get a reasonable value for q->nr_requests */
#define MMC_QUEUE_DEPTH 64
static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card,
spinlock_t *lock)
{
struct mmc_host *host = card->host;
int q_depth;
int ret;
/*
* The queue depth for CQE must match the hardware because the request
* tag is used to index the hardware queue.
*/
if (mq->use_cqe)
q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
else
q_depth = MMC_QUEUE_DEPTH;
ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock);
if (ret)
return ret;
blk_queue_rq_timeout(mq->queue, 60 * HZ);
mmc_setup_queue(mq, card);
return 0;
}
/**
* mmc_init_queue - initialise a queue structure.
* @mq: mmc queue
* @card: mmc card to attach this queue
* @lock: queue lock
* @subname: partition subname
*
* Initialise a MMC card request queue.
*/
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
spinlock_t *lock, const char *subname)
{
struct mmc_host *host = card->host;
mq->card = card;
mq->use_cqe = host->cqe_enabled;
return mmc_mq_init(mq, card, lock);
}
void mmc_queue_suspend(struct mmc_queue *mq)
{
blk_mq_quiesce_queue(mq->queue);

View File

@@ -77,6 +77,7 @@ struct mmc_queue {
struct blk_mq_tag_set tag_set;
struct mmc_blk_data *blkdata;
struct request_queue *queue;
spinlock_t lock;
int in_flight[MMC_ISSUE_MAX];
unsigned int cqe_busy;
#define MMC_CQE_DCMD_BUSY BIT(0)
@@ -95,8 +96,7 @@ struct mmc_queue {
struct work_struct complete_work;
};
extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
const char *);
extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *);
extern void mmc_cleanup_queue(struct mmc_queue *);
extern void mmc_queue_suspend(struct mmc_queue *);
extern void mmc_queue_resume(struct mmc_queue *);