Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block

Pull core block layer updates from Jens Axboe:
 "This is the main pull request for block storage for 4.15-rc1.

  Nothing out of the ordinary in here, and no API changes or anything
  like that. Just various new features for drivers, core changes, etc.
  In particular, this pull request contains:

   - A patch series from Bart, closing the hole on blk/scsi-mq queue
     quiescing.

   - A series from Christoph, building towards hidden gendisks (for
     multipath) and ability to move bio chains around.

   - NVMe
        - Support for native multipath for NVMe (Christoph).
        - Userspace notifications for AENs (Keith).
        - Command side-effects support (Keith).
        - SGL support (Chaitanya Kulkarni)
        - FC fixes and improvements (James Smart)
        - Lots of fixes and tweaks (Various)

   - bcache
        - New maintainer (Michael Lyle)
        - Writeback control improvements (Michael)
        - Various fixes (Coly, Elena, Eric, Liang, et al)

   - lightnvm updates, mostly centered around the pblk interface
     (Javier, Hans, and Rakesh).

   - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)

   - Writeback series that fix the much discussed hundreds of millions
     of sync-all units. This goes all the way, as discussed previously
     (me).

   - Fix for missing wakeup on writeback timer adjustments (Yafang
     Shao).

   - Fix laptop mode on blk-mq (me).

   - {mq,name} tuple lookup for IO schedulers, allowing us to have
     alias names. This means you can use 'deadline' on both !mq and on
     mq (where it's called mq-deadline). (me).

   - blktrace race fix, oopsing on sg load (me).

   - blk-mq optimizations (me).

   - Obscure waitqueue race fix for kyber (Omar).

   - NBD fixes (Josef).

   - Disable writeback throttling by default on bfq, like we do on cfq
     (Luca Miccio).

   - Series from Ming that enables us to treat flush requests on blk-mq
     like any other request. This is a really nice cleanup.

   - Series from Ming that improves merging on blk-mq with schedulers,
     getting us closer to flipping the switch on scsi-mq again.

   - BFQ updates (Paolo).

   - blk-mq atomic flags memory ordering fixes (Peter Z).

   - Loop cgroup support (Shaohua).

   - Lots of minor fixes from lots of different folks, both for core and
     driver code"

* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
  nvme: fix visibility of "uuid" ns attribute
  blk-mq: fixup some comment typos and lengths
  ide: ide-atapi: fix compile error with defining macro DEBUG
  blk-mq: improve tag waiting setup for non-shared tags
  brd: remove unused brd_mutex
  blk-mq: only run the hardware queue if IO is pending
  block: avoid null pointer dereference on null disk
  fs: guard_bio_eod() needs to consider partitions
  xtensa/simdisk: fix compile error
  nvme: expose subsys attribute to sysfs
  nvme: create 'slaves' and 'holders' entries for hidden controllers
  block: create 'slaves' and 'holders' entries for hidden gendisks
  nvme: also expose the namespace identification sysfs files for mpath nodes
  nvme: implement multipath access to nvme subsystems
  nvme: track shared namespaces
  nvme: introduce a nvme_ns_ids structure
  nvme: track subsystems
  block, nvme: Introduce blk_mq_req_flags_t
  block, scsi: Make SCSI quiesce and resume work reliably
  block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
  ...
This commit is contained in:
Linus Torvalds
2017-11-14 15:32:19 -08:00
131 changed files with 5485 additions and 3104 deletions

View File

@@ -27,12 +27,12 @@ struct kmem_cache *bch_search_cache;
static void bch_data_insert_start(struct closure *);
static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
static unsigned cache_mode(struct cached_dev *dc)
{
return BDEV_CACHE_MODE(&dc->sb);
}
static bool verify(struct cached_dev *dc, struct bio *bio)
static bool verify(struct cached_dev *dc)
{
return dc->verify;
}
@@ -370,7 +370,7 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
struct cache_set *c = dc->disk.c;
unsigned mode = cache_mode(dc, bio);
unsigned mode = cache_mode(dc);
unsigned sectors, congested = bch_get_congested(c);
struct task_struct *task = current;
struct io *i;
@@ -385,6 +385,14 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
op_is_write(bio_op(bio))))
goto skip;
/*
* Flag for bypass if the IO is for read-ahead or background,
* unless the read-ahead request is for metadata (eg, for gfs2).
*/
if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
!(bio->bi_opf & REQ_META))
goto skip;
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
pr_debug("skipping unaligned io");
@@ -463,6 +471,7 @@ struct search {
unsigned recoverable:1;
unsigned write:1;
unsigned read_dirty_data:1;
unsigned cache_missed:1;
unsigned long start_time;
@@ -649,6 +658,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->orig_bio = bio;
s->cache_miss = NULL;
s->cache_missed = 0;
s->d = d;
s->recoverable = 1;
s->write = op_is_write(bio_op(bio));
@@ -698,8 +708,16 @@ static void cached_dev_read_error(struct closure *cl)
{
struct search *s = container_of(cl, struct search, cl);
struct bio *bio = &s->bio.bio;
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
if (s->recoverable) {
/*
* If the cache device is dirty (dc->has_dirty is non-zero), then
* recovering a failed read request from the cached device may
* return stale data. So read failure recovery is only permitted
* when the cache device is clean.
*/
if (s->recoverable &&
(dc && !atomic_read(&dc->has_dirty))) {
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
@@ -740,7 +758,7 @@ static void cached_dev_read_done(struct closure *cl)
s->cache_miss = NULL;
}
if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
if (verify(dc) && s->recoverable && !s->read_dirty_data)
bch_data_verify(dc, s->orig_bio);
bio_complete(s);
@@ -760,12 +778,12 @@ static void cached_dev_read_done_bh(struct closure *cl)
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
bch_mark_cache_accounting(s->iop.c, s->d,
!s->cache_miss, s->iop.bypass);
!s->cache_missed, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
if (s->iop.status)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
else if (s->iop.bio || verify(dc, &s->bio.bio))
else if (s->iop.bio || verify(dc))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
else
continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
@@ -779,6 +797,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
s->cache_missed = 1;
if (s->cache_miss || s->iop.bypass) {
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
@@ -892,7 +912,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bypass = true;
if (should_writeback(dc, s->orig_bio,
cache_mode(dc, bio),
cache_mode(dc),
s->iop.bypass)) {
s->iop.bypass = false;
s->iop.writeback = true;