Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block

Pull core block layer updates from Jens Axboe:
 "This is the main pull request for block storage for 4.15-rc1.

  Nothing out of the ordinary in here, and no API changes or anything
  like that. Just various new features for drivers, core changes, etc.
  In particular, this pull request contains:

   - A patch series from Bart, closing the whole on blk/scsi-mq queue
     quescing.

   - A series from Christoph, building towards hidden gendisks (for
     multipath) and ability to move bio chains around.

   - NVMe
        - Support for native multipath for NVMe (Christoph).
        - Userspace notifications for AENs (Keith).
        - Command side-effects support (Keith).
        - SGL support (Chaitanya Kulkarni)
        - FC fixes and improvements (James Smart)
        - Lots of fixes and tweaks (Various)

   - bcache
        - New maintainer (Michael Lyle)
        - Writeback control improvements (Michael)
        - Various fixes (Coly, Elena, Eric, Liang, et al)

   - lightnvm updates, mostly centered around the pblk interface
     (Javier, Hans, and Rakesh).

   - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)

   - Writeback series that fix the much discussed hundreds of millions
     of sync-all units. This goes all the way, as discussed previously
     (me).

   - Fix for missing wakeup on writeback timer adjustments (Yafang
     Shao).

   - Fix laptop mode on blk-mq (me).

   - {mq,name} tupple lookup for IO schedulers, allowing us to have
     alias names. This means you can use 'deadline' on both !mq and on
     mq (where it's called mq-deadline). (me).

   - blktrace race fix, oopsing on sg load (me).

   - blk-mq optimizations (me).

   - Obscure waitqueue race fix for kyber (Omar).

   - NBD fixes (Josef).

   - Disable writeback throttling by default on bfq, like we do on cfq
     (Luca Miccio).

   - Series from Ming that enable us to treat flush requests on blk-mq
     like any other request. This is a really nice cleanup.

   - Series from Ming that improves merging on blk-mq with schedulers,
     getting us closer to flipping the switch on scsi-mq again.

   - BFQ updates (Paolo).

   - blk-mq atomic flags memory ordering fixes (Peter Z).

   - Loop cgroup support (Shaohua).

   - Lots of minor fixes from lots of different folks, both for core and
     driver code"

* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
  nvme: fix visibility of "uuid" ns attribute
  blk-mq: fixup some comment typos and lengths
  ide: ide-atapi: fix compile error with defining macro DEBUG
  blk-mq: improve tag waiting setup for non-shared tags
  brd: remove unused brd_mutex
  blk-mq: only run the hardware queue if IO is pending
  block: avoid null pointer dereference on null disk
  fs: guard_bio_eod() needs to consider partitions
  xtensa/simdisk: fix compile error
  nvme: expose subsys attribute to sysfs
  nvme: create 'slaves' and 'holders' entries for hidden controllers
  block: create 'slaves' and 'holders' entries for hidden gendisks
  nvme: also expose the namespace identification sysfs files for mpath nodes
  nvme: implement multipath access to nvme subsystems
  nvme: track shared namespaces
  nvme: introduce a nvme_ns_ids structure
  nvme: track subsystems
  block, nvme: Introduce blk_mq_req_flags_t
  block, scsi: Make SCSI quiesce and resume work reliably
  block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
  ...
This commit is contained in:
Linus Torvalds
2017-11-14 15:32:19 -08:00
131 changed files with 5485 additions and 3104 deletions

View File

@@ -252,27 +252,6 @@ out:
return ret;
}
/*
* Kick the writeback threads then try to free up some ZONE_NORMAL memory.
*/
static void free_more_memory(void)
{
struct zoneref *z;
int nid;
wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
yield();
for_each_online_node(nid) {
z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
gfp_zone(GFP_NOFS), NULL);
if (z->zone)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS, NULL);
}
}
/*
* I/O completion handler for block_read_full_page() - pages
* which come unlocked at the end of I/O.
@@ -861,16 +840,19 @@ int remove_inode_buffers(struct inode *inode)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
int retry)
bool retry)
{
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS;
long offset;
try_again:
if (retry)
gfp |= __GFP_NOFAIL;
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
bh = alloc_buffer_head(GFP_NOFS);
bh = alloc_buffer_head(gfp);
if (!bh)
goto no_grow;
@@ -896,23 +878,7 @@ no_grow:
} while (head);
}
/*
* Return failure for non-async IO requests. Async IO requests
* are not allowed to fail, so we have to wait until buffer heads
* become available. But we don't want tasks sleeping with
* partially complete buffers, so all were released above.
*/
if (!retry)
return NULL;
/* We're _really_ low on memory. Now we just
* wait for old buffer heads to become free due to
* finishing IO. Since this is an async request and
* the reserve list is empty, we're sure there are
* async buffer heads in use.
*/
free_more_memory();
goto try_again;
return NULL;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
@@ -1001,8 +967,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
gfp_mask |= __GFP_NOFAIL;
page = find_or_create_page(inode->i_mapping, index, gfp_mask);
if (!page)
return ret;
BUG_ON(!PageLocked(page));
@@ -1021,9 +985,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
/*
* Allocate some buffers for this page
*/
bh = alloc_page_buffers(page, size, 0);
if (!bh)
goto failed;
bh = alloc_page_buffers(page, size, true);
/*
* Link the page to the buffers and initialise them. Take the
@@ -1103,8 +1065,6 @@ __getblk_slow(struct block_device *bdev, sector_t block,
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
}
}
@@ -1575,7 +1535,7 @@ void create_empty_buffers(struct page *page,
{
struct buffer_head *bh, *head, *tail;
head = alloc_page_buffers(page, blocksize, 1);
head = alloc_page_buffers(page, blocksize, true);
bh = head;
do {
bh->b_state |= b_state;
@@ -2639,7 +2599,7 @@ int nobh_write_begin(struct address_space *mapping,
* Be careful: the buffer linked list is a NULL terminated one, rather
* than the circular one we're used to.
*/
head = alloc_page_buffers(page, blocksize, 0);
head = alloc_page_buffers(page, blocksize, false);
if (!head) {
ret = -ENOMEM;
goto out_release;
@@ -3056,8 +3016,16 @@ void guard_bio_eod(int op, struct bio *bio)
sector_t maxsector;
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned truncated_bytes;
struct hd_struct *part;
rcu_read_lock();
part = __disk_get_part(bio->bi_disk, bio->bi_partno);
if (part)
maxsector = part_nr_sects_read(part);
else
maxsector = get_capacity(bio->bi_disk);
rcu_read_unlock();
maxsector = get_capacity(bio->bi_disk);
if (!maxsector)
return;