Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:
 "First pull request for this merge window, there will also be a
  followup request with some stragglers.

  This pull request contains:

   - Fix for a thundering heard issue in the wbt block code (Anchal
     Agarwal)

   - A few NVMe pull requests:
      * Improved tracepoints (Keith)
      * Larger inline data support for RDMA (Steve Wise)
      * RDMA setup/teardown fixes (Sagi)
      * Effects log suppor for NVMe target (Chaitanya Kulkarni)
      * Buffered IO suppor for NVMe target (Chaitanya Kulkarni)
      * TP4004 (ANA) support (Christoph)
      * Various NVMe fixes

   - Block io-latency controller support. Much needed support for
     properly containing block devices. (Josef)

   - Series improving how we handle sense information on the stack
     (Kees)

   - Lightnvm fixes and updates/improvements (Mathias/Javier et al)

   - Zoned device support for null_blk (Matias)

   - AIX partition fixes (Mauricio Faria de Oliveira)

   - DIF checksum code made generic (Max Gurtovoy)

   - Add support for discard in iostats (Michael Callahan / Tejun)

   - Set of updates for BFQ (Paolo)

   - Removal of async write support for bsg (Christoph)

   - Bio page dirtying and clone fixups (Christoph)

   - Set of bcache fix/changes (via Coly)

   - Series improving blk-mq queue setup/teardown speed (Ming)

   - Series improving merging performance on blk-mq (Ming)

   - Lots of other fixes and cleanups from a slew of folks"

* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
  blkcg: Make blkg_root_lookup() work for queues in bypass mode
  bcache: fix error setting writeback_rate through sysfs interface
  null_blk: add lock drop/acquire annotation
  Blk-throttle: reduce tail io latency when iops limit is enforced
  block: paride: pd: mark expected switch fall-throughs
  block: Ensure that a request queue is dissociated from the cgroup controller
  block: Introduce blk_exit_queue()
  blkcg: Introduce blkg_root_lookup()
  block: Remove two superfluous #include directives
  blk-mq: count the hctx as active before allocating tag
  block: bvec_nr_vecs() returns value for wrong slab
  bcache: trivial - remove tailing backslash in macro BTREE_FLAG
  bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
  bcache: set max writeback rate when I/O request is idle
  bcache: add code comments for bset.c
  bcache: fix mistaken comments in request.c
  bcache: fix mistaken code comments in bcache.h
  bcache: add a comment in super.c
  bcache: avoid unncessary cache prefetch bch_btree_node_get()
  bcache: display rate debug parameters to 0 when writeback is not running
  ...
このコミットが含まれているのは:
Linus Torvalds
2018-08-14 10:23:25 -07:00
コミット 73ba2fb33c
172個のファイルの変更6035行の追加2665行の削除

ファイルの表示

@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
VM_BUG_ON_PAGE(!PageCompound(page), page);
if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
vmf->address, page_to_nid(page));
if (unlikely(!pages[i] ||
mem_cgroup_try_charge(pages[i], vma->vm_mm,
mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
GFP_KERNEL, &memcg, false))) {
if (pages[i])
put_page(pages[i]);
@@ -1312,7 +1312,7 @@ alloc:
goto out;
}
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
huge_gfp, &memcg, true))) {
put_page(new_page);
split_huge_pmd(vma, vmf->pmd, vmf->address);

ファイルの表示

@@ -5600,6 +5600,19 @@ out:
return ret;
}
int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, struct mem_cgroup **memcgp,
bool compound)
{
struct mem_cgroup *memcg;
int ret;
ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
memcg = *memcgp;
mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
return ret;
}
/**
* mem_cgroup_commit_charge - commit a page charge
* @page: page to charge

ファイルの表示

@@ -2524,7 +2524,7 @@ static int wp_page_copy(struct vm_fault *vmf)
cow_user_page(new_page, old_page, vmf->address, vma);
}
if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
goto oom_free_new;
__SetPageUptodate(new_page);
@@ -3024,8 +3024,8 @@ int do_swap_page(struct vm_fault *vmf)
goto out_page;
}
if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
&memcg, false)) {
if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
&memcg, false)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -3186,7 +3186,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
if (!page)
goto oom;
if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
false))
goto oom_free_page;
/*
@@ -3682,7 +3683,7 @@ static int do_cow_fault(struct vm_fault *vmf)
if (!vmf->cow_page)
return VM_FAULT_OOM;
if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
&vmf->memcg, false)) {
put_page(vmf->cow_page);
return VM_FAULT_OOM;

ファイルの表示

@@ -338,7 +338,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
ret = -ENOMEM;
goto out;
}
bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
bio_associate_blkcg_from_page(bio, page);
count_swpout_vm_event(page);
set_page_writeback(page);
unlock_page(page);

ファイルの表示

@@ -19,6 +19,7 @@
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include "internal.h"
@@ -385,6 +386,7 @@ ondemand_readahead(struct address_space *mapping,
{
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
pgoff_t prev_offset;
/*
@@ -474,10 +476,17 @@ readit:
* Will this read hit the readahead marker made by itself?
* If so, trigger the readahead marker hit now, and merge
* the resulted next readahead window into the current one.
* Take care of maximum IO pages as above.
*/
if (offset == ra->start && ra->size == ra->async_size) {
ra->async_size = get_next_ra_size(ra, max_pages);
ra->size += ra->async_size;
add_pages = get_next_ra_size(ra, max_pages);
if (ra->size + add_pages <= max_pages) {
ra->async_size = add_pages;
ra->size += add_pages;
} else {
ra->size = max_pages;
ra->async_size = max_pages >> 1;
}
}
return ra_submit(ra, mapping, filp);
@@ -505,6 +514,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
if (!ra->ra_pages)
return;
if (blk_cgroup_congested())
return;
/* be dumb */
if (filp && (filp->f_mode & FMODE_RANDOM)) {
force_page_cache_readahead(mapping, filp, offset, req_size);
@@ -555,6 +567,9 @@ page_cache_async_readahead(struct address_space *mapping,
if (inode_read_congested(mapping->host))
return;
if (blk_cgroup_congested())
return;
/* do read-ahead */
ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}

ファイルの表示

@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
*/
error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
false);
error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
&memcg, false);
if (error)
goto out;
/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1713,7 +1713,7 @@ repeat:
goto failed;
}
error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
false);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
@@ -1819,7 +1819,7 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode,
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
PageTransHuge(page));
if (error)
goto unacct;
@@ -2292,7 +2292,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
__SetPageSwapBacked(page);
__SetPageUptodate(page);
ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
if (ret)
goto out_release;

ファイルの表示

@@ -3745,6 +3745,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
}
}
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
gfp_t gfp_mask)
{
struct swap_info_struct *si, *next;
if (!(gfp_mask & __GFP_IO) || !memcg)
return;
if (!blk_cgroup_congested())
return;
/*
* We've already scheduled a throttle, avoid taking the global swap
* lock.
*/
if (current->throttle_queue)
return;
spin_lock(&swap_avail_lock);
plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
avail_lists[node]) {
if (si->bdev) {
blkcg_schedule_throttle(bdev_get_queue(si->bdev),
true);
break;
}
}
spin_unlock(&swap_avail_lock);
}
#endif
static int __init swapfile_init(void)
{
int nid;