Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "First pull request for this merge window, there will also be a
  followup request with some stragglers.

  This pull request contains:

   - Fix for a thundering herd issue in the wbt block code (Anchal Agarwal)

   - A few NVMe pull requests:
       * Improved tracepoints (Keith)
       * Larger inline data support for RDMA (Steve Wise)
       * RDMA setup/teardown fixes (Sagi)
       * Effects log support for NVMe target (Chaitanya Kulkarni)
       * Buffered IO support for NVMe target (Chaitanya Kulkarni)
       * TP4004 (ANA) support (Christoph)
       * Various NVMe fixes

   - Block io-latency controller support. Much needed support for
     properly containing block devices. (Josef)

   - Series improving how we handle sense information on the stack (Kees)

   - Lightnvm fixes and updates/improvements (Mathias/Javier et al)

   - Zoned device support for null_blk (Matias)

   - AIX partition fixes (Mauricio Faria de Oliveira)

   - DIF checksum code made generic (Max Gurtovoy)

   - Add support for discard in iostats (Michael Callahan / Tejun)

   - Set of updates for BFQ (Paolo)

   - Removal of async write support for bsg (Christoph)

   - Bio page dirtying and clone fixups (Christoph)

   - Set of bcache fix/changes (via Coly)

   - Series improving blk-mq queue setup/teardown speed (Ming)

   - Series improving merging performance on blk-mq (Ming)

   - Lots of other fixes and cleanups from a slew of folks"

* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
  blkcg: Make blkg_root_lookup() work for queues in bypass mode
  bcache: fix error setting writeback_rate through sysfs interface
  null_blk: add lock drop/acquire annotation
  Blk-throttle: reduce tail io latency when iops limit is enforced
  block: paride: pd: mark expected switch fall-throughs
  block: Ensure that a request queue is dissociated from the cgroup controller
  block: Introduce blk_exit_queue()
  blkcg: Introduce blkg_root_lookup()
  block: Remove two superfluous #include directives
  blk-mq: count the hctx as active before allocating tag
  block: bvec_nr_vecs() returns value for wrong slab
  bcache: trivial - remove tailing backslash in macro BTREE_FLAG
  bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
  bcache: set max writeback rate when I/O request is idle
  bcache: add code comments for bset.c
  bcache: fix mistaken comments in request.c
  bcache: fix mistaken code comments in bcache.h
  bcache: add a comment in super.c
  bcache: avoid unncessary cache prefetch bch_btree_node_get()
  bcache: display rate debug parameters to 0 when writeback is not running
  ...
mm/huge_memory.c

@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
     VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-    if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+    if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
         put_page(page);
         count_vm_event(THP_FAULT_FALLBACK);
         return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
         pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
                                        vmf->address, page_to_nid(page));
         if (unlikely(!pages[i] ||
-                     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+                     mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
                              GFP_KERNEL, &memcg, false))) {
             if (pages[i])
                 put_page(pages[i]);
@@ -1312,7 +1312,7 @@ alloc:
         goto out;
     }
 
-    if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+    if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
                     huge_gfp, &memcg, true))) {
         put_page(new_page);
         split_huge_pmd(vma, vmf->pmd, vmf->address);

mm/memcontrol.c

@@ -5600,6 +5600,19 @@ out:
     return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+            gfp_t gfp_mask, struct mem_cgroup **memcgp,
+            bool compound)
+{
+    struct mem_cgroup *memcg;
+    int ret;
+
+    ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+    memcg = *memcgp;
+    mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+    return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge
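Most of the call-site hunks that follow are the same mechanical conversion: each caller swaps mem_cgroup_try_charge() for the new mem_cgroup_try_charge_delay() wrapper defined above and keeps its error handling unchanged, because the wrapper preserves the original return contract and only bolts a congestion check onto the end of the charge. A minimal standalone model of that wrapper pattern; every identifier below is a stand-in invented for this sketch, not kernel API:

#include <stdbool.h>
#include <stdio.h>

struct cgroup { const char *name; };

/* stand-in for the unchanged mem_cgroup_try_charge(): 0 on success */
static int charge(struct cgroup *cg) { (void)cg; return 0; }

/* stand-in for blk_cgroup_congested() */
static bool io_congested(void) { return true; }

/* stand-in for blkcg_schedule_throttle(): note the debt, sleep later */
static void schedule_throttle(struct cgroup *cg)
{
    printf("%s: throttle before returning to userspace\n", cg->name);
}

/* the wrapper pattern: same contract as charge(), plus throttling */
static int charge_delay(struct cgroup *cg)
{
    int ret = charge(cg);

    if (io_congested())
        schedule_throttle(cg);
    return ret;
}

int main(void)
{
    struct cgroup cg = { .name = "demo" };

    return charge_delay(&cg);  /* callers only rename charge() to charge_delay() */
}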
mm/memory.c
@@ -2524,7 +2524,7 @@ static int wp_page_copy(struct vm_fault *vmf)
         cow_user_page(new_page, old_page, vmf->address, vma);
     }
 
-    if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+    if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
         goto oom_free_new;
 
     __SetPageUptodate(new_page);
@@ -3024,8 +3024,8 @@ int do_swap_page(struct vm_fault *vmf)
         goto out_page;
     }
 
-    if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-                &memcg, false)) {
+    if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+                &memcg, false)) {
         ret = VM_FAULT_OOM;
         goto out_page;
     }
@@ -3186,7 +3186,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
     if (!page)
         goto oom;
 
-    if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+    if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+                    false))
         goto oom_free_page;
 
     /*
@@ -3682,7 +3683,7 @@ static int do_cow_fault(struct vm_fault *vmf)
     if (!vmf->cow_page)
         return VM_FAULT_OOM;
 
-    if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+    if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
                 &vmf->memcg, false)) {
         put_page(vmf->cow_page);
         return VM_FAULT_OOM;
mm/page_io.c

@@ -338,7 +338,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
         ret = -ENOMEM;
         goto out;
     }
-    bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+    bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
+    bio_associate_blkcg_from_page(bio, page);
     count_swpout_vm_event(page);
     set_page_writeback(page);
     unlock_page(page);
mm/readahead.c

@@ -19,6 +19,7 @@
 #include <linux/syscalls.h>
 #include <linux/file.h>
 #include <linux/mm_inline.h>
+#include <linux/blk-cgroup.h>
 
 #include "internal.h"
 
@@ -385,6 +386,7 @@ ondemand_readahead(struct address_space *mapping,
 {
     struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
     unsigned long max_pages = ra->ra_pages;
+    unsigned long add_pages;
     pgoff_t prev_offset;
 
     /*
@@ -474,10 +476,17 @@ readit:
      * Will this read hit the readahead marker made by itself?
      * If so, trigger the readahead marker hit now, and merge
      * the resulted next readahead window into the current one.
+     * Take care of maximum IO pages as above.
      */
     if (offset == ra->start && ra->size == ra->async_size) {
-        ra->async_size = get_next_ra_size(ra, max_pages);
-        ra->size += ra->async_size;
+        add_pages = get_next_ra_size(ra, max_pages);
+        if (ra->size + add_pages <= max_pages) {
+            ra->async_size = add_pages;
+            ra->size += add_pages;
+        } else {
+            ra->size = max_pages;
+            ra->async_size = max_pages >> 1;
+        }
     }
 
     return ra_submit(ra, mapping, filp);
@@ -505,6 +514,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
     if (!ra->ra_pages)
         return;
 
+    if (blk_cgroup_congested())
+        return;
+
     /* be dumb */
     if (filp && (filp->f_mode & FMODE_RANDOM)) {
         force_page_cache_readahead(mapping, filp, offset, req_size);
@@ -555,6 +567,9 @@ page_cache_async_readahead(struct address_space *mapping,
     if (inode_read_congested(mapping->host))
         return;
 
+    if (blk_cgroup_congested())
+        return;
+
     /* do read-ahead */
     ondemand_readahead(mapping, ra, filp, true, offset, req_size);
 }
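For the @@ -474,10 +476,17 @@ hunk above: the old code merged the next readahead window into the current one unconditionally, so ra->size could grow past max_pages; the new code only grows the window while the sum stays within max_pages and otherwise clamps it. A small standalone illustration of that arithmetic, where struct ra_state and next_window() are simplified stand-ins for file_ra_state and get_next_ra_size():

#include <stdio.h>

struct ra_state { unsigned long size, async_size; };

/* stand-in for get_next_ra_size(): pretend it proposes a full-sized window */
static unsigned long next_window(unsigned long max_pages)
{
    return max_pages;
}

static void merge_window(struct ra_state *ra, unsigned long max_pages)
{
    unsigned long add_pages = next_window(max_pages);

    if (ra->size + add_pages <= max_pages) {
        ra->async_size = add_pages;
        ra->size += add_pages;
    } else {
        /* clamp: the merged window never exceeds max_pages */
        ra->size = max_pages;
        ra->async_size = max_pages >> 1;
    }
}

int main(void)
{
    struct ra_state ra = { .size = 96, .async_size = 96 };

    merge_window(&ra, 128);
    /* old logic would give size = 96 + 128 = 224; now size=128, async_size=64 */
    printf("size=%lu async_size=%lu\n", ra.size, ra.async_size);
    return 0;
}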
mm/shmem.c
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
      * the shmem_swaplist_mutex which might hold up shmem_writepage().
      * Charged back to the user (not to caller) when swap account is used.
      */
-    error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-            false);
+    error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+            &memcg, false);
     if (error)
         goto out;
     /* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1713,7 +1713,7 @@ repeat:
             goto failed;
         }
 
-        error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+        error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
                 false);
         if (!error) {
             error = shmem_add_to_page_cache(page, mapping, index,
@@ -1819,7 +1819,7 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode,
         if (sgp == SGP_WRITE)
             __SetPageReferenced(page);
 
-        error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+        error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
                 PageTransHuge(page));
         if (error)
             goto unacct;
@@ -2292,7 +2292,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
     __SetPageSwapBacked(page);
     __SetPageUptodate(page);
 
-    ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+    ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
     if (ret)
         goto out_release;
 
mm/swapfile.c

@@ -3745,6 +3745,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
         }
     }
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+                  gfp_t gfp_mask)
+{
+    struct swap_info_struct *si, *next;
+    if (!(gfp_mask & __GFP_IO) || !memcg)
+        return;
+
+    if (!blk_cgroup_congested())
+        return;
+
+    /*
+     * We've already scheduled a throttle, avoid taking the global swap
+     * lock.
+     */
+    if (current->throttle_queue)
+        return;
+
+    spin_lock(&swap_avail_lock);
+    plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+                  avail_lists[node]) {
+        if (si->bdev) {
+            blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+                        true);
+            break;
+        }
+    }
+    spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
     int nid;