Merge branch 'for-4.2/writeback' of git://git.kernel.dk/linux-block
Pull cgroup writeback support from Jens Axboe: "This is the big pull request for adding cgroup writeback support. This code has been in development for a long time, and it has been simmering in for-next for a good chunk of this cycle too. This is one of those problems that has been talked about for at least half a decade; finally there's a solution and code to go with it. Also see last week's writeup on LWN: http://lwn.net/Articles/648292/" * 'for-4.2/writeback' of git://git.kernel.dk/linux-block: (85 commits) writeback, blkio: add documentation for cgroup writeback support vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB writeback: do foreign inode detection iff cgroup writeback is enabled v9fs: fix error handling in v9fs_session_init() bdi: fix wrong error return value in cgwb_create() buffer: remove unusued 'ret' variable writeback: disassociate inodes from dying bdi_writebacks writeback: implement foreign cgroup inode bdi_writeback switching writeback: add lockdep annotation to inode_to_wb() writeback: use unlocked_inode_to_wb transaction in inode_congested() writeback: implement unlocked_inode_to_wb transaction and use it for stat updates writeback: implement [locked_]inode_to_wb_and_lock_list() writeback: implement foreign cgroup inode detection writeback: make writeback_control track the inode being written back writeback: relocate wb[_try]_get(), wb_put(), inode_{attach|detach}_wb() mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use writeback: implement memcg writeback domain based throttling writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes writeback: implement memcg wb_domain writeback: update wb_over_bg_thresh() to use wb_domain aware operations ...
这个提交包含在:
652
mm/backing-dev.c
652
mm/backing-dev.c
@@ -18,6 +18,7 @@ struct backing_dev_info noop_backing_dev_info = {
|
||||
.name = "noop",
|
||||
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
|
||||
|
||||
static struct class *bdi_class;
|
||||
|
||||
@@ -48,7 +49,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
|
||||
struct bdi_writeback *wb = &bdi->wb;
|
||||
unsigned long background_thresh;
|
||||
unsigned long dirty_thresh;
|
||||
unsigned long bdi_thresh;
|
||||
unsigned long wb_thresh;
|
||||
unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
|
||||
struct inode *inode;
|
||||
|
||||
@@ -66,7 +67,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
|
||||
spin_unlock(&wb->list_lock);
|
||||
|
||||
global_dirty_limits(&background_thresh, &dirty_thresh);
|
||||
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
|
||||
wb_thresh = wb_calc_thresh(wb, dirty_thresh);
|
||||
|
||||
#define K(x) ((x) << (PAGE_SHIFT - 10))
|
||||
seq_printf(m,
|
||||
@@ -84,19 +85,19 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
|
||||
"b_dirty_time: %10lu\n"
|
||||
"bdi_list: %10u\n"
|
||||
"state: %10lx\n",
|
||||
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
|
||||
(unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
|
||||
K(bdi_thresh),
|
||||
(unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
|
||||
(unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
|
||||
K(wb_thresh),
|
||||
K(dirty_thresh),
|
||||
K(background_thresh),
|
||||
(unsigned long) K(bdi_stat(bdi, BDI_DIRTIED)),
|
||||
(unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
|
||||
(unsigned long) K(bdi->write_bandwidth),
|
||||
(unsigned long) K(wb_stat(wb, WB_DIRTIED)),
|
||||
(unsigned long) K(wb_stat(wb, WB_WRITTEN)),
|
||||
(unsigned long) K(wb->write_bandwidth),
|
||||
nr_dirty,
|
||||
nr_io,
|
||||
nr_more_io,
|
||||
nr_dirty_time,
|
||||
!list_empty(&bdi->bdi_list), bdi->state);
|
||||
!list_empty(&bdi->bdi_list), bdi->wb.state);
|
||||
#undef K
|
||||
|
||||
return 0;
|
||||
@@ -255,13 +256,8 @@ static int __init default_bdi_init(void)
|
||||
}
|
||||
subsys_initcall(default_bdi_init);
|
||||
|
||||
int bdi_has_dirty_io(struct backing_dev_info *bdi)
|
||||
{
|
||||
return wb_has_dirty_io(&bdi->wb);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is used when the first inode for this bdi is marked dirty. It
|
||||
* This function is used when the first inode for this wb is marked dirty. It
|
||||
* wakes-up the corresponding bdi thread which should then take care of the
|
||||
* periodic background write-out of dirty inodes. Since the write-out would
|
||||
* start only 'dirty_writeback_interval' centisecs from now anyway, we just
|
||||
@@ -274,29 +270,497 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
|
||||
* We have to be careful not to postpone flush work if it is scheduled for
|
||||
* earlier. Thus we use queue_delayed_work().
|
||||
*/
|
||||
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
|
||||
void wb_wakeup_delayed(struct bdi_writeback *wb)
|
||||
{
|
||||
unsigned long timeout;
|
||||
|
||||
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
|
||||
spin_lock_bh(&bdi->wb_lock);
|
||||
if (test_bit(BDI_registered, &bdi->state))
|
||||
queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
|
||||
spin_unlock_bh(&bdi->wb_lock);
|
||||
spin_lock_bh(&wb->work_lock);
|
||||
if (test_bit(WB_registered, &wb->state))
|
||||
queue_delayed_work(bdi_wq, &wb->dwork, timeout);
|
||||
spin_unlock_bh(&wb->work_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove bdi from bdi_list, and ensure that it is no longer visible
|
||||
* Initial write bandwidth: 100 MB/s
|
||||
*/
|
||||
static void bdi_remove_from_list(struct backing_dev_info *bdi)
|
||||
{
|
||||
spin_lock_bh(&bdi_lock);
|
||||
list_del_rcu(&bdi->bdi_list);
|
||||
spin_unlock_bh(&bdi_lock);
|
||||
#define INIT_BW (100 << (20 - PAGE_SHIFT))
|
||||
|
||||
synchronize_rcu_expedited();
|
||||
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
|
||||
gfp_t gfp)
|
||||
{
|
||||
int i, err;
|
||||
|
||||
memset(wb, 0, sizeof(*wb));
|
||||
|
||||
wb->bdi = bdi;
|
||||
wb->last_old_flush = jiffies;
|
||||
INIT_LIST_HEAD(&wb->b_dirty);
|
||||
INIT_LIST_HEAD(&wb->b_io);
|
||||
INIT_LIST_HEAD(&wb->b_more_io);
|
||||
INIT_LIST_HEAD(&wb->b_dirty_time);
|
||||
spin_lock_init(&wb->list_lock);
|
||||
|
||||
wb->bw_time_stamp = jiffies;
|
||||
wb->balanced_dirty_ratelimit = INIT_BW;
|
||||
wb->dirty_ratelimit = INIT_BW;
|
||||
wb->write_bandwidth = INIT_BW;
|
||||
wb->avg_write_bandwidth = INIT_BW;
|
||||
|
||||
spin_lock_init(&wb->work_lock);
|
||||
INIT_LIST_HEAD(&wb->work_list);
|
||||
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
|
||||
|
||||
err = fprop_local_init_percpu(&wb->completions, gfp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
|
||||
err = percpu_counter_init(&wb->stat[i], 0, gfp);
|
||||
if (err) {
|
||||
while (--i)
|
||||
percpu_counter_destroy(&wb->stat[i]);
|
||||
fprop_local_destroy_percpu(&wb->completions);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove bdi from the global list and shutdown any threads we have running
|
||||
*/
|
||||
static void wb_shutdown(struct bdi_writeback *wb)
|
||||
{
|
||||
/* Make sure nobody queues further work */
|
||||
spin_lock_bh(&wb->work_lock);
|
||||
if (!test_and_clear_bit(WB_registered, &wb->state)) {
|
||||
spin_unlock_bh(&wb->work_lock);
|
||||
return;
|
||||
}
|
||||
spin_unlock_bh(&wb->work_lock);
|
||||
|
||||
/*
|
||||
* Drain work list and shutdown the delayed_work. !WB_registered
|
||||
* tells wb_workfn() that @wb is dying and its work_list needs to
|
||||
* be drained no matter what.
|
||||
*/
|
||||
mod_delayed_work(bdi_wq, &wb->dwork, 0);
|
||||
flush_delayed_work(&wb->dwork);
|
||||
WARN_ON(!list_empty(&wb->work_list));
|
||||
}
|
||||
|
||||
static void wb_exit(struct bdi_writeback *wb)
|
||||
{
|
||||
int i;
|
||||
|
||||
WARN_ON(delayed_work_pending(&wb->dwork));
|
||||
|
||||
for (i = 0; i < NR_WB_STAT_ITEMS; i++)
|
||||
percpu_counter_destroy(&wb->stat[i]);
|
||||
|
||||
fprop_local_destroy_percpu(&wb->completions);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
#include <linux/memcontrol.h>
|
||||
|
||||
/*
|
||||
* cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
|
||||
* blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
|
||||
* protected. cgwb_release_wait is used to wait for the completion of cgwb
|
||||
* releases from bdi destruction path.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(cgwb_lock);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
|
||||
|
||||
/**
|
||||
* wb_congested_get_create - get or create a wb_congested
|
||||
* @bdi: associated bdi
|
||||
* @blkcg_id: ID of the associated blkcg
|
||||
* @gfp: allocation mask
|
||||
*
|
||||
* Look up the wb_congested for @blkcg_id on @bdi. If missing, create one.
|
||||
* The returned wb_congested has its reference count incremented. Returns
|
||||
* NULL on failure.
|
||||
*/
|
||||
struct bdi_writeback_congested *
|
||||
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
|
||||
{
|
||||
struct bdi_writeback_congested *new_congested = NULL, *congested;
|
||||
struct rb_node **node, *parent;
|
||||
unsigned long flags;
|
||||
|
||||
if (blkcg_id == 1)
|
||||
return &bdi->wb_congested;
|
||||
retry:
|
||||
spin_lock_irqsave(&cgwb_lock, flags);
|
||||
|
||||
node = &bdi->cgwb_congested_tree.rb_node;
|
||||
parent = NULL;
|
||||
|
||||
while (*node != NULL) {
|
||||
parent = *node;
|
||||
congested = container_of(parent, struct bdi_writeback_congested,
|
||||
rb_node);
|
||||
if (congested->blkcg_id < blkcg_id)
|
||||
node = &parent->rb_left;
|
||||
else if (congested->blkcg_id > blkcg_id)
|
||||
node = &parent->rb_right;
|
||||
else
|
||||
goto found;
|
||||
}
|
||||
|
||||
if (new_congested) {
|
||||
/* !found and storage for new one already allocated, insert */
|
||||
congested = new_congested;
|
||||
new_congested = NULL;
|
||||
rb_link_node(&congested->rb_node, parent, node);
|
||||
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
|
||||
atomic_inc(&bdi->usage_cnt);
|
||||
goto found;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&cgwb_lock, flags);
|
||||
|
||||
/* allocate storage for new one and retry */
|
||||
new_congested = kzalloc(sizeof(*new_congested), gfp);
|
||||
if (!new_congested)
|
||||
return NULL;
|
||||
|
||||
atomic_set(&new_congested->refcnt, 0);
|
||||
new_congested->bdi = bdi;
|
||||
new_congested->blkcg_id = blkcg_id;
|
||||
goto retry;
|
||||
|
||||
found:
|
||||
atomic_inc(&congested->refcnt);
|
||||
spin_unlock_irqrestore(&cgwb_lock, flags);
|
||||
kfree(new_congested);
|
||||
return congested;
|
||||
}
|
||||
|
||||
/**
|
||||
* wb_congested_put - put a wb_congested
|
||||
* @congested: wb_congested to put
|
||||
*
|
||||
* Put @congested and destroy it if the refcnt reaches zero.
|
||||
*/
|
||||
void wb_congested_put(struct bdi_writeback_congested *congested)
|
||||
{
|
||||
struct backing_dev_info *bdi = congested->bdi;
|
||||
unsigned long flags;
|
||||
|
||||
if (congested->blkcg_id == 1)
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
|
||||
rb_erase(&congested->rb_node, &congested->bdi->cgwb_congested_tree);
|
||||
spin_unlock_irqrestore(&cgwb_lock, flags);
|
||||
kfree(congested);
|
||||
|
||||
if (atomic_dec_and_test(&bdi->usage_cnt))
|
||||
wake_up_all(&cgwb_release_wait);
|
||||
}
|
||||
|
||||
static void cgwb_release_workfn(struct work_struct *work)
|
||||
{
|
||||
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
|
||||
release_work);
|
||||
struct backing_dev_info *bdi = wb->bdi;
|
||||
|
||||
wb_shutdown(wb);
|
||||
|
||||
css_put(wb->memcg_css);
|
||||
css_put(wb->blkcg_css);
|
||||
wb_congested_put(wb->congested);
|
||||
|
||||
fprop_local_destroy_percpu(&wb->memcg_completions);
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
wb_exit(wb);
|
||||
kfree_rcu(wb, rcu);
|
||||
|
||||
if (atomic_dec_and_test(&bdi->usage_cnt))
|
||||
wake_up_all(&cgwb_release_wait);
|
||||
}
|
||||
|
||||
static void cgwb_release(struct percpu_ref *refcnt)
|
||||
{
|
||||
struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
|
||||
refcnt);
|
||||
schedule_work(&wb->release_work);
|
||||
}
|
||||
|
||||
static void cgwb_kill(struct bdi_writeback *wb)
|
||||
{
|
||||
lockdep_assert_held(&cgwb_lock);
|
||||
|
||||
WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
|
||||
list_del(&wb->memcg_node);
|
||||
list_del(&wb->blkcg_node);
|
||||
percpu_ref_kill(&wb->refcnt);
|
||||
}
|
||||
|
||||
/*
 * cgwb_create - create the wb for @memcg_css on @bdi and link it
 * @bdi: target bdi
 * @memcg_css: css of the memcg the wb is keyed by
 * @gfp: allocation mask
 *
 * If @bdi's cgwb_tree already holds a wb for @memcg_css whose blkcg still
 * matches, nothing is created; a wb with a stale blkcg is killed first.
 * Otherwise a new wb is allocated, initialized and inserted into the tree
 * and into the memcg and blkcg cgwb lists.
 *
 * Returns 0 if a matching wb already existed, was inserted, or another
 * instance won the insertion race (-EEXIST is folded into 0).  Returns
 * -ENOMEM on allocation failure, -ENODEV if @bdi's root wb is not
 * registered or either cgroup list has been marked offline, or the error
 * of whichever initialization step failed.
 */
static int cgwb_create(struct backing_dev_info *bdi,
		       struct cgroup_subsys_state *memcg_css, gfp_t gfp)
{
	struct mem_cgroup *memcg;
	struct cgroup_subsys_state *blkcg_css;
	struct blkcg *blkcg;
	struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
	struct bdi_writeback *wb;
	unsigned long flags;
	int ret = 0;

	memcg = mem_cgroup_from_css(memcg_css);
	/* this reference on blkcg_css is dropped at out_put on every exit */
	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &blkio_cgrp_subsys);
	blkcg = css_to_blkcg(blkcg_css);
	memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
	blkcg_cgwb_list = &blkcg->cgwb_list;

	/* look up again under lock and discard on blkcg mismatch */
	spin_lock_irqsave(&cgwb_lock, flags);
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb && wb->blkcg_css != blkcg_css) {
		cgwb_kill(wb);
		wb = NULL;
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (wb)
		goto out_put;

	/* need to create a new one */
	wb = kmalloc(sizeof(*wb), gfp);
	if (!wb) {
		/* must not return directly: blkcg_css still has to be put */
		ret = -ENOMEM;
		goto out_put;
	}

	ret = wb_init(wb, bdi, gfp);
	if (ret)
		goto err_free;

	ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp);
	if (ret)
		goto err_wb_exit;

	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
	if (ret)
		goto err_ref_exit;

	wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
	if (!wb->congested) {
		ret = -ENOMEM;
		goto err_fprop_exit;
	}

	wb->memcg_css = memcg_css;
	wb->blkcg_css = blkcg_css;
	INIT_WORK(&wb->release_work, cgwb_release_workfn);
	set_bit(WB_registered, &wb->state);

	/*
	 * The root wb determines the registered state of the whole bdi and
	 * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate
	 * whether they're still online.  Don't link @wb if any is dead.
	 * See wb_memcg_offline() and wb_blkcg_offline().
	 */
	ret = -ENODEV;
	spin_lock_irqsave(&cgwb_lock, flags);
	if (test_bit(WB_registered, &bdi->wb.state) &&
	    blkcg_cgwb_list->next && memcg_cgwb_list->next) {
		/* we might have raced another instance of this function */
		ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
		if (!ret) {
			atomic_inc(&bdi->usage_cnt);
			list_add(&wb->memcg_node, memcg_cgwb_list);
			list_add(&wb->blkcg_node, blkcg_cgwb_list);
			/* references owned by the now-linked @wb */
			css_get(memcg_css);
			css_get(blkcg_css);
		}
	}
	spin_unlock_irqrestore(&cgwb_lock, flags);
	if (ret) {
		/* losing the insertion race is not an error for the caller */
		if (ret == -EEXIST)
			ret = 0;
		goto err_put_congested;
	}
	goto out_put;

err_put_congested:
	wb_congested_put(wb->congested);
err_fprop_exit:
	fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
	percpu_ref_exit(&wb->refcnt);
err_wb_exit:
	wb_exit(wb);
err_free:
	kfree(wb);
out_put:
	css_put(blkcg_css);
	return ret;
}
|
||||
|
||||
/**
|
||||
* wb_get_create - get wb for a given memcg, create if necessary
|
||||
* @bdi: target bdi
|
||||
* @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
|
||||
* @gfp: allocation mask to use
|
||||
*
|
||||
* Try to get the wb for @memcg_css on @bdi. If it doesn't exist, try to
|
||||
* create one. The returned wb has its refcount incremented.
|
||||
*
|
||||
* This function uses css_get() on @memcg_css and thus expects its refcnt
|
||||
* to be positive on invocation. IOW, rcu_read_lock() protection on
|
||||
* @memcg_css isn't enough. try_get it before calling this function.
|
||||
*
|
||||
* A wb is keyed by its associated memcg. As blkcg implicitly enables
|
||||
* memcg on the default hierarchy, memcg association is guaranteed to be
|
||||
* more specific (equal or descendant to the associated blkcg) and thus can
|
||||
* identify both the memcg and blkcg associations.
|
||||
*
|
||||
* Because the blkcg associated with a memcg may change as blkcg is enabled
|
||||
* and disabled closer to root in the hierarchy, each wb keeps track of
|
||||
* both the memcg and blkcg associated with it and verifies the blkcg on
|
||||
* each lookup. On mismatch, the existing wb is discarded and a new one is
|
||||
* created.
|
||||
*/
|
||||
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
|
||||
struct cgroup_subsys_state *memcg_css,
|
||||
gfp_t gfp)
|
||||
{
|
||||
struct bdi_writeback *wb;
|
||||
|
||||
might_sleep_if(gfp & __GFP_WAIT);
|
||||
|
||||
if (!memcg_css->parent)
|
||||
return &bdi->wb;
|
||||
|
||||
do {
|
||||
rcu_read_lock();
|
||||
wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
|
||||
if (wb) {
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
|
||||
/* see whether the blkcg association has changed */
|
||||
blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
|
||||
&blkio_cgrp_subsys);
|
||||
if (unlikely(wb->blkcg_css != blkcg_css ||
|
||||
!wb_tryget(wb)))
|
||||
wb = NULL;
|
||||
css_put(blkcg_css);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
|
||||
|
||||
return wb;
|
||||
}
|
||||
|
||||
static void cgwb_bdi_init(struct backing_dev_info *bdi)
|
||||
{
|
||||
bdi->wb.memcg_css = mem_cgroup_root_css;
|
||||
bdi->wb.blkcg_css = blkcg_root_css;
|
||||
bdi->wb_congested.blkcg_id = 1;
|
||||
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
|
||||
bdi->cgwb_congested_tree = RB_ROOT;
|
||||
atomic_set(&bdi->usage_cnt, 1);
|
||||
}
|
||||
|
||||
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
|
||||
{
|
||||
struct radix_tree_iter iter;
|
||||
void **slot;
|
||||
|
||||
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
|
||||
|
||||
spin_lock_irq(&cgwb_lock);
|
||||
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
|
||||
cgwb_kill(*slot);
|
||||
spin_unlock_irq(&cgwb_lock);
|
||||
|
||||
/*
|
||||
* All cgwb's and their congested states must be shutdown and
|
||||
* released before returning. Drain the usage counter to wait for
|
||||
* all cgwb's and cgwb_congested's ever created on @bdi.
|
||||
*/
|
||||
atomic_dec(&bdi->usage_cnt);
|
||||
wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
|
||||
}
|
||||
|
||||
/**
|
||||
* wb_memcg_offline - kill all wb's associated with a memcg being offlined
|
||||
* @memcg: memcg being offlined
|
||||
*
|
||||
* Also prevents creation of any new wb's associated with @memcg.
|
||||
*/
|
||||
void wb_memcg_offline(struct mem_cgroup *memcg)
|
||||
{
|
||||
LIST_HEAD(to_destroy);
|
||||
struct list_head *memcg_cgwb_list = mem_cgroup_cgwb_list(memcg);
|
||||
struct bdi_writeback *wb, *next;
|
||||
|
||||
spin_lock_irq(&cgwb_lock);
|
||||
list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node)
|
||||
cgwb_kill(wb);
|
||||
memcg_cgwb_list->next = NULL; /* prevent new wb's */
|
||||
spin_unlock_irq(&cgwb_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
|
||||
* @blkcg: blkcg being offlined
|
||||
*
|
||||
* Also prevents creation of any new wb's associated with @blkcg.
|
||||
*/
|
||||
void wb_blkcg_offline(struct blkcg *blkcg)
|
||||
{
|
||||
LIST_HEAD(to_destroy);
|
||||
struct bdi_writeback *wb, *next;
|
||||
|
||||
spin_lock_irq(&cgwb_lock);
|
||||
list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
|
||||
cgwb_kill(wb);
|
||||
blkcg->cgwb_list.next = NULL; /* prevent new wb's */
|
||||
spin_unlock_irq(&cgwb_lock);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
/* Without CONFIG_CGROUP_WRITEBACK this is an empty stub so bdi_init() can call it unconditionally. */
static void cgwb_bdi_init(struct backing_dev_info *bdi) { }
|
||||
/* Without CONFIG_CGROUP_WRITEBACK this is an empty stub so bdi_destroy() can call it unconditionally. */
static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
|
||||
|
||||
#endif /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
int bdi_init(struct backing_dev_info *bdi)
|
||||
{
|
||||
int err;
|
||||
|
||||
bdi->dev = NULL;
|
||||
|
||||
bdi->min_ratio = 0;
|
||||
bdi->max_ratio = 100;
|
||||
bdi->max_prop_frac = FPROP_FRAC_BASE;
|
||||
INIT_LIST_HEAD(&bdi->bdi_list);
|
||||
init_waitqueue_head(&bdi->wb_waitq);
|
||||
|
||||
err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
bdi->wb_congested.state = 0;
|
||||
bdi->wb.congested = &bdi->wb_congested;
|
||||
|
||||
cgwb_bdi_init(bdi);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(bdi_init);
|
||||
|
||||
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
@@ -315,7 +779,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
|
||||
bdi->dev = dev;
|
||||
|
||||
bdi_debug_register(bdi, dev_name(dev));
|
||||
set_bit(BDI_registered, &bdi->state);
|
||||
set_bit(WB_registered, &bdi->wb.state);
|
||||
|
||||
spin_lock_bh(&bdi_lock);
|
||||
list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
|
||||
@@ -333,103 +797,23 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
|
||||
EXPORT_SYMBOL(bdi_register_dev);
|
||||
|
||||
/*
|
||||
* Remove bdi from the global list and shutdown any threads we have running
|
||||
* Remove bdi from bdi_list, and ensure that it is no longer visible
|
||||
*/
|
||||
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
|
||||
static void bdi_remove_from_list(struct backing_dev_info *bdi)
|
||||
{
|
||||
/* Make sure nobody queues further work */
|
||||
spin_lock_bh(&bdi->wb_lock);
|
||||
if (!test_and_clear_bit(BDI_registered, &bdi->state)) {
|
||||
spin_unlock_bh(&bdi->wb_lock);
|
||||
return;
|
||||
}
|
||||
spin_unlock_bh(&bdi->wb_lock);
|
||||
spin_lock_bh(&bdi_lock);
|
||||
list_del_rcu(&bdi->bdi_list);
|
||||
spin_unlock_bh(&bdi_lock);
|
||||
|
||||
/*
|
||||
* Make sure nobody finds us on the bdi_list anymore
|
||||
*/
|
||||
bdi_remove_from_list(bdi);
|
||||
|
||||
/*
|
||||
* Drain work list and shutdown the delayed_work. At this point,
|
||||
* @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi
|
||||
* is dying and its work_list needs to be drained no matter what.
|
||||
*/
|
||||
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
|
||||
flush_delayed_work(&bdi->wb.dwork);
|
||||
synchronize_rcu_expedited();
|
||||
}
|
||||
|
||||
static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
|
||||
{
|
||||
memset(wb, 0, sizeof(*wb));
|
||||
|
||||
wb->bdi = bdi;
|
||||
wb->last_old_flush = jiffies;
|
||||
INIT_LIST_HEAD(&wb->b_dirty);
|
||||
INIT_LIST_HEAD(&wb->b_io);
|
||||
INIT_LIST_HEAD(&wb->b_more_io);
|
||||
INIT_LIST_HEAD(&wb->b_dirty_time);
|
||||
spin_lock_init(&wb->list_lock);
|
||||
INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initial write bandwidth: 100 MB/s
|
||||
*/
|
||||
#define INIT_BW (100 << (20 - PAGE_SHIFT))
|
||||
|
||||
int bdi_init(struct backing_dev_info *bdi)
|
||||
{
|
||||
int i, err;
|
||||
|
||||
bdi->dev = NULL;
|
||||
|
||||
bdi->min_ratio = 0;
|
||||
bdi->max_ratio = 100;
|
||||
bdi->max_prop_frac = FPROP_FRAC_BASE;
|
||||
spin_lock_init(&bdi->wb_lock);
|
||||
INIT_LIST_HEAD(&bdi->bdi_list);
|
||||
INIT_LIST_HEAD(&bdi->work_list);
|
||||
|
||||
bdi_wb_init(&bdi->wb, bdi);
|
||||
|
||||
for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
|
||||
err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL);
|
||||
if (err)
|
||||
goto err;
|
||||
}
|
||||
|
||||
bdi->dirty_exceeded = 0;
|
||||
|
||||
bdi->bw_time_stamp = jiffies;
|
||||
bdi->written_stamp = 0;
|
||||
|
||||
bdi->balanced_dirty_ratelimit = INIT_BW;
|
||||
bdi->dirty_ratelimit = INIT_BW;
|
||||
bdi->write_bandwidth = INIT_BW;
|
||||
bdi->avg_write_bandwidth = INIT_BW;
|
||||
|
||||
err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
|
||||
|
||||
if (err) {
|
||||
err:
|
||||
while (i--)
|
||||
percpu_counter_destroy(&bdi->bdi_stat[i]);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(bdi_init);
|
||||
|
||||
void bdi_destroy(struct backing_dev_info *bdi)
|
||||
{
|
||||
int i;
|
||||
|
||||
bdi_wb_shutdown(bdi);
|
||||
bdi_set_min_ratio(bdi, 0);
|
||||
|
||||
WARN_ON(!list_empty(&bdi->work_list));
|
||||
WARN_ON(delayed_work_pending(&bdi->wb.dwork));
|
||||
/* make sure nobody finds us on the bdi_list anymore */
|
||||
bdi_remove_from_list(bdi);
|
||||
wb_shutdown(&bdi->wb);
|
||||
cgwb_bdi_destroy(bdi);
|
||||
|
||||
if (bdi->dev) {
|
||||
bdi_debug_unregister(bdi);
|
||||
@@ -437,9 +821,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
|
||||
bdi->dev = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
|
||||
percpu_counter_destroy(&bdi->bdi_stat[i]);
|
||||
fprop_local_destroy_percpu(&bdi->completions);
|
||||
wb_exit(&bdi->wb);
|
||||
}
|
||||
EXPORT_SYMBOL(bdi_destroy);
|
||||
|
||||
@@ -472,31 +854,31 @@ static wait_queue_head_t congestion_wqh[2] = {
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
|
||||
};
|
||||
static atomic_t nr_bdi_congested[2];
|
||||
static atomic_t nr_wb_congested[2];
|
||||
|
||||
void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
|
||||
void clear_wb_congested(struct bdi_writeback_congested *congested, int sync)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
wait_queue_head_t *wqh = &congestion_wqh[sync];
|
||||
enum wb_state bit;
|
||||
|
||||
bit = sync ? BDI_sync_congested : BDI_async_congested;
|
||||
if (test_and_clear_bit(bit, &bdi->state))
|
||||
atomic_dec(&nr_bdi_congested[sync]);
|
||||
bit = sync ? WB_sync_congested : WB_async_congested;
|
||||
if (test_and_clear_bit(bit, &congested->state))
|
||||
atomic_dec(&nr_wb_congested[sync]);
|
||||
smp_mb__after_atomic();
|
||||
if (waitqueue_active(wqh))
|
||||
wake_up(wqh);
|
||||
}
|
||||
EXPORT_SYMBOL(clear_bdi_congested);
|
||||
EXPORT_SYMBOL(clear_wb_congested);
|
||||
|
||||
void set_bdi_congested(struct backing_dev_info *bdi, int sync)
|
||||
void set_wb_congested(struct bdi_writeback_congested *congested, int sync)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
enum wb_state bit;
|
||||
|
||||
bit = sync ? BDI_sync_congested : BDI_async_congested;
|
||||
if (!test_and_set_bit(bit, &bdi->state))
|
||||
atomic_inc(&nr_bdi_congested[sync]);
|
||||
bit = sync ? WB_sync_congested : WB_async_congested;
|
||||
if (!test_and_set_bit(bit, &congested->state))
|
||||
atomic_inc(&nr_wb_congested[sync]);
|
||||
}
|
||||
EXPORT_SYMBOL(set_bdi_congested);
|
||||
EXPORT_SYMBOL(set_wb_congested);
|
||||
|
||||
/**
|
||||
* congestion_wait - wait for a backing_dev to become uncongested
|
||||
@@ -555,7 +937,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
|
||||
* encountered in the current zone, yield if necessary instead
|
||||
* of sleeping on the congestion queue
|
||||
*/
|
||||
if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
|
||||
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
|
||||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
|
||||
cond_resched();
|
||||
|
||||
|
@@ -115,7 +115,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
|
||||
case POSIX_FADV_NOREUSE:
|
||||
break;
|
||||
case POSIX_FADV_DONTNEED:
|
||||
if (!bdi_write_congested(bdi))
|
||||
if (!inode_write_congested(mapping->host))
|
||||
__filemap_fdatawrite_range(mapping, offset, endbyte,
|
||||
WB_SYNC_NONE);
|
||||
|
||||
|
34
mm/filemap.c
34
mm/filemap.c
@@ -100,6 +100,7 @@
|
||||
* ->tree_lock (page_remove_rmap->set_page_dirty)
|
||||
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->inode->i_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat)
|
||||
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
|
||||
* ->inode->i_lock (zap_pte_range->set_page_dirty)
|
||||
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
|
||||
@@ -174,9 +175,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
|
||||
/*
|
||||
* Delete a page from the page cache and free it. Caller has to make
|
||||
* sure the page is locked and that nobody else uses it - or that usage
|
||||
* is safe. The caller must hold the mapping's tree_lock.
|
||||
* is safe. The caller must hold the mapping's tree_lock and
|
||||
* mem_cgroup_begin_page_stat().
|
||||
*/
|
||||
void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
void __delete_from_page_cache(struct page *page, void *shadow,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
|
||||
@@ -212,7 +215,8 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
* anyway will be cleared before returning page into buddy allocator.
|
||||
*/
|
||||
if (WARN_ON_ONCE(PageDirty(page)))
|
||||
account_page_cleaned(page, mapping);
|
||||
account_page_cleaned(page, mapping, memcg,
|
||||
inode_to_wb(mapping->host));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -226,14 +230,20 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
void delete_from_page_cache(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned long flags;
|
||||
|
||||
void (*freepage)(struct page *);
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
|
||||
freepage = mapping->a_ops->freepage;
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
__delete_from_page_cache(page, NULL);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
|
||||
memcg = mem_cgroup_begin_page_stat(page);
|
||||
spin_lock_irqsave(&mapping->tree_lock, flags);
|
||||
__delete_from_page_cache(page, NULL, memcg);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
|
||||
if (freepage)
|
||||
freepage(page);
|
||||
@@ -283,7 +293,9 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
|
||||
if (!mapping_cap_writeback_dirty(mapping))
|
||||
return 0;
|
||||
|
||||
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
|
||||
ret = do_writepages(mapping, &wbc);
|
||||
wbc_detach_inode(&wbc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -472,6 +484,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
|
||||
if (!error) {
|
||||
struct address_space *mapping = old->mapping;
|
||||
void (*freepage)(struct page *);
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned long flags;
|
||||
|
||||
pgoff_t offset = old->index;
|
||||
freepage = mapping->a_ops->freepage;
|
||||
@@ -480,8 +494,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
|
||||
new->mapping = mapping;
|
||||
new->index = offset;
|
||||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
__delete_from_page_cache(old, NULL);
|
||||
memcg = mem_cgroup_begin_page_stat(old);
|
||||
spin_lock_irqsave(&mapping->tree_lock, flags);
|
||||
__delete_from_page_cache(old, NULL, memcg);
|
||||
error = radix_tree_insert(&mapping->page_tree, offset, new);
|
||||
BUG_ON(error);
|
||||
mapping->nrpages++;
|
||||
@@ -493,7 +508,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
|
||||
__inc_zone_page_state(new, NR_FILE_PAGES);
|
||||
if (PageSwapBacked(new))
|
||||
__inc_zone_page_state(new, NR_SHMEM);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
mem_cgroup_migrate(old, new, true);
|
||||
radix_tree_preload_end();
|
||||
if (freepage)
|
||||
|
@@ -17,6 +17,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
|
||||
|
223
mm/memcontrol.c
223
mm/memcontrol.c
@@ -77,6 +77,7 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
|
||||
|
||||
#define MEM_CGROUP_RECLAIM_RETRIES 5
|
||||
static struct mem_cgroup *root_mem_cgroup __read_mostly;
|
||||
struct cgroup_subsys_state *mem_cgroup_root_css __read_mostly;
|
||||
|
||||
/* Whether the swap controller is active */
|
||||
#ifdef CONFIG_MEMCG_SWAP
|
||||
@@ -90,6 +91,7 @@ static const char * const mem_cgroup_stat_names[] = {
|
||||
"rss",
|
||||
"rss_huge",
|
||||
"mapped_file",
|
||||
"dirty",
|
||||
"writeback",
|
||||
"swap",
|
||||
};
|
||||
@@ -322,11 +324,6 @@ struct mem_cgroup {
|
||||
* percpu counter.
|
||||
*/
|
||||
struct mem_cgroup_stat_cpu __percpu *stat;
|
||||
/*
|
||||
* used when a cpu is offlined or other synchronizations
|
||||
* See mem_cgroup_read_stat().
|
||||
*/
|
||||
struct mem_cgroup_stat_cpu nocpu_base;
|
||||
spinlock_t pcp_counter_lock;
|
||||
|
||||
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
|
||||
@@ -346,6 +343,11 @@ struct mem_cgroup {
|
||||
atomic_t numainfo_updating;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
struct list_head cgwb_list;
|
||||
struct wb_domain cgwb_domain;
|
||||
#endif
|
||||
|
||||
/* List of events which userspace want to receive */
|
||||
struct list_head event_list;
|
||||
spinlock_t event_list_lock;
|
||||
@@ -596,6 +598,39 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
|
||||
return &memcg->css;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_css_from_page - css of the memcg associated with a page
|
||||
* @page: page of interest
|
||||
*
|
||||
* If memcg is bound to the default hierarchy, css of the memcg associated
|
||||
* with @page is returned. The returned css remains associated with @page
|
||||
* until it is released.
|
||||
*
|
||||
* If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
|
||||
* is returned.
|
||||
*
|
||||
* XXX: The above description of behavior on the default hierarchy isn't
|
||||
* strictly true yet as replace_page_cache_page() can modify the
|
||||
* association before @page is released even on the default hierarchy;
|
||||
* however, the current and planned usages don't mix the two functions
|
||||
* and replace_page_cache_page() will soon be updated to make the invariant
|
||||
* actually true.
|
||||
*/
|
||||
struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
memcg = page->mem_cgroup;
|
||||
|
||||
if (!memcg || !cgroup_on_dfl(memcg->css.cgroup))
|
||||
memcg = root_mem_cgroup;
|
||||
|
||||
rcu_read_unlock();
|
||||
return &memcg->css;
|
||||
}
|
||||
|
||||
static struct mem_cgroup_per_zone *
|
||||
mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
|
||||
{
|
||||
@@ -795,15 +830,8 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
|
||||
long val = 0;
|
||||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_possible_cpu(cpu)
|
||||
val += per_cpu(memcg->stat->count[idx], cpu);
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
spin_lock(&memcg->pcp_counter_lock);
|
||||
val += memcg->nocpu_base.count[idx];
|
||||
spin_unlock(&memcg->pcp_counter_lock);
|
||||
#endif
|
||||
put_online_cpus();
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -813,15 +841,8 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
|
||||
unsigned long val = 0;
|
||||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_possible_cpu(cpu)
|
||||
val += per_cpu(memcg->stat->events[idx], cpu);
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
spin_lock(&memcg->pcp_counter_lock);
|
||||
val += memcg->nocpu_base.events[idx];
|
||||
spin_unlock(&memcg->pcp_counter_lock);
|
||||
#endif
|
||||
put_online_cpus();
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -2020,6 +2041,7 @@ again:
|
||||
|
||||
return memcg;
|
||||
}
|
||||
EXPORT_SYMBOL(mem_cgroup_begin_page_stat);
|
||||
|
||||
/**
|
||||
* mem_cgroup_end_page_stat - finish a page state statistics transaction
|
||||
@@ -2038,6 +2060,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(mem_cgroup_end_page_stat);
|
||||
|
||||
/**
|
||||
* mem_cgroup_update_page_stat - update page state statistics
|
||||
@@ -2178,37 +2201,12 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
|
||||
mutex_unlock(&percpu_charge_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function drains percpu counter value from DEAD cpu and
|
||||
* move it to local cpu. Note that this function can be preempted.
|
||||
*/
|
||||
static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock(&memcg->pcp_counter_lock);
|
||||
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
|
||||
long x = per_cpu(memcg->stat->count[i], cpu);
|
||||
|
||||
per_cpu(memcg->stat->count[i], cpu) = 0;
|
||||
memcg->nocpu_base.count[i] += x;
|
||||
}
|
||||
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
|
||||
unsigned long x = per_cpu(memcg->stat->events[i], cpu);
|
||||
|
||||
per_cpu(memcg->stat->events[i], cpu) = 0;
|
||||
memcg->nocpu_base.events[i] += x;
|
||||
}
|
||||
spin_unlock(&memcg->pcp_counter_lock);
|
||||
}
|
||||
|
||||
static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
int cpu = (unsigned long)hcpu;
|
||||
struct memcg_stock_pcp *stock;
|
||||
struct mem_cgroup *iter;
|
||||
|
||||
if (action == CPU_ONLINE)
|
||||
return NOTIFY_OK;
|
||||
@@ -2216,9 +2214,6 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
|
||||
if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
|
||||
return NOTIFY_OK;
|
||||
|
||||
for_each_mem_cgroup(iter)
|
||||
mem_cgroup_drain_pcp_counter(iter, cpu);
|
||||
|
||||
stock = &per_cpu(memcg_stock, cpu);
|
||||
drain_stock(stock);
|
||||
return NOTIFY_OK;
|
||||
@@ -4004,6 +3999,98 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg)
|
||||
{
|
||||
return &memcg->cgwb_list;
|
||||
}
|
||||
|
||||
static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
|
||||
{
|
||||
return wb_domain_init(&memcg->cgwb_domain, gfp);
|
||||
}
|
||||
|
||||
static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
|
||||
{
|
||||
wb_domain_exit(&memcg->cgwb_domain);
|
||||
}
|
||||
|
||||
static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
|
||||
{
|
||||
wb_domain_size_changed(&memcg->cgwb_domain);
|
||||
}
|
||||
|
||||
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
|
||||
|
||||
if (!memcg->css.parent)
|
||||
return NULL;
|
||||
|
||||
return &memcg->cgwb_domain;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
|
||||
* @wb: bdi_writeback in question
|
||||
* @pavail: out parameter for number of available pages
|
||||
* @pdirty: out parameter for number of dirty pages
|
||||
* @pwriteback: out parameter for number of pages under writeback
|
||||
*
|
||||
* Determine the numbers of available, dirty, and writeback pages in @wb's
|
||||
* memcg. Dirty and writeback are self-explanatory. Available is a bit
|
||||
* more involved.
|
||||
*
|
||||
* A memcg's headroom is "min(max, high) - used". The available memory is
|
||||
* calculated as the lowest headroom of itself and the ancestors plus the
|
||||
* number of pages already being used for file pages. Note that this
|
||||
* doesn't consider the actual amount of available memory in the system.
|
||||
* The caller should further cap *@pavail accordingly.
|
||||
*/
|
||||
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
|
||||
unsigned long *pdirty, unsigned long *pwriteback)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
|
||||
struct mem_cgroup *parent;
|
||||
unsigned long head_room = PAGE_COUNTER_MAX;
|
||||
unsigned long file_pages;
|
||||
|
||||
*pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY);
|
||||
|
||||
/* this should eventually include NR_UNSTABLE_NFS */
|
||||
*pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
|
||||
|
||||
file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
|
||||
(1 << LRU_ACTIVE_FILE));
|
||||
while ((parent = parent_mem_cgroup(memcg))) {
|
||||
unsigned long ceiling = min(memcg->memory.limit, memcg->high);
|
||||
unsigned long used = page_counter_read(&memcg->memory);
|
||||
|
||||
head_room = min(head_room, ceiling - min(ceiling, used));
|
||||
memcg = parent;
|
||||
}
|
||||
|
||||
*pavail = file_pages + head_room;
|
||||
}
|
||||
|
||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
|
||||
static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
/*
|
||||
* DO NOT USE IN NEW FILES.
|
||||
*
|
||||
@@ -4388,9 +4475,15 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||
memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
|
||||
if (!memcg->stat)
|
||||
goto out_free;
|
||||
|
||||
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
|
||||
goto out_free_stat;
|
||||
|
||||
spin_lock_init(&memcg->pcp_counter_lock);
|
||||
return memcg;
|
||||
|
||||
out_free_stat:
|
||||
free_percpu(memcg->stat);
|
||||
out_free:
|
||||
kfree(memcg);
|
||||
return NULL;
|
||||
@@ -4417,6 +4510,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
free_mem_cgroup_per_zone_info(memcg, node);
|
||||
|
||||
free_percpu(memcg->stat);
|
||||
memcg_wb_domain_exit(memcg);
|
||||
kfree(memcg);
|
||||
}
|
||||
|
||||
@@ -4449,6 +4543,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
/* root ? */
|
||||
if (parent_css == NULL) {
|
||||
root_mem_cgroup = memcg;
|
||||
mem_cgroup_root_css = &memcg->css;
|
||||
page_counter_init(&memcg->memory, NULL);
|
||||
memcg->high = PAGE_COUNTER_MAX;
|
||||
memcg->soft_limit = PAGE_COUNTER_MAX;
|
||||
@@ -4467,7 +4562,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
memcg->kmemcg_id = -1;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
INIT_LIST_HEAD(&memcg->cgwb_list);
|
||||
#endif
|
||||
return &memcg->css;
|
||||
|
||||
free_out:
|
||||
@@ -4555,6 +4652,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
||||
vmpressure_cleanup(&memcg->vmpressure);
|
||||
|
||||
memcg_deactivate_kmem(memcg);
|
||||
|
||||
wb_memcg_offline(memcg);
|
||||
}
|
||||
|
||||
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||
@@ -4588,6 +4687,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
|
||||
memcg->low = 0;
|
||||
memcg->high = PAGE_COUNTER_MAX;
|
||||
memcg->soft_limit = PAGE_COUNTER_MAX;
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
@@ -4757,6 +4857,7 @@ static int mem_cgroup_move_account(struct page *page,
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
bool anon;
|
||||
|
||||
VM_BUG_ON(from == to);
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
@@ -4782,15 +4883,33 @@ static int mem_cgroup_move_account(struct page *page,
|
||||
if (page->mem_cgroup != from)
|
||||
goto out_unlock;
|
||||
|
||||
anon = PageAnon(page);
|
||||
|
||||
spin_lock_irqsave(&from->move_lock, flags);
|
||||
|
||||
if (!PageAnon(page) && page_mapped(page)) {
|
||||
if (!anon && page_mapped(page)) {
|
||||
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
|
||||
nr_pages);
|
||||
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
|
||||
nr_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* move_lock grabbed above and caller set from->moving_account, so
|
||||
* mem_cgroup_update_page_stat() will serialize updates to PageDirty.
|
||||
* So mapping should be stable for dirty pages.
|
||||
*/
|
||||
if (!anon && PageDirty(page)) {
|
||||
struct address_space *mapping = page_mapping(page);
|
||||
|
||||
if (mapping_cap_account_dirty(mapping)) {
|
||||
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_DIRTY],
|
||||
nr_pages);
|
||||
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_DIRTY],
|
||||
nr_pages);
|
||||
}
|
||||
}
|
||||
|
||||
if (PageWriteback(page)) {
|
||||
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK],
|
||||
nr_pages);
|
||||
@@ -5306,6 +5425,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
|
||||
|
||||
memcg->high = high;
|
||||
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
@@ -5338,6 +5458,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
|
1233
mm/page-writeback.c
1233
mm/page-writeback.c
文件差异内容过多而无法显示
加载差异
@@ -541,7 +541,7 @@ page_cache_async_readahead(struct address_space *mapping,
|
||||
/*
|
||||
* Defer asynchronous read-ahead on IO congestion.
|
||||
*/
|
||||
if (bdi_read_congested(inode_to_bdi(mapping->host)))
|
||||
if (inode_read_congested(mapping->host))
|
||||
return;
|
||||
|
||||
/* do read-ahead */
|
||||
|
@@ -30,6 +30,8 @@
|
||||
* swap_lock (in swap_duplicate, swap_info_get)
|
||||
* mmlist_lock (in mmput, drain_mmlist and others)
|
||||
* mapping->private_lock (in __set_page_dirty_buffers)
|
||||
* mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
|
||||
* mapping->tree_lock (widely used)
|
||||
* inode->i_lock (in set_page_dirty's __mark_inode_dirty)
|
||||
* bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
|
||||
* sb_lock (within inode_lock in fs/fs-writeback.c)
|
||||
|
@@ -116,9 +116,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
|
||||
* the VM has canceled the dirty bit (eg ext3 journaling).
|
||||
* Hence dirty accounting check is placed after invalidation.
|
||||
*/
|
||||
if (TestClearPageDirty(page))
|
||||
account_page_cleaned(page, mapping);
|
||||
|
||||
cancel_dirty_page(page);
|
||||
ClearPageMappedToDisk(page);
|
||||
delete_from_page_cache(page);
|
||||
return 0;
|
||||
@@ -512,19 +510,24 @@ EXPORT_SYMBOL(invalidate_mapping_pages);
|
||||
static int
|
||||
invalidate_complete_page2(struct address_space *mapping, struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned long flags;
|
||||
|
||||
if (page->mapping != mapping)
|
||||
return 0;
|
||||
|
||||
if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
|
||||
return 0;
|
||||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
memcg = mem_cgroup_begin_page_stat(page);
|
||||
spin_lock_irqsave(&mapping->tree_lock, flags);
|
||||
if (PageDirty(page))
|
||||
goto failed;
|
||||
|
||||
BUG_ON(page_has_private(page));
|
||||
__delete_from_page_cache(page, NULL);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
__delete_from_page_cache(page, NULL, memcg);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
|
||||
if (mapping->a_ops->freepage)
|
||||
mapping->a_ops->freepage(page);
|
||||
@@ -532,7 +535,8 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
|
||||
page_cache_release(page); /* pagecache ref */
|
||||
return 1;
|
||||
failed:
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
79
mm/vmscan.c
79
mm/vmscan.c
@@ -154,11 +154,42 @@ static bool global_reclaim(struct scan_control *sc)
|
||||
{
|
||||
return !sc->target_mem_cgroup;
|
||||
}
|
||||
|
||||
/**
|
||||
* sane_reclaim - is the usual dirty throttling mechanism operational?
|
||||
* @sc: scan_control in question
|
||||
*
|
||||
* The normal page dirty throttling mechanism in balance_dirty_pages() is
|
||||
* completely broken with the legacy memcg and direct stalling in
|
||||
* shrink_page_list() is used for throttling instead, which lacks all the
|
||||
* niceties such as fairness, adaptive pausing, bandwidth proportional
|
||||
* allocation and configurability.
|
||||
*
|
||||
* This function tests whether the vmscan currently in progress can assume
|
||||
* that the normal dirty throttling mechanism is operational.
|
||||
*/
|
||||
static bool sane_reclaim(struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg = sc->target_mem_cgroup;
|
||||
|
||||
if (!memcg)
|
||||
return true;
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
if (cgroup_on_dfl(mem_cgroup_css(memcg)->cgroup))
|
||||
return true;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
static bool global_reclaim(struct scan_control *sc)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool sane_reclaim(struct scan_control *sc)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long zone_reclaimable_pages(struct zone *zone)
|
||||
@@ -452,14 +483,13 @@ static inline int is_page_cache_freeable(struct page *page)
|
||||
return page_count(page) - page_has_private(page) == 2;
|
||||
}
|
||||
|
||||
static int may_write_to_queue(struct backing_dev_info *bdi,
|
||||
struct scan_control *sc)
|
||||
static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
|
||||
{
|
||||
if (current->flags & PF_SWAPWRITE)
|
||||
return 1;
|
||||
if (!bdi_write_congested(bdi))
|
||||
if (!inode_write_congested(inode))
|
||||
return 1;
|
||||
if (bdi == current->backing_dev_info)
|
||||
if (inode_to_bdi(inode) == current->backing_dev_info)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@@ -538,7 +568,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
|
||||
}
|
||||
if (mapping->a_ops->writepage == NULL)
|
||||
return PAGE_ACTIVATE;
|
||||
if (!may_write_to_queue(inode_to_bdi(mapping->host), sc))
|
||||
if (!may_write_to_inode(mapping->host, sc))
|
||||
return PAGE_KEEP;
|
||||
|
||||
if (clear_page_dirty_for_io(page)) {
|
||||
@@ -579,10 +609,14 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
|
||||
static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
bool reclaimed)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
BUG_ON(mapping != page_mapping(page));
|
||||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
memcg = mem_cgroup_begin_page_stat(page);
|
||||
spin_lock_irqsave(&mapping->tree_lock, flags);
|
||||
/*
|
||||
* The non racy check for a busy page.
|
||||
*
|
||||
@@ -620,7 +654,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
swp_entry_t swap = { .val = page_private(page) };
|
||||
mem_cgroup_swapout(page, swap);
|
||||
__delete_from_swap_cache(page);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
swapcache_free(swap);
|
||||
} else {
|
||||
void (*freepage)(struct page *);
|
||||
@@ -640,8 +675,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
if (reclaimed && page_is_file_cache(page) &&
|
||||
!mapping_exiting(mapping))
|
||||
shadow = workingset_eviction(mapping, page);
|
||||
__delete_from_page_cache(page, shadow);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
__delete_from_page_cache(page, shadow, memcg);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
|
||||
if (freepage != NULL)
|
||||
freepage(page);
|
||||
@@ -650,7 +686,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
return 1;
|
||||
|
||||
cannot_free:
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_end_page_stat(memcg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -917,7 +954,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
|
||||
*/
|
||||
mapping = page_mapping(page);
|
||||
if (((dirty || writeback) && mapping &&
|
||||
bdi_write_congested(inode_to_bdi(mapping->host))) ||
|
||||
inode_write_congested(mapping->host)) ||
|
||||
(writeback && PageReclaim(page)))
|
||||
nr_congested++;
|
||||
|
||||
@@ -935,10 +972,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
|
||||
* note that the LRU is being scanned too quickly and the
|
||||
* caller can stall after page list has been processed.
|
||||
*
|
||||
* 2) Global reclaim encounters a page, memcg encounters a
|
||||
* page that is not marked for immediate reclaim or
|
||||
* the caller does not have __GFP_IO. In this case mark
|
||||
* the page for immediate reclaim and continue scanning.
|
||||
* 2) Global or new memcg reclaim encounters a page that is
|
||||
* not marked for immediate reclaim or the caller does not
|
||||
* have __GFP_IO. In this case mark the page for immediate
|
||||
* reclaim and continue scanning.
|
||||
*
|
||||
* __GFP_IO is checked because a loop driver thread might
|
||||
* enter reclaim, and deadlock if it waits on a page for
|
||||
@@ -952,7 +989,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
|
||||
* grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
|
||||
* may_enter_fs here is liable to OOM on them.
|
||||
*
|
||||
* 3) memcg encounters a page that is not already marked
|
||||
* 3) Legacy memcg encounters a page that is not already marked
|
||||
* PageReclaim. memcg does not have any dirty pages
|
||||
* throttling so we could easily OOM just because too many
|
||||
* pages are in writeback and there is nothing else to
|
||||
@@ -967,7 +1004,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
|
||||
goto keep_locked;
|
||||
|
||||
/* Case 2 above */
|
||||
} else if (global_reclaim(sc) ||
|
||||
} else if (sane_reclaim(sc) ||
|
||||
!PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
|
||||
/*
|
||||
* This is slightly racy - end_page_writeback()
|
||||
@@ -1416,7 +1453,7 @@ static int too_many_isolated(struct zone *zone, int file,
|
||||
if (current_is_kswapd())
|
||||
return 0;
|
||||
|
||||
if (!global_reclaim(sc))
|
||||
if (!sane_reclaim(sc))
|
||||
return 0;
|
||||
|
||||
if (file) {
|
||||
@@ -1608,10 +1645,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
|
||||
set_bit(ZONE_WRITEBACK, &zone->flags);
|
||||
|
||||
/*
|
||||
* memcg will stall in page writeback so only consider forcibly
|
||||
* stalling for global reclaim
|
||||
* Legacy memcg will stall in page writeback so avoid forcibly
|
||||
* stalling here.
|
||||
*/
|
||||
if (global_reclaim(sc)) {
|
||||
if (sane_reclaim(sc)) {
|
||||
/*
|
||||
* Tag a zone as congested if all the dirty pages scanned were
|
||||
* backed by a congested BDI and wait_iff_congested will stall.
|
||||
|
在新工单中引用
屏蔽一个用户