mm: /proc/sys/vm/stat_refresh to force vmstat update

Provide /proc/sys/vm/stat_refresh to force an immediate update of per-cpu into global vmstats: useful to avoid a sleep(2) or whatever before checking counts when testing. Originally added to work around a bug which left counts stranded indefinitely on a cpu going idle (an inaccuracy magnified when small below-batch numbers represent "huge" amounts of memory), but I believe that bug is now fixed: nonetheless, this is still a useful knob.

Its schedule_on_each_cpu() is probably too expensive just to fold into reading /proc/meminfo itself: give this mode 0600 to prevent abuse. Allow a write or a read to do the same: nothing to read, but "grep -h Shmem /proc/sys/vm/stat_refresh /proc/meminfo" is convenient. Oh, and since global_page_state() itself is careful to disguise any underflow as 0, hack in an "Invalid argument" and pr_warn() if a counter is negative after the refresh - this helped to fix a misaccounting of NR_ISOLATED_FILE in my migration code.

But on recent kernels, I find that NR_ALLOC_BATCH and NR_PAGES_SCANNED often go negative. I have not yet worked out why, but have no evidence that it's actually harmful. Punt for the moment by just ignoring the anomaly on those.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
9e18eb2935
commit
52b6f46bc1
60
mm/vmstat.c
60
mm/vmstat.c
@@ -1379,6 +1379,66 @@ static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
|
||||
int sysctl_stat_interval __read_mostly = HZ;
|
||||
static cpumask_var_t cpu_stat_off;
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static void refresh_vm_stats(struct work_struct *work)
|
||||
{
|
||||
refresh_cpu_vm_stats(true);
|
||||
}
|
||||
|
||||
int vmstat_refresh(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
long val;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The regular update, every sysctl_stat_interval, may come later
|
||||
* than expected: leaving a significant amount in per_cpu buckets.
|
||||
* This is particularly misleading when checking a quantity of HUGE
|
||||
* pages, immediately after running a test. /proc/sys/vm/stat_refresh,
|
||||
* which can equally be echo'ed to or cat'ted from (by root),
|
||||
* can be used to update the stats just before reading them.
|
||||
*
|
||||
* Oh, and since global_page_state() etc. are so careful to hide
|
||||
* transiently negative values, report an error here if any of
|
||||
* the stats is negative, so we know to go looking for imbalance.
|
||||
*/
|
||||
err = schedule_on_each_cpu(refresh_vm_stats);
|
||||
if (err)
|
||||
return err;
|
||||
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
|
||||
val = atomic_long_read(&vm_stat[i]);
|
||||
if (val < 0) {
|
||||
switch (i) {
|
||||
case NR_ALLOC_BATCH:
|
||||
case NR_PAGES_SCANNED:
|
||||
/*
|
||||
* These are often seen to go negative in
|
||||
* recent kernels, but not to go permanently
|
||||
* negative. Whilst it would be nicer not to
|
||||
* have exceptions, rooting them out would be
|
||||
* another task, of rather low priority.
|
||||
*/
|
||||
break;
|
||||
default:
|
||||
pr_warn("%s: %s %ld\n",
|
||||
__func__, vmstat_text[i], val);
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
if (write)
|
||||
*ppos += *lenp;
|
||||
else
|
||||
*lenp = 0;
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
static void vmstat_update(struct work_struct *w)
|
||||
{
|
||||
if (refresh_cpu_vm_stats(true)) {
|
||||
|
Reference in New Issue
Block a user