Merge branch 'akpm' (patches from Andrew Morton)
Merge first patch-bomb from Andrew Morton:
 "Quite a lot of other stuff is banked up awaiting further next->mainline merging, but this batch contains:

  - Lots of random misc patches
  - OCFS2
  - Most of MM
  - backlight updates
  - lib/ updates
  - printk updates
  - checkpatch updates
  - epoll tweaking
  - rtc updates
  - hfs
  - hfsplus
  - documentation
  - procfs
  - update gcov to gcc-4.7 format
  - IPC"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (269 commits)
  ipc, msg: fix message length check for negative values
  ipc/util.c: remove unnecessary work pending test
  devpts: plug the memory leak in kill_sb
  ./Makefile: export initial ramdisk compression config option
  init/Kconfig: add option to disable kernel compression
  drivers: w1: make w1_slave::flags long to avoid memory corruption
  drivers/w1/masters/ds1wm.c: use dev_get_platdata()
  drivers/memstick/core/ms_block.c: fix unreachable state in h_msb_read_page()
  drivers/memstick/core/mspro_block.c: fix attributes array allocation
  drivers/pps/clients/pps-gpio.c: remove redundant of_match_ptr
  kernel/panic.c: reduce 1 byte usage for print tainted buffer
  gcov: reuse kbasename helper
  kernel/gcov/fs.c: use pr_warn()
  kernel/module.c: use pr_foo()
  gcov: compile specific gcov implementation based on gcc version
  gcov: add support for gcc 4.7 gcov format
  gcov: move gcov structs definitions to a gcc version specific file
  kernel/taskstats.c: return -ENOMEM when alloc memory fails in add_del_listener()
  kernel/taskstats.c: add nla_nest_cancel() for failure processing between nla_nest_start() and nla_nest_end()
  kernel/sysctl_binary.c: use scnprintf() instead of snprintf()
  ...
mm/Kconfig (17 lines changed)
@@ -153,11 +153,18 @@ config MOVABLE_NODE
help
Allow a node to have only movable memory. Pages used by the kernel,
such as direct mapping pages cannot be migrated. So the corresponding
memory device cannot be hotplugged. This option allows users to
online all the memory of a node as movable memory so that the whole
node can be hotplugged. Users who don't use the memory hotplug
feature are fine with this option on since they don't online memory
as movable.
memory device cannot be hotplugged. This option allows the following
two things:
- When the system is booting, node full of hotpluggable memory can
be arranged to have only movable memory so that the whole node can
be hot-removed. (need movable_node boot option specified).
- After the system is up, the option allows users to online all the
memory of a node as movable memory so that the whole node can be
hot-removed.

Users who don't use the memory hotplug feature are fine with this
option on since they don't specify movable_node boot option or they
don't online memory as movable.

Say Y here if you want to hotplug a whole node.
Say N here if you want kernel to use memory on all nodes evenly.
@@ -172,11 +172,12 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
struct page *page;
unsigned long start, end, pages, count = 0;
unsigned long *map, start, end, pages, count = 0;

if (!bdata->node_bootmem_map)
return 0;

map = bdata->node_bootmem_map;
start = bdata->node_min_pfn;
end = bdata->node_low_pfn;

@@ -184,10 +185,9 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
bdata - bootmem_node_data, start, end);

while (start < end) {
unsigned long *map, idx, vec;
unsigned long idx, vec;
unsigned shift;

map = bdata->node_bootmem_map;
idx = start - bdata->node_min_pfn;
shift = idx & (BITS_PER_LONG - 1);
/*
@@ -784,7 +784,7 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

/* update goal according ...MAX_DMA32_PFN */
end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
end_pfn = pgdat_end_pfn(pgdat);

if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
(goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
@@ -235,10 +235,9 @@ static bool suitable_migration_target(struct page *page)
}

/*
* Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
* pages inside of the pageblock (even though it may still end up isolating
* some pages).
* Isolate free pages onto a private freelist. If @strict is true, will abort
* returning 0 on any invalid PFNs or non-free pages inside of the pageblock
* (even though it may still end up isolating some pages).
*/
static unsigned long isolate_freepages_block(struct compact_control *cc,
unsigned long blockpfn,
@@ -27,11 +27,12 @@
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* By default transparent hugepage support is enabled for all mappings
|
||||
* and khugepaged scans all mappings. Defrag is only invoked by
|
||||
* khugepaged hugepage allocations and by page faults inside
|
||||
* MADV_HUGEPAGE regions to avoid the risk of slowing down short lived
|
||||
* allocations.
|
||||
* By default transparent hugepage support is disabled in order that avoid
|
||||
* to risk increase the memory footprint of applications without a guaranteed
|
||||
* benefit. When transparent hugepage support is enabled, is for all mappings,
|
||||
* and khugepaged scans all mappings.
|
||||
* Defrag is invoked by khugepaged hugepage allocations and by page faults
|
||||
* for all hugepage allocations.
|
||||
*/
|
||||
unsigned long transparent_hugepage_flags __read_mostly =
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS
|
||||
@@ -758,14 +759,6 @@ static inline struct page *alloc_hugepage_vma(int defrag,
|
||||
HPAGE_PMD_ORDER, vma, haddr, nd);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NUMA
|
||||
static inline struct page *alloc_hugepage(int defrag)
|
||||
{
|
||||
return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
|
||||
HPAGE_PMD_ORDER);
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
|
||||
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
|
||||
struct page *zero_page)
|
||||
@@ -2198,7 +2191,34 @@ static void khugepaged_alloc_sleep(void)
|
||||
msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
|
||||
}
|
||||
|
||||
static int khugepaged_node_load[MAX_NUMNODES];
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static int khugepaged_find_target_node(void)
|
||||
{
|
||||
static int last_khugepaged_target_node = NUMA_NO_NODE;
|
||||
int nid, target_node = 0, max_value = 0;
|
||||
|
||||
/* find first node with max normal pages hit */
|
||||
for (nid = 0; nid < MAX_NUMNODES; nid++)
|
||||
if (khugepaged_node_load[nid] > max_value) {
|
||||
max_value = khugepaged_node_load[nid];
|
||||
target_node = nid;
|
||||
}
|
||||
|
||||
/* do some balance if several nodes have the same hit record */
|
||||
if (target_node <= last_khugepaged_target_node)
|
||||
for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
|
||||
nid++)
|
||||
if (max_value == khugepaged_node_load[nid]) {
|
||||
target_node = nid;
|
||||
break;
|
||||
}
|
||||
|
||||
last_khugepaged_target_node = target_node;
|
||||
return target_node;
|
||||
}
|
||||
|
||||
static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
|
||||
{
|
||||
if (IS_ERR(*hpage)) {
|
||||
@@ -2232,9 +2252,8 @@ static struct page
|
||||
* mmap_sem in read mode is good idea also to allow greater
|
||||
* scalability.
|
||||
*/
|
||||
*hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
|
||||
node, __GFP_OTHER_NODE);
|
||||
|
||||
*hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask(
|
||||
khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER);
|
||||
/*
|
||||
* After allocating the hugepage, release the mmap_sem read lock in
|
||||
* preparation for taking it in write mode.
|
||||
@@ -2250,6 +2269,17 @@ static struct page
|
||||
return *hpage;
|
||||
}
|
||||
#else
|
||||
static int khugepaged_find_target_node(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct page *alloc_hugepage(int defrag)
|
||||
{
|
||||
return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
|
||||
HPAGE_PMD_ORDER);
|
||||
}
|
||||
|
||||
static struct page *khugepaged_alloc_hugepage(bool *wait)
|
||||
{
|
||||
struct page *hpage;
|
||||
@@ -2456,6 +2486,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
|
||||
if (pmd_trans_huge(*pmd))
|
||||
goto out;
|
||||
|
||||
memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
|
||||
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
|
||||
for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
|
||||
_pte++, _address += PAGE_SIZE) {
|
||||
@@ -2472,12 +2503,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
|
||||
if (unlikely(!page))
|
||||
goto out_unmap;
|
||||
/*
|
||||
* Chose the node of the first page. This could
|
||||
* be more sophisticated and look at more pages,
|
||||
* but isn't for now.
|
||||
* Record which node the original page is from and save this
|
||||
* information to khugepaged_node_load[].
|
||||
* Khupaged will allocate hugepage from the node has the max
|
||||
* hit record.
|
||||
*/
|
||||
if (node == NUMA_NO_NODE)
|
||||
node = page_to_nid(page);
|
||||
node = page_to_nid(page);
|
||||
khugepaged_node_load[node]++;
|
||||
VM_BUG_ON(PageCompound(page));
|
||||
if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
|
||||
goto out_unmap;
|
||||
@@ -2492,9 +2524,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
|
||||
ret = 1;
|
||||
out_unmap:
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
node = khugepaged_find_target_node();
|
||||
/* collapse_huge_page will return with the mmap_sem released */
|
||||
collapse_huge_page(mm, address, hpage, vma, node);
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
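The khugepaged hunks above replace "use the node of the first scanned page" with a small histogram: khugepaged_scan_pmd() counts how many of the scanned pages sit on each node in khugepaged_node_load[], and khugepaged_find_target_node() then picks the node with the highest count, rotating among nodes that tie so one node is not always preferred. Below is a minimal user-space sketch of just that selection logic; the function names, the fixed MAX_NUMNODES value and the sample loads are illustrative, not the kernel's.

#include <stdio.h>

#define MAX_NUMNODES 8
#define NUMA_NO_NODE (-1)

static int node_load[MAX_NUMNODES];

/* Pick the node with the most hits; round-robin among equal maxima. */
static int find_target_node(void)
{
	static int last_target = NUMA_NO_NODE;
	int nid, target = 0, max_value = 0;

	/* first node with the maximum hit count */
	for (nid = 0; nid < MAX_NUMNODES; nid++) {
		if (node_load[nid] > max_value) {
			max_value = node_load[nid];
			target = nid;
		}
	}

	/* if several nodes share the maximum, advance past the last choice */
	if (target <= last_target) {
		for (nid = last_target + 1; nid < MAX_NUMNODES; nid++) {
			if (node_load[nid] == max_value) {
				target = nid;
				break;
			}
		}
	}

	last_target = target;
	return target;
}

int main(void)
{
	/* pretend a scan saw 3 pages on node 1 and 3 on node 2 */
	node_load[1] = 3;
	node_load[2] = 3;
	printf("%d\n", find_target_node());	/* 1 */
	printf("%d\n", find_target_node());	/* 2: the tie is rotated */
	return 0;
}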
|
||||
|
@@ -753,7 +753,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
}

spin_lock_irqsave(&object->lock, flags);
if (ptr + size > object->pointer + object->size) {
if (size == SIZE_MAX) {
size = object->pointer + object->size - ptr;
} else if (ptr + size > object->pointer + object->size) {
kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
dump_object_info(object);
kmem_cache_free(scan_area_cache, area);
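The add_scan_area() change above lets callers pass SIZE_MAX to mean "scan from ptr to the end of the object", which the mm/vmalloc.c hunk later in this series relies on when it registers va->rb_node with kmemleak. A stand-alone sketch of that clamping rule, in plain user-space C with illustrative types only:

#include <stdint.h>
#include <stdio.h>

/* Clamp a requested scan area to the containing object.
 * size == SIZE_MAX means "everything from ptr to the object's end".
 * Returns the effective size, or 0 if the request overruns the object. */
static size_t clamp_scan_area(uintptr_t obj_ptr, size_t obj_size,
			      uintptr_t ptr, size_t size)
{
	if (size == SIZE_MAX)
		return obj_ptr + obj_size - ptr;
	if (ptr + size > obj_ptr + obj_size)
		return 0;	/* scan area larger than object */
	return size;
}

int main(void)
{
	printf("%zu\n", clamp_scan_area(0x1000, 256, 0x1010, SIZE_MAX)); /* 240 */
	printf("%zu\n", clamp_scan_area(0x1000, 256, 0x1010, 512));      /* 0   */
	return 0;
}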
mm/ksm.c (4 lines changed)
@@ -2309,8 +2309,8 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
* Allocate stable and unstable together:
* MAXSMP NODES_SHIFT 10 will use 16kB.
*/
buf = kcalloc(nr_node_ids + nr_node_ids,
sizeof(*buf), GFP_KERNEL | __GFP_ZERO);
buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
GFP_KERNEL);
/* Let us assume that RB_ROOT is NULL is zero */
if (!buf)
err = -ENOMEM;
mm/memblock.c (126 lines changed)
@@ -20,6 +20,8 @@
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm-generic/sections.h>
|
||||
|
||||
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
|
||||
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
|
||||
|
||||
@@ -32,6 +34,7 @@ struct memblock memblock __initdata_memblock = {
|
||||
.reserved.cnt = 1, /* empty dummy entry */
|
||||
.reserved.max = INIT_MEMBLOCK_REGIONS,
|
||||
|
||||
.bottom_up = false,
|
||||
.current_limit = MEMBLOCK_ALLOC_ANYWHERE,
|
||||
};
|
||||
|
||||
@@ -82,33 +85,57 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
|
||||
return (i < type->cnt) ? i : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* memblock_find_in_range_node - find free area in given range and node
|
||||
/*
|
||||
* __memblock_find_range_bottom_up - find free area utility in bottom-up
|
||||
* @start: start of candidate range
|
||||
* @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
|
||||
* @size: size of free area to find
|
||||
* @align: alignment of free area to find
|
||||
* @nid: nid of the free area to find, %MAX_NUMNODES for any node
|
||||
*
|
||||
* Find @size free area aligned to @align in the specified range and node.
|
||||
* Utility called from memblock_find_in_range_node(), find free area bottom-up.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found address on success, %0 on failure.
|
||||
* Found address on success, 0 on failure.
|
||||
*/
|
||||
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
|
||||
phys_addr_t end, phys_addr_t size,
|
||||
phys_addr_t align, int nid)
|
||||
static phys_addr_t __init_memblock
|
||||
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
|
||||
phys_addr_t size, phys_addr_t align, int nid)
|
||||
{
|
||||
phys_addr_t this_start, this_end, cand;
|
||||
u64 i;
|
||||
|
||||
/* pump up @end */
|
||||
if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
|
||||
end = memblock.current_limit;
|
||||
for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) {
|
||||
this_start = clamp(this_start, start, end);
|
||||
this_end = clamp(this_end, start, end);
|
||||
|
||||
/* avoid allocating the first page */
|
||||
start = max_t(phys_addr_t, start, PAGE_SIZE);
|
||||
end = max(start, end);
|
||||
cand = round_up(this_start, align);
|
||||
if (cand < this_end && this_end - cand >= size)
|
||||
return cand;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* __memblock_find_range_top_down - find free area utility, in top-down
|
||||
* @start: start of candidate range
|
||||
* @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
|
||||
* @size: size of free area to find
|
||||
* @align: alignment of free area to find
|
||||
* @nid: nid of the free area to find, %MAX_NUMNODES for any node
|
||||
*
|
||||
* Utility called from memblock_find_in_range_node(), find free area top-down.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found address on success, 0 on failure.
|
||||
*/
|
||||
static phys_addr_t __init_memblock
|
||||
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
|
||||
phys_addr_t size, phys_addr_t align, int nid)
|
||||
{
|
||||
phys_addr_t this_start, this_end, cand;
|
||||
u64 i;
|
||||
|
||||
for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
|
||||
this_start = clamp(this_start, start, end);
|
||||
@@ -121,9 +148,80 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
|
||||
if (cand >= this_start)
|
||||
return cand;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* memblock_find_in_range_node - find free area in given range and node
|
||||
* @start: start of candidate range
|
||||
* @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
|
||||
* @size: size of free area to find
|
||||
* @align: alignment of free area to find
|
||||
* @nid: nid of the free area to find, %MAX_NUMNODES for any node
|
||||
*
|
||||
* Find @size free area aligned to @align in the specified range and node.
|
||||
*
|
||||
* When allocation direction is bottom-up, the @start should be greater
|
||||
* than the end of the kernel image. Otherwise, it will be trimmed. The
|
||||
* reason is that we want the bottom-up allocation just near the kernel
|
||||
* image so it is highly likely that the allocated memory and the kernel
|
||||
* will reside in the same node.
|
||||
*
|
||||
* If bottom-up allocation failed, will try to allocate memory top-down.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found address on success, 0 on failure.
|
||||
*/
|
||||
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
|
||||
phys_addr_t end, phys_addr_t size,
|
||||
phys_addr_t align, int nid)
|
||||
{
|
||||
int ret;
|
||||
phys_addr_t kernel_end;
|
||||
|
||||
/* pump up @end */
|
||||
if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
|
||||
end = memblock.current_limit;
|
||||
|
||||
/* avoid allocating the first page */
|
||||
start = max_t(phys_addr_t, start, PAGE_SIZE);
|
||||
end = max(start, end);
|
||||
kernel_end = __pa_symbol(_end);
|
||||
|
||||
/*
|
||||
* try bottom-up allocation only when bottom-up mode
|
||||
* is set and @end is above the kernel image.
|
||||
*/
|
||||
if (memblock_bottom_up() && end > kernel_end) {
|
||||
phys_addr_t bottom_up_start;
|
||||
|
||||
/* make sure we will allocate above the kernel */
|
||||
bottom_up_start = max(start, kernel_end);
|
||||
|
||||
/* ok, try bottom-up allocation first */
|
||||
ret = __memblock_find_range_bottom_up(bottom_up_start, end,
|
||||
size, align, nid);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* we always limit bottom-up allocation above the kernel,
|
||||
* but top-down allocation doesn't have the limit, so
|
||||
* retrying top-down allocation may succeed when bottom-up
|
||||
* allocation failed.
|
||||
*
|
||||
* bottom-up allocation is expected to be fail very rarely,
|
||||
* so we use WARN_ONCE() here to see the stack trace if
|
||||
* fail happens.
|
||||
*/
|
||||
WARN_ONCE(1, "memblock: bottom-up allocation failed, "
|
||||
"memory hotunplug may be affected\n");
|
||||
}
|
||||
|
||||
return __memblock_find_range_top_down(start, end, size, align, nid);
|
||||
}
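The reworked memblock_find_in_range_node() above first tries a bottom-up search constrained to start above the kernel image (so hot-pluggable memory higher up stays untouched), and falls back to the existing top-down search if that fails. Here is that control flow reduced to a user-space sketch over a made-up free-range table; the range data, the kernel-end address and the helper names are all illustrative, and the alignment is assumed to be a power of two.

#include <stdio.h>

struct range { unsigned long start, end; };	/* free ranges, ascending */

static const struct range free_ranges[] = {
	{ 0x01000, 0x04000 },
	{ 0x10000, 0x20000 },
};
#define NR_RANGES (sizeof(free_ranges) / sizeof(free_ranges[0]))

static unsigned long round_up_to(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

static unsigned long find_bottom_up(unsigned long start, unsigned long end,
				    unsigned long size, unsigned long align)
{
	for (unsigned int i = 0; i < NR_RANGES; i++) {
		unsigned long lo = free_ranges[i].start, hi = free_ranges[i].end;
		if (lo < start) lo = start;
		if (hi > end) hi = end;
		unsigned long cand = round_up_to(lo, align);
		if (cand < hi && hi - cand >= size)
			return cand;
	}
	return 0;
}

static unsigned long find_top_down(unsigned long start, unsigned long end,
				   unsigned long size, unsigned long align)
{
	for (int i = NR_RANGES - 1; i >= 0; i--) {
		unsigned long lo = free_ranges[i].start, hi = free_ranges[i].end;
		if (lo < start) lo = start;
		if (hi > end) hi = end;
		if (hi < size)
			continue;
		unsigned long cand = (hi - size) & ~(align - 1);
		if (cand >= lo)
			return cand;
	}
	return 0;
}

/* Mirror of the new policy: prefer allocations just above the kernel image
 * when bottom-up mode is on, otherwise (or on failure) go top-down. */
static unsigned long find_in_range(int bottom_up, unsigned long kernel_end,
				   unsigned long start, unsigned long end,
				   unsigned long size, unsigned long align)
{
	if (bottom_up && end > kernel_end) {
		unsigned long bu_start = start > kernel_end ? start : kernel_end;
		unsigned long ret = find_bottom_up(bu_start, end, size, align);
		if (ret)
			return ret;
		fprintf(stderr, "bottom-up allocation failed, falling back\n");
	}
	return find_top_down(start, end, size, align);
}

int main(void)
{
	/* kernel image assumed to end at 0x02000 (made up) */
	printf("%#lx\n", find_in_range(1, 0x02000, 0x1000, 0x20000, 0x1000, 0x1000));
	printf("%#lx\n", find_in_range(0, 0x02000, 0x1000, 0x20000, 0x1000, 0x1000));
	return 0;
}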
|
||||
|
||||
/**
|
||||
* memblock_find_in_range - find free area in given range
|
||||
* @start: start of candidate range
|
||||
@@ -134,7 +232,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
|
||||
* Find @size free area aligned to @align in the specified range.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found address on success, %0 on failure.
|
||||
* Found address on success, 0 on failure.
|
||||
*/
|
||||
phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
|
||||
phys_addr_t end, phys_addr_t size,
|
||||
|
@@ -59,6 +59,7 @@
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/tcp_memcontrol.h>
|
||||
#include "slab.h"
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
@@ -2968,7 +2969,7 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
|
||||
|
||||
VM_BUG_ON(p->is_root_cache);
|
||||
cachep = p->root_cache;
|
||||
return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
|
||||
return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLABINFO
|
||||
@@ -2997,21 +2998,14 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
|
||||
struct res_counter *fail_res;
|
||||
struct mem_cgroup *_memcg;
|
||||
int ret = 0;
|
||||
bool may_oom;
|
||||
|
||||
ret = res_counter_charge(&memcg->kmem, size, &fail_res);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Conditions under which we can wait for the oom_killer. Those are
|
||||
* the same conditions tested by the core page allocator
|
||||
*/
|
||||
may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
|
||||
|
||||
_memcg = memcg;
|
||||
ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
|
||||
&_memcg, may_oom);
|
||||
&_memcg, oom_gfp_allowed(gfp));
|
||||
|
||||
if (ret == -EINTR) {
|
||||
/*
|
||||
@@ -3151,7 +3145,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
|
||||
{
|
||||
struct memcg_cache_params *cur_params = s->memcg_params;
|
||||
|
||||
VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
|
||||
VM_BUG_ON(!is_root_cache(s));
|
||||
|
||||
if (num_groups > memcg_limited_groups_array_size) {
|
||||
int i;
|
||||
@@ -3412,7 +3406,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
|
||||
idx = memcg_cache_id(memcg);
|
||||
|
||||
mutex_lock(&memcg_cache_mutex);
|
||||
new_cachep = cachep->memcg_params->memcg_caches[idx];
|
||||
new_cachep = cache_from_memcg_idx(cachep, idx);
|
||||
if (new_cachep) {
|
||||
css_put(&memcg->css);
|
||||
goto out;
|
||||
@@ -3458,8 +3452,8 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
|
||||
* we'll take the set_limit_mutex to protect ourselves against this.
|
||||
*/
|
||||
mutex_lock(&set_limit_mutex);
|
||||
for (i = 0; i < memcg_limited_groups_array_size; i++) {
|
||||
c = s->memcg_params->memcg_caches[i];
|
||||
for_each_memcg_cache_index(i) {
|
||||
c = cache_from_memcg_idx(s, i);
|
||||
if (!c)
|
||||
continue;
|
||||
|
||||
@@ -3592,8 +3586,8 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
|
||||
* code updating memcg_caches will issue a write barrier to match this.
|
||||
*/
|
||||
read_barrier_depends();
|
||||
if (likely(cachep->memcg_params->memcg_caches[idx])) {
|
||||
cachep = cachep->memcg_params->memcg_caches[idx];
|
||||
if (likely(cache_from_memcg_idx(cachep, idx))) {
|
||||
cachep = cache_from_memcg_idx(cachep, idx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -5389,45 +5383,50 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
|
||||
static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
|
||||
struct cftype *cft, struct seq_file *m)
|
||||
{
|
||||
struct numa_stat {
|
||||
const char *name;
|
||||
unsigned int lru_mask;
|
||||
};
|
||||
|
||||
static const struct numa_stat stats[] = {
|
||||
{ "total", LRU_ALL },
|
||||
{ "file", LRU_ALL_FILE },
|
||||
{ "anon", LRU_ALL_ANON },
|
||||
{ "unevictable", BIT(LRU_UNEVICTABLE) },
|
||||
};
|
||||
const struct numa_stat *stat;
|
||||
int nid;
|
||||
unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
|
||||
unsigned long node_nr;
|
||||
unsigned long nr;
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||
|
||||
total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
|
||||
seq_printf(m, "total=%lu", total_nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
|
||||
seq_printf(m, " N%d=%lu", nid, node_nr);
|
||||
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
|
||||
nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
|
||||
seq_printf(m, "%s=%lu", stat->name, nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
|
||||
stat->lru_mask);
|
||||
seq_printf(m, " N%d=%lu", nid, nr);
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
|
||||
file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
|
||||
seq_printf(m, "file=%lu", file_nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
|
||||
LRU_ALL_FILE);
|
||||
seq_printf(m, " N%d=%lu", nid, node_nr);
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
|
||||
struct mem_cgroup *iter;
|
||||
|
||||
anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
|
||||
seq_printf(m, "anon=%lu", anon_nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
|
||||
LRU_ALL_ANON);
|
||||
seq_printf(m, " N%d=%lu", nid, node_nr);
|
||||
nr = 0;
|
||||
for_each_mem_cgroup_tree(iter, memcg)
|
||||
nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask);
|
||||
seq_printf(m, "hierarchical_%s=%lu", stat->name, nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
nr = 0;
|
||||
for_each_mem_cgroup_tree(iter, memcg)
|
||||
nr += mem_cgroup_node_nr_lru_pages(
|
||||
iter, nid, stat->lru_mask);
|
||||
seq_printf(m, " N%d=%lu", nid, nr);
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
|
||||
unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
|
||||
seq_printf(m, "unevictable=%lu", unevictable_nr);
|
||||
for_each_node_state(nid, N_MEMORY) {
|
||||
node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
|
||||
BIT(LRU_UNEVICTABLE));
|
||||
seq_printf(m, " N%d=%lu", nid, node_nr);
|
||||
}
|
||||
seq_putc(m, '\n');
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_NUMA */
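The memcg_numa_stat_show() rewrite above collapses four nearly identical print blocks (total/file/anon/unevictable) into a static table of {name, LRU mask} pairs walked by one loop, and then reuses the same table for the new hierarchical_* lines. The pattern in isolation, with fake per-node counters instead of the memcg accounting:

#include <stdio.h>

#define LRU_ALL   0xf
#define LRU_FILE  0x3
#define LRU_ANON  0xc
#define NR_NODES  2

/* made-up per-node page counts indexed by LRU list bit */
static unsigned long node_pages[NR_NODES][4] = {
	{ 10, 20, 30, 40 },
	{  1,  2,  3,  4 },
};

static unsigned long count(int nid, unsigned int lru_mask)
{
	unsigned long sum = 0;
	for (int lru = 0; lru < 4; lru++)
		if (lru_mask & (1u << lru))
			sum += node_pages[nid][lru];
	return sum;
}

int main(void)
{
	static const struct {
		const char *name;
		unsigned int lru_mask;
	} stats[] = {
		{ "total", LRU_ALL },
		{ "file",  LRU_FILE },
		{ "anon",  LRU_ANON },
	};

	/* one loop instead of one copy-pasted block per statistic */
	for (unsigned int i = 0; i < sizeof(stats) / sizeof(stats[0]); i++) {
		unsigned long total = 0;
		for (int nid = 0; nid < NR_NODES; nid++)
			total += count(nid, stats[i].lru_mask);
		printf("%s=%lu", stats[i].name, total);
		for (int nid = 0; nid < NR_NODES; nid++)
			printf(" N%d=%lu", nid, count(nid, stats[i].lru_mask));
		putchar('\n');
	}
	return 0;
}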
|
||||
|
@@ -1422,19 +1422,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
|
||||
if (flags & MF_COUNT_INCREASED)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* The lock_memory_hotplug prevents a race with memory hotplug.
|
||||
* This is a big hammer, a better would be nicer.
|
||||
*/
|
||||
lock_memory_hotplug();
|
||||
|
||||
/*
|
||||
* Isolate the page, so that it doesn't get reallocated if it
|
||||
* was free. This flag should be kept set until the source page
|
||||
* is freed and PG_hwpoison on it is set.
|
||||
*/
|
||||
if (get_pageblock_migratetype(p) != MIGRATE_ISOLATE)
|
||||
set_migratetype_isolate(p, true);
|
||||
/*
|
||||
* When the target page is a free hugepage, just remove it
|
||||
* from free hugepage list.
|
||||
@@ -1455,7 +1442,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
|
||||
/* Not a free page */
|
||||
ret = 1;
|
||||
}
|
||||
unlock_memory_hotplug();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1654,15 +1640,28 @@ int soft_offline_page(struct page *page, int flags)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The lock_memory_hotplug prevents a race with memory hotplug.
|
||||
* This is a big hammer, a better would be nicer.
|
||||
*/
|
||||
lock_memory_hotplug();
|
||||
|
||||
/*
|
||||
* Isolate the page, so that it doesn't get reallocated if it
|
||||
* was free. This flag should be kept set until the source page
|
||||
* is freed and PG_hwpoison on it is set.
|
||||
*/
|
||||
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
||||
set_migratetype_isolate(page, true);
|
||||
|
||||
ret = get_any_page(page, pfn, flags);
|
||||
if (ret < 0)
|
||||
goto unset;
|
||||
if (ret) { /* for in-use pages */
|
||||
unlock_memory_hotplug();
|
||||
if (ret > 0) { /* for in-use pages */
|
||||
if (PageHuge(page))
|
||||
ret = soft_offline_huge_page(page, flags);
|
||||
else
|
||||
ret = __soft_offline_page(page, flags);
|
||||
} else { /* for free pages */
|
||||
} else if (ret == 0) { /* for free pages */
|
||||
if (PageHuge(page)) {
|
||||
set_page_hwpoison_huge_page(hpage);
|
||||
dequeue_hwpoisoned_huge_page(hpage);
|
||||
@@ -1673,7 +1672,6 @@ int soft_offline_page(struct page *page, int flags)
|
||||
atomic_long_inc(&num_poisoned_pages);
|
||||
}
|
||||
}
|
||||
unset:
|
||||
unset_migratetype_isolate(page, MIGRATE_MOVABLE);
|
||||
return ret;
|
||||
}
|
||||
|
@@ -453,8 +453,6 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,

/*
* This function frees user-level page tables of a process.
*
* Must be called with pagetable lock held.
*/
void free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
@@ -31,6 +31,7 @@
|
||||
#include <linux/firmware-map.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
@@ -365,8 +366,7 @@ out_fail:
|
||||
static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long old_pgdat_end_pfn =
|
||||
pgdat->node_start_pfn + pgdat->node_spanned_pages;
|
||||
unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
|
||||
|
||||
if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
|
||||
pgdat->node_start_pfn = start_pfn;
|
||||
@@ -402,13 +402,12 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
|
||||
static int __meminit __add_section(int nid, struct zone *zone,
|
||||
unsigned long phys_start_pfn)
|
||||
{
|
||||
int nr_pages = PAGES_PER_SECTION;
|
||||
int ret;
|
||||
|
||||
if (pfn_valid(phys_start_pfn))
|
||||
return -EEXIST;
|
||||
|
||||
ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
|
||||
ret = sparse_add_one_section(zone, phys_start_pfn);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@@ -579,9 +578,9 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
|
||||
static void shrink_pgdat_span(struct pglist_data *pgdat,
|
||||
unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
|
||||
unsigned long pgdat_end_pfn =
|
||||
pgdat->node_start_pfn + pgdat->node_spanned_pages;
|
||||
unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
|
||||
unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
|
||||
unsigned long pgdat_end_pfn = p;
|
||||
unsigned long pfn;
|
||||
struct mem_section *ms;
|
||||
int nid = pgdat->node_id;
|
||||
@@ -935,7 +934,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
||||
arg.nr_pages = nr_pages;
|
||||
node_states_check_changes_online(nr_pages, zone, &arg);
|
||||
|
||||
nid = page_to_nid(pfn_to_page(pfn));
|
||||
nid = pfn_to_nid(pfn);
|
||||
|
||||
ret = memory_notify(MEM_GOING_ONLINE, &arg);
|
||||
ret = notifier_to_errno(ret);
|
||||
@@ -1044,17 +1043,23 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/**
|
||||
* try_online_node - online a node if offlined
|
||||
*
|
||||
* called by cpu_up() to online a node without onlined memory.
|
||||
*/
|
||||
int mem_online_node(int nid)
|
||||
int try_online_node(int nid)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
int ret;
|
||||
|
||||
if (node_online(nid))
|
||||
return 0;
|
||||
|
||||
lock_memory_hotplug();
|
||||
pgdat = hotadd_new_pgdat(nid, 0);
|
||||
if (!pgdat) {
|
||||
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -1062,6 +1067,12 @@ int mem_online_node(int nid)
|
||||
ret = register_one_node(nid);
|
||||
BUG_ON(ret);
|
||||
|
||||
if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
|
||||
mutex_lock(&zonelists_mutex);
|
||||
build_all_zonelists(NULL, NULL);
|
||||
mutex_unlock(&zonelists_mutex);
|
||||
}
|
||||
|
||||
out:
|
||||
unlock_memory_hotplug();
|
||||
return ret;
|
||||
@@ -1412,6 +1423,36 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
|
||||
}
|
||||
#endif /* CONFIG_MOVABLE_NODE */
|
||||
|
||||
static int __init cmdline_parse_movable_node(char *p)
|
||||
{
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
/*
|
||||
* Memory used by the kernel cannot be hot-removed because Linux
|
||||
* cannot migrate the kernel pages. When memory hotplug is
|
||||
* enabled, we should prevent memblock from allocating memory
|
||||
* for the kernel.
|
||||
*
|
||||
* ACPI SRAT records all hotpluggable memory ranges. But before
|
||||
* SRAT is parsed, we don't know about it.
|
||||
*
|
||||
* The kernel image is loaded into memory at very early time. We
|
||||
* cannot prevent this anyway. So on NUMA system, we set any
|
||||
* node the kernel resides in as un-hotpluggable.
|
||||
*
|
||||
* Since on modern servers, one node could have double-digit
|
||||
* gigabytes memory, we can assume the memory around the kernel
|
||||
* image is also un-hotpluggable. So before SRAT is parsed, just
|
||||
* allocate memory near the kernel image to try the best to keep
|
||||
* the kernel away from hotpluggable memory.
|
||||
*/
|
||||
memblock_set_bottom_up(true);
|
||||
#else
|
||||
pr_warn("movable_node option not supported\n");
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
early_param("movable_node", cmdline_parse_movable_node);
|
||||
|
||||
/* check which state of node_states will be changed when offline memory */
|
||||
static void node_states_check_changes_offline(unsigned long nr_pages,
|
||||
struct zone *zone, struct memory_notify *arg)
|
||||
@@ -1702,7 +1743,7 @@ int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
|
||||
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
|
||||
{
|
||||
int ret = !is_memblock_offlined(mem);
|
||||
|
||||
@@ -1854,7 +1895,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
|
||||
* if this is not the case.
|
||||
*/
|
||||
ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
|
||||
is_memblock_offlined_cb);
|
||||
check_memblock_offlined_cb);
|
||||
if (ret) {
|
||||
unlock_memory_hotplug();
|
||||
BUG();
|
||||
|
@@ -1125,7 +1125,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
|
||||
tmp = *from;
|
||||
while (!nodes_empty(tmp)) {
|
||||
int s,d;
|
||||
int source = -1;
|
||||
int source = NUMA_NO_NODE;
|
||||
int dest = 0;
|
||||
|
||||
for_each_node_mask(s, tmp) {
|
||||
@@ -1160,7 +1160,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
|
||||
if (!node_isset(dest, tmp))
|
||||
break;
|
||||
}
|
||||
if (source == -1)
|
||||
if (source == NUMA_NO_NODE)
|
||||
break;
|
||||
|
||||
node_clear(source, tmp);
|
||||
@@ -1835,7 +1835,7 @@ static unsigned offset_il_node(struct mempolicy *pol,
|
||||
unsigned nnodes = nodes_weight(pol->v.nodes);
|
||||
unsigned target;
|
||||
int c;
|
||||
int nid = -1;
|
||||
int nid = NUMA_NO_NODE;
|
||||
|
||||
if (!nnodes)
|
||||
return numa_node_id();
|
||||
@@ -1872,11 +1872,11 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
|
||||
|
||||
/*
|
||||
* Return the bit number of a random bit set in the nodemask.
|
||||
* (returns -1 if nodemask is empty)
|
||||
* (returns NUMA_NO_NODE if nodemask is empty)
|
||||
*/
|
||||
int node_random(const nodemask_t *maskp)
|
||||
{
|
||||
int w, bit = -1;
|
||||
int w, bit = NUMA_NO_NODE;
|
||||
|
||||
w = nodes_weight(*maskp);
|
||||
if (w)
|
||||
@@ -2914,62 +2914,45 @@ out:
|
||||
* @maxlen: length of @buffer
|
||||
* @pol: pointer to mempolicy to be formatted
|
||||
*
|
||||
* Convert a mempolicy into a string.
|
||||
* Returns the number of characters in buffer (if positive)
|
||||
* or an error (negative)
|
||||
* Convert @pol into a string. If @buffer is too short, truncate the string.
|
||||
* Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
|
||||
* longest flag, "relative", and to display at least a few node ids.
|
||||
*/
|
||||
int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
|
||||
void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
|
||||
{
|
||||
char *p = buffer;
|
||||
int l;
|
||||
nodemask_t nodes;
|
||||
unsigned short mode;
|
||||
unsigned short flags = pol ? pol->flags : 0;
|
||||
nodemask_t nodes = NODE_MASK_NONE;
|
||||
unsigned short mode = MPOL_DEFAULT;
|
||||
unsigned short flags = 0;
|
||||
|
||||
/*
|
||||
* Sanity check: room for longest mode, flag and some nodes
|
||||
*/
|
||||
VM_BUG_ON(maxlen < strlen("interleave") + strlen("relative") + 16);
|
||||
|
||||
if (!pol || pol == &default_policy)
|
||||
mode = MPOL_DEFAULT;
|
||||
else
|
||||
if (pol && pol != &default_policy) {
|
||||
mode = pol->mode;
|
||||
flags = pol->flags;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case MPOL_DEFAULT:
|
||||
nodes_clear(nodes);
|
||||
break;
|
||||
|
||||
case MPOL_PREFERRED:
|
||||
nodes_clear(nodes);
|
||||
if (flags & MPOL_F_LOCAL)
|
||||
mode = MPOL_LOCAL;
|
||||
else
|
||||
node_set(pol->v.preferred_node, nodes);
|
||||
break;
|
||||
|
||||
case MPOL_BIND:
|
||||
/* Fall through */
|
||||
case MPOL_INTERLEAVE:
|
||||
nodes = pol->v.nodes;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
WARN_ON_ONCE(1);
|
||||
snprintf(p, maxlen, "unknown");
|
||||
return;
|
||||
}
|
||||
|
||||
l = strlen(policy_modes[mode]);
|
||||
if (buffer + maxlen < p + l + 1)
|
||||
return -ENOSPC;
|
||||
|
||||
strcpy(p, policy_modes[mode]);
|
||||
p += l;
|
||||
p += snprintf(p, maxlen, policy_modes[mode]);
|
||||
|
||||
if (flags & MPOL_MODE_FLAGS) {
|
||||
if (buffer + maxlen < p + 2)
|
||||
return -ENOSPC;
|
||||
*p++ = '=';
|
||||
p += snprintf(p, buffer + maxlen - p, "=");
|
||||
|
||||
/*
|
||||
* Currently, the only defined flags are mutually exclusive
|
||||
@@ -2981,10 +2964,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
|
||||
}
|
||||
|
||||
if (!nodes_empty(nodes)) {
|
||||
if (buffer + maxlen < p + 2)
|
||||
return -ENOSPC;
|
||||
*p++ = ':';
|
||||
p += snprintf(p, buffer + maxlen - p, ":");
|
||||
p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
|
||||
}
|
||||
return p - buffer;
|
||||
}
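The mpol_to_str() change above turns it from a function that returned a length or -ENOSPC/-EINVAL into a void function that simply truncates: each piece is appended with snprintf() against the space that remains, so callers no longer have to handle errors. A user-space sketch of that append-and-truncate pattern follows; the helper name and the mode/flag strings are placeholders, not the kernel's policy tables.

#include <stdio.h>

/* Append formatted text, never writing past buf+maxlen and never letting the
 * cursor run past the end even when snprintf() reports truncation. */
static char *append(char *p, char *end, const char *fmt, const char *s)
{
	int n = snprintf(p, end - p, fmt, s);
	if (n < 0 || n >= end - p)
		return end - 1;		/* truncated: park at the final NUL */
	return p + n;
}

/* Build "mode[=flags][:nodelist]" the way the reworked mpol_to_str() does:
 * always succeed, truncate if the buffer is too small (maxlen must be >= 1). */
static void policy_to_str(char *buf, int maxlen, const char *mode,
			  const char *flags, const char *nodes)
{
	char *p = buf, *end = buf + maxlen;

	p = append(p, end, "%s", mode);
	if (flags && *flags)
		p = append(p, end, "=%s", flags);
	if (nodes && *nodes)
		p = append(p, end, ":%s", nodes);
}

int main(void)
{
	char buf[32];

	policy_to_str(buf, sizeof(buf), "interleave", "relative", "0-3");
	puts(buf);			/* interleave=relative:0-3 */

	policy_to_str(buf, 8, "interleave", "relative", "0-3");
	puts(buf);			/* "interle" - truncated, no error */
	return 0;
}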
|
||||
|
mm/mmap.c (16 lines changed)
@@ -179,14 +179,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
|
||||
goto error;
|
||||
}
|
||||
|
||||
allowed = (totalram_pages - hugetlb_total_pages())
|
||||
* sysctl_overcommit_ratio / 100;
|
||||
allowed = vm_commit_limit();
|
||||
/*
|
||||
* Reserve some for root
|
||||
*/
|
||||
if (!cap_sys_admin)
|
||||
allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
|
||||
allowed += total_swap_pages;
|
||||
|
||||
/*
|
||||
* Don't let a single process grow so big a user can't recover
|
||||
@@ -1856,7 +1854,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
||||
struct vm_area_struct *vma;
|
||||
struct vm_unmapped_area_info info;
|
||||
|
||||
if (len > TASK_SIZE)
|
||||
if (len > TASK_SIZE - mmap_min_addr)
|
||||
return -ENOMEM;
|
||||
|
||||
if (flags & MAP_FIXED)
|
||||
@@ -1865,14 +1863,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
||||
if (addr) {
|
||||
addr = PAGE_ALIGN(addr);
|
||||
vma = find_vma(mm, addr);
|
||||
if (TASK_SIZE - len >= addr &&
|
||||
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
|
||||
(!vma || addr + len <= vma->vm_start))
|
||||
return addr;
|
||||
}
|
||||
|
||||
info.flags = 0;
|
||||
info.length = len;
|
||||
info.low_limit = TASK_UNMAPPED_BASE;
|
||||
info.low_limit = mm->mmap_base;
|
||||
info.high_limit = TASK_SIZE;
|
||||
info.align_mask = 0;
|
||||
return vm_unmapped_area(&info);
|
||||
@@ -1895,7 +1893,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
||||
struct vm_unmapped_area_info info;
|
||||
|
||||
/* requested length too big for entire address space */
|
||||
if (len > TASK_SIZE)
|
||||
if (len > TASK_SIZE - mmap_min_addr)
|
||||
return -ENOMEM;
|
||||
|
||||
if (flags & MAP_FIXED)
|
||||
@@ -1905,14 +1903,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
||||
if (addr) {
|
||||
addr = PAGE_ALIGN(addr);
|
||||
vma = find_vma(mm, addr);
|
||||
if (TASK_SIZE - len >= addr &&
|
||||
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
|
||||
(!vma || addr + len <= vma->vm_start))
|
||||
return addr;
|
||||
}
|
||||
|
||||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||
info.length = len;
|
||||
info.low_limit = PAGE_SIZE;
|
||||
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
|
||||
info.high_limit = mm->mmap_base;
|
||||
info.align_mask = 0;
|
||||
addr = vm_unmapped_area(&info);
|
||||
|
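The arch_get_unmapped_area() hunks above tighten the validation of a caller-supplied hint so it must sit at or above mmap_min_addr as well as below TASK_SIZE - len. A small user-space sketch of that range check, with illustrative values for TASK_SIZE and mmap_min_addr and with the kernel's overlapping-VMA lookup deliberately left out:

#include <stdio.h>

#define TASK_SIZE      0xC0000000UL	/* illustrative 32-bit split */
#define MMAP_MIN_ADDR  0x10000UL	/* illustrative vm.mmap_min_addr */
#define PAGE_MASK      (~0xFFFUL)

static unsigned long page_align(unsigned long addr)
{
	return (addr + 0xFFF) & PAGE_MASK;
}

/* A hint is only usable if the whole [addr, addr+len) range stays inside
 * [mmap_min_addr, TASK_SIZE).  The real code also checks find_vma(). */
static int hint_ok(unsigned long addr, unsigned long len)
{
	if (len > TASK_SIZE - MMAP_MIN_ADDR)
		return 0;		/* request can never fit */
	addr = page_align(addr);
	return TASK_SIZE - len >= addr && addr >= MMAP_MIN_ADDR;
}

int main(void)
{
	printf("%d\n", hint_ok(0x0, 0x1000));		/* 0: below mmap_min_addr */
	printf("%d\n", hint_ok(0x20000, 0x1000));	/* 1 */
	printf("%d\n", hint_ok(0xBFFFF000, 0x2000));	/* 0: runs past TASK_SIZE */
	return 0;
}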
@@ -112,6 +112,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
|
||||
pmd_t *pmd;
|
||||
unsigned long next;
|
||||
unsigned long pages = 0;
|
||||
unsigned long nr_huge_updates = 0;
|
||||
|
||||
pmd = pmd_offset(pud, addr);
|
||||
do {
|
||||
@@ -126,9 +127,10 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
|
||||
newprot, prot_numa);
|
||||
|
||||
if (nr_ptes) {
|
||||
if (nr_ptes == HPAGE_PMD_NR)
|
||||
pages++;
|
||||
|
||||
if (nr_ptes == HPAGE_PMD_NR) {
|
||||
pages += HPAGE_PMD_NR;
|
||||
nr_huge_updates++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -141,6 +143,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
|
||||
pages += this_pages;
|
||||
} while (pmd++, addr = next, addr != end);
|
||||
|
||||
if (nr_huge_updates)
|
||||
count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
|
||||
return pages;
|
||||
}
|
||||
|
||||
|
@@ -82,27 +82,18 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)

static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
unsigned long i, start_aligned, end_aligned;
int order = ilog2(BITS_PER_LONG);
int order;

start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
end_aligned = end & ~(BITS_PER_LONG - 1);
while (start < end) {
order = min(MAX_ORDER - 1UL, __ffs(start));

if (end_aligned <= start_aligned) {
for (i = start; i < end; i++)
__free_pages_bootmem(pfn_to_page(i), 0);
while (start + (1UL << order) > end)
order--;

return;
__free_pages_bootmem(pfn_to_page(start), order);

start += (1UL << order);
}

for (i = start; i < start_aligned; i++)
__free_pages_bootmem(pfn_to_page(i), 0);

for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
__free_pages_bootmem(pfn_to_page(i), order);

for (i = end_aligned; i < end; i++)
__free_pages_bootmem(pfn_to_page(i), 0);
}

static unsigned long __init __free_memory_core(phys_addr_t start,
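The new __free_pages_memory() above drops the head/aligned-middle/tail phases: at each step it frees the largest naturally aligned, order-sized block that starts at start and still fits below end (capped at MAX_ORDER - 1), then advances past it. The order computation on its own, traced over a few PFNs; MAX_ORDER here is the usual default, and lowest_bit() stands in for the kernel's __ffs():

#include <stdio.h>

#define MAX_ORDER 11	/* typical kernel default */

/* Index of the lowest set bit of x (x != 0). */
static unsigned long lowest_bit(unsigned long x)
{
	unsigned long i = 0;
	while (!(x & 1UL)) {
		x >>= 1;
		i++;
	}
	return i;
}

/* Largest order block that is naturally aligned at start and ends by end. */
static int pick_order(unsigned long start, unsigned long end)
{
	unsigned long align = start ? lowest_bit(start) : MAX_ORDER - 1;
	int order = align < MAX_ORDER - 1 ? (int)align : MAX_ORDER - 1;

	while (start + (1UL << order) > end)
		order--;
	return order;
}

int main(void)
{
	unsigned long start = 5, end = 100;

	while (start < end) {
		int order = pick_order(start, end);
		printf("free pfn %lu, order %d (%lu pages)\n",
		       start, order, 1UL << order);
		start += 1UL << order;
	}
	return 0;
}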
@@ -1948,13 +1948,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
goto error;
}

allowed = totalram_pages * sysctl_overcommit_ratio / 100;
allowed = vm_commit_limit();
/*
* Reserve some 3% for root
*/
if (!cap_sys_admin)
allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
allowed += total_swap_pages;

/*
* Don't let a single process grow so big a user can't recover
@@ -234,8 +234,8 @@ int page_group_by_mobility_disabled __read_mostly;
|
||||
|
||||
void set_pageblock_migratetype(struct page *page, int migratetype)
|
||||
{
|
||||
|
||||
if (unlikely(page_group_by_mobility_disabled))
|
||||
if (unlikely(page_group_by_mobility_disabled &&
|
||||
migratetype < MIGRATE_PCPTYPES))
|
||||
migratetype = MIGRATE_UNMOVABLE;
|
||||
|
||||
set_pageblock_flags_group(page, (unsigned long)migratetype,
|
||||
@@ -1027,6 +1027,10 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
|
||||
{
|
||||
int current_order = page_order(page);
|
||||
|
||||
/*
|
||||
* When borrowing from MIGRATE_CMA, we need to release the excess
|
||||
* buddy pages to CMA itself.
|
||||
*/
|
||||
if (is_migrate_cma(fallback_type))
|
||||
return fallback_type;
|
||||
|
||||
@@ -1091,21 +1095,11 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
|
||||
list_del(&page->lru);
|
||||
rmv_page_order(page);
|
||||
|
||||
/*
|
||||
* Borrow the excess buddy pages as well, irrespective
|
||||
* of whether we stole freepages, or took ownership of
|
||||
* the pageblock or not.
|
||||
*
|
||||
* Exception: When borrowing from MIGRATE_CMA, release
|
||||
* the excess buddy pages to CMA itself.
|
||||
*/
|
||||
expand(zone, page, order, current_order, area,
|
||||
is_migrate_cma(migratetype)
|
||||
? migratetype : start_migratetype);
|
||||
new_type);
|
||||
|
||||
trace_mm_page_alloc_extfrag(page, order,
|
||||
current_order, start_migratetype, migratetype,
|
||||
new_type == start_migratetype);
|
||||
trace_mm_page_alloc_extfrag(page, order, current_order,
|
||||
start_migratetype, migratetype, new_type);
|
||||
|
||||
return page;
|
||||
}
|
||||
@@ -1711,7 +1705,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
|
||||
* comments in mmzone.h. Reduces cache footprint of zonelist scans
|
||||
* that have to skip over a lot of full or unallowed zones.
|
||||
*
|
||||
* If the zonelist cache is present in the passed in zonelist, then
|
||||
* If the zonelist cache is present in the passed zonelist, then
|
||||
* returns a pointer to the allowed node mask (either the current
|
||||
* tasks mems_allowed, or node_states[N_MEMORY].)
|
||||
*
|
||||
@@ -2593,7 +2587,7 @@ rebalance:
|
||||
* running out of options and have to consider going OOM
|
||||
*/
|
||||
if (!did_some_progress) {
|
||||
if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
|
||||
if (oom_gfp_allowed(gfp_mask)) {
|
||||
if (oom_killer_disabled)
|
||||
goto nopage;
|
||||
/* Coredumps can quickly deplete all memory reserves */
|
||||
@@ -3881,8 +3875,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
|
||||
return ffz(~size);
|
||||
}
|
||||
|
||||
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
|
||||
|
||||
/*
|
||||
* Check if a pageblock contains reserved pages
|
||||
*/
|
||||
@@ -4266,7 +4258,7 @@ static __meminit void zone_pcp_init(struct zone *zone)
|
||||
*/
|
||||
zone->pageset = &boot_pageset;
|
||||
|
||||
if (zone->present_pages)
|
||||
if (populated_zone(zone))
|
||||
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
|
||||
zone->name, zone->present_pages,
|
||||
zone_batchsize(zone));
|
||||
@@ -5160,7 +5152,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
|
||||
|
||||
for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
|
||||
struct zone *zone = &pgdat->node_zones[zone_type];
|
||||
if (zone->present_pages) {
|
||||
if (populated_zone(zone)) {
|
||||
node_set_state(nid, N_HIGH_MEMORY);
|
||||
if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
|
||||
zone_type <= ZONE_NORMAL)
|
||||
|
@@ -401,6 +401,7 @@ ondemand_readahead(struct address_space *mapping,
|
||||
unsigned long req_size)
|
||||
{
|
||||
unsigned long max = max_sane_readahead(ra->ra_pages);
|
||||
pgoff_t prev_offset;
|
||||
|
||||
/*
|
||||
* start of file
|
||||
@@ -452,8 +453,11 @@ ondemand_readahead(struct address_space *mapping,
|
||||
|
||||
/*
|
||||
* sequential cache miss
|
||||
* trivial case: (offset - prev_offset) == 1
|
||||
* unaligned reads: (offset - prev_offset) == 0
|
||||
*/
|
||||
if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
|
||||
prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT;
|
||||
if (offset - prev_offset <= 1UL)
|
||||
goto initial_readahead;
|
||||
|
||||
/*
|
||||
@@ -569,7 +573,7 @@ static ssize_t
|
||||
do_readahead(struct address_space *mapping, struct file *filp,
|
||||
pgoff_t index, unsigned long nr)
|
||||
{
|
||||
if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
|
||||
if (!mapping || !mapping->a_ops)
|
||||
return -EINVAL;
|
||||
|
||||
force_page_cache_readahead(mapping, filp, index, nr);
|
||||
|
@@ -3982,7 +3982,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
|
||||
|
||||
VM_BUG_ON(!mutex_is_locked(&slab_mutex));
|
||||
for_each_memcg_cache_index(i) {
|
||||
c = cache_from_memcg(cachep, i);
|
||||
c = cache_from_memcg_idx(cachep, i);
|
||||
if (c)
|
||||
/* return value determined by the parent cache only */
|
||||
__do_tune_cpucache(c, limit, batchcount, shared, gfp);
|
||||
|
@@ -160,7 +160,8 @@ static inline const char *cache_name(struct kmem_cache *s)
|
||||
return s->name;
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
|
||||
static inline struct kmem_cache *
|
||||
cache_from_memcg_idx(struct kmem_cache *s, int idx)
|
||||
{
|
||||
if (!s->memcg_params)
|
||||
return NULL;
|
||||
@@ -204,7 +205,8 @@ static inline const char *cache_name(struct kmem_cache *s)
|
||||
return s->name;
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
|
||||
static inline struct kmem_cache *
|
||||
cache_from_memcg_idx(struct kmem_cache *s, int idx)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -571,7 +571,7 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
|
||||
return;
|
||||
|
||||
for_each_memcg_cache_index(i) {
|
||||
c = cache_from_memcg(s, i);
|
||||
c = cache_from_memcg_idx(s, i);
|
||||
if (!c)
|
||||
continue;
|
||||
|
||||
|
@@ -4983,7 +4983,7 @@ static ssize_t slab_attr_store(struct kobject *kobj,
|
||||
* through the descendants with best-effort propagation.
|
||||
*/
|
||||
for_each_memcg_cache_index(i) {
|
||||
struct kmem_cache *c = cache_from_memcg(s, i);
|
||||
struct kmem_cache *c = cache_from_memcg_idx(s, i);
|
||||
if (c)
|
||||
attribute->store(c, buf, len);
|
||||
}
|
||||
|
mm/sparse.c (53 lines changed)
@@ -590,33 +590,32 @@ void __init sparse_init(void)
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
|
||||
unsigned long nr_pages)
|
||||
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
|
||||
{
|
||||
/* This will make the necessary allocations eventually. */
|
||||
return sparse_mem_map_populate(pnum, nid);
|
||||
}
|
||||
static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
|
||||
static void __kfree_section_memmap(struct page *memmap)
|
||||
{
|
||||
unsigned long start = (unsigned long)memmap;
|
||||
unsigned long end = (unsigned long)(memmap + nr_pages);
|
||||
unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
|
||||
|
||||
vmemmap_free(start, end);
|
||||
}
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
|
||||
static void free_map_bootmem(struct page *memmap)
|
||||
{
|
||||
unsigned long start = (unsigned long)memmap;
|
||||
unsigned long end = (unsigned long)(memmap + nr_pages);
|
||||
unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
|
||||
|
||||
vmemmap_free(start, end);
|
||||
}
|
||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||
#else
|
||||
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
|
||||
static struct page *__kmalloc_section_memmap(void)
|
||||
{
|
||||
struct page *page, *ret;
|
||||
unsigned long memmap_size = sizeof(struct page) * nr_pages;
|
||||
unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
|
||||
|
||||
page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
|
||||
if (page)
|
||||
@@ -634,28 +633,30 @@ got_map_ptr:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
|
||||
unsigned long nr_pages)
|
||||
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
|
||||
{
|
||||
return __kmalloc_section_memmap(nr_pages);
|
||||
return __kmalloc_section_memmap();
|
||||
}
|
||||
|
||||
static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
|
||||
static void __kfree_section_memmap(struct page *memmap)
|
||||
{
|
||||
if (is_vmalloc_addr(memmap))
|
||||
vfree(memmap);
|
||||
else
|
||||
free_pages((unsigned long)memmap,
|
||||
get_order(sizeof(struct page) * nr_pages));
|
||||
get_order(sizeof(struct page) * PAGES_PER_SECTION));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
|
||||
static void free_map_bootmem(struct page *memmap)
|
||||
{
|
||||
unsigned long maps_section_nr, removing_section_nr, i;
|
||||
unsigned long magic;
|
||||
unsigned long magic, nr_pages;
|
||||
struct page *page = virt_to_page(memmap);
|
||||
|
||||
nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
for (i = 0; i < nr_pages; i++, page++) {
|
||||
magic = (unsigned long) page->lru.next;
|
||||
|
||||
@@ -684,8 +685,7 @@ static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
|
||||
* set. If this is <=0, then that means that the passed-in
|
||||
* map was not consumed and must be freed.
|
||||
*/
|
||||
int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
|
||||
int nr_pages)
|
||||
int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
|
||||
{
|
||||
unsigned long section_nr = pfn_to_section_nr(start_pfn);
|
||||
struct pglist_data *pgdat = zone->zone_pgdat;
|
||||
@@ -702,12 +702,12 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
|
||||
ret = sparse_index_init(section_nr, pgdat->node_id);
|
||||
if (ret < 0 && ret != -EEXIST)
|
||||
return ret;
|
||||
memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
|
||||
memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
|
||||
if (!memmap)
|
||||
return -ENOMEM;
|
||||
usemap = __kmalloc_section_usemap();
|
||||
if (!usemap) {
|
||||
__kfree_section_memmap(memmap, nr_pages);
|
||||
__kfree_section_memmap(memmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -719,7 +719,7 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(memmap, 0, sizeof(struct page) * nr_pages);
|
||||
memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
|
||||
|
||||
ms->section_mem_map |= SECTION_MARKED_PRESENT;
|
||||
|
||||
@@ -729,7 +729,7 @@ out:
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
if (ret <= 0) {
|
||||
kfree(usemap);
|
||||
__kfree_section_memmap(memmap, nr_pages);
|
||||
__kfree_section_memmap(memmap);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -759,7 +759,6 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
|
||||
static void free_section_usemap(struct page *memmap, unsigned long *usemap)
|
||||
{
|
||||
struct page *usemap_page;
|
||||
unsigned long nr_pages;
|
||||
|
||||
if (!usemap)
|
||||
return;
|
||||
@@ -771,7 +770,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
|
||||
if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
|
||||
kfree(usemap);
|
||||
if (memmap)
|
||||
__kfree_section_memmap(memmap, PAGES_PER_SECTION);
|
||||
__kfree_section_memmap(memmap);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -780,12 +779,8 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
|
||||
* on the section which has pgdat at boot time. Just keep it as is now.
|
||||
*/
|
||||
|
||||
if (memmap) {
|
||||
nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
free_map_bootmem(memmap, nr_pages);
|
||||
}
|
||||
if (memmap)
|
||||
free_map_bootmem(memmap);
|
||||
}
|
||||
|
||||
void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
|
||||
|
mm/swapfile.c
@@ -707,7 +707,7 @@ noswap:
return (swp_entry_t) {0};
}

/* The only caller of this function is now susupend routine */
/* The only caller of this function is now suspend routine */
swp_entry_t get_swap_page_of_type(int type)
{
struct swap_info_struct *si;
@@ -845,7 +845,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
}

/*
* Caller has made sure that the swapdevice corresponding to entry
* Caller has made sure that the swap device corresponding to entry
* is still around or has not been recycled.
*/
void swap_free(swp_entry_t entry)
@@ -947,7 +947,7 @@ int try_to_free_swap(struct page *page)
* original page might be freed under memory pressure, then
* later read back in from swap, now with the wrong data.
*
* Hibration suspends storage while it is writing the image
* Hibernation suspends storage while it is writing the image
* to disk so check that here.
*/
if (pm_suspended_storage())
@@ -1179,7 +1179,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* some architectures (e.g. x86_32 with PAE) we might catch a glimpse
* of unmatched parts which look like swp_pte, so unuse_pte must
* recheck under pte lock. Scanning without pte lock lets it be
* preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
* preemptable whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
*/
pte = pte_offset_map(pmd, addr);
do {
@@ -1924,17 +1924,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
p->cluster_info = NULL;
p->flags = 0;
frontswap_map = frontswap_map_get(p);
frontswap_map_set(p, NULL);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
frontswap_invalidate_area(type);
frontswap_map_set(p, NULL);
mutex_unlock(&swapon_mutex);
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
vfree(swap_map);
vfree(cluster_info);
vfree(frontswap_map);
/* Destroy swap account informatin */
/* Destroy swap account information */
swap_cgroup_swapoff(type);

inode = mapping->host;
@@ -2786,8 +2786,8 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)

/*
* We are fortunate that although vmalloc_to_page uses pte_offset_map,
* no architecture is using highmem pages for kernel pagetables: so it
* will not corrupt the GFP_ATOMIC caller's atomic pagetable kmaps.
* no architecture is using highmem pages for kernel page tables: so it
* will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
*/
head = vmalloc_to_page(si->swap_map + offset);
offset &= ~PAGE_MASK;
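The swapoff hunk above moves frontswap_map_set(p, NULL) so the frontswap map pointer is detached while swap_lock and p->lock are still held, and the map is only invalidated and freed afterwards. A minimal userspace sketch of that detach-under-lock, tear-down-outside-lock pattern (struct area, area_teardown and the map field are illustrative names, not kernel APIs):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for a swap_info-like object with a detachable map. */
struct area {
        pthread_mutex_t lock;
        int *map;               /* analogous to the frontswap map */
};

/*
 * Detach the map while holding the lock, then tear it down afterwards, so
 * concurrent users that take the lock can no longer see a pointer that is
 * about to be freed.
 */
static void area_teardown(struct area *a)
{
        int *map;

        pthread_mutex_lock(&a->lock);
        map = a->map;
        a->map = NULL;          /* detach under the lock */
        pthread_mutex_unlock(&a->lock);

        free(map);              /* invalidate/free outside the lock */
}

int main(void)
{
        struct area a = { .lock = PTHREAD_MUTEX_INITIALIZER };

        a.map = calloc(16, sizeof(*a.map));
        area_teardown(&a);
        printf("map after teardown: %p\n", (void *)a.map);
        return 0;
}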
13
mm/util.c
@@ -7,6 +7,9 @@
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>

#include <asm/uaccess.h>

#include "internal.h"
@@ -398,6 +401,16 @@ struct address_space *page_mapping(struct page *page)
return mapping;
}

/*
* Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
*/
unsigned long vm_commit_limit(void)
{
return ((totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100) + total_swap_pages;
}

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
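The new vm_commit_limit() helper gives a single place that computes the commit ceiling enforced under OVERCOMMIT_NEVER: RAM minus hugetlb pages, scaled by sysctl_overcommit_ratio, plus swap. A rough userspace model of the arithmetic, with made-up page counts standing in for the kernel globals:

#include <stdio.h>

/*
 * Placeholder inputs; in the kernel these are totalram_pages,
 * hugetlb_total_pages(), sysctl_overcommit_ratio and total_swap_pages.
 */
static unsigned long totalram_pages   = 4UL << 18;   /* 4 GiB of 4 KiB pages */
static unsigned long hugetlb_pages    = 512;
static unsigned long overcommit_ratio = 50;          /* percent */
static unsigned long total_swap_pages = 1UL << 18;   /* 1 GiB of swap */

static unsigned long vm_commit_limit(void)
{
        return (totalram_pages - hugetlb_pages) * overcommit_ratio / 100
                + total_swap_pages;
}

int main(void)
{
        printf("commit limit: %lu pages\n", vm_commit_limit());
        return 0;
}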
48
mm/vmalloc.c
@@ -359,6 +359,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
if (unlikely(!va))
return ERR_PTR(-ENOMEM);

/*
* Only scan the relevant parts containing pointers to other objects
* to avoid false negatives.
*/
kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);

retry:
spin_lock(&vmap_area_lock);
/*
@@ -1546,7 +1552,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node, const void *caller)
pgprot_t prot, int node)
{
const int order = 0;
struct page **pages;
@@ -1560,13 +1566,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
PAGE_KERNEL, node, caller);
PAGE_KERNEL, node, area->caller);
area->flags |= VM_VPAGES;
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
area->pages = pages;
area->caller = caller;
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
@@ -1577,7 +1582,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;
gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;

if (node < 0)
if (node == NUMA_NO_NODE)
page = alloc_page(tmp_mask);
else
page = alloc_pages_node(node, tmp_mask, order);
@@ -1634,9 +1639,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!area)
goto fail;

addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
addr = __vmalloc_area_node(area, gfp_mask, prot, node);
if (!addr)
goto fail;
return NULL;

/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
@@ -1646,11 +1651,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
clear_vm_uninitialized_flag(area);

/*
* A ref_count = 3 is needed because the vm_struct and vmap_area
* structures allocated in the __get_vm_area_node() function contain
* references to the virtual address of the vmalloc'ed block.
* A ref_count = 2 is needed because vm_struct allocated in
* __get_vm_area_node() contains a reference to the virtual address of
* the vmalloc'ed block.
*/
kmemleak_alloc(addr, real_size, 3, gfp_mask);
kmemleak_alloc(addr, real_size, 2, gfp_mask);

return addr;

@@ -2563,6 +2568,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
if (!counters)
return;

/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
smp_rmb();
if (v->flags & VM_UNINITIALIZED)
return;

memset(counters, 0, nr_node_ids * sizeof(unsigned int));

for (nr = 0; nr < v->nr_pages; nr++)
@@ -2579,23 +2589,15 @@ static int s_show(struct seq_file *m, void *p)
struct vmap_area *va = p;
struct vm_struct *v;

if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
/*
* s_show can encounter race with remove_vm_area, !VM_VM_AREA on
* behalf of vmap area is being tear down or vm_map_ram allocation.
*/
if (!(va->flags & VM_VM_AREA))
return 0;

if (!(va->flags & VM_VM_AREA)) {
seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
(void *)va->va_start, (void *)va->va_end,
va->va_end - va->va_start);
return 0;
}

v = va->vm;

/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
smp_rmb();
if (v->flags & VM_UNINITIALIZED)
return 0;

seq_printf(m, "0x%pK-0x%pK %7ld",
v->addr, v->addr + v->size, v->size);
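Both show_numa_info() and s_show() now re-check VM_UNINITIALIZED after an smp_rmb() that pairs with the smp_wmb() in clear_vm_uninitialized_flag(), so a reader only trusts the vm_struct contents once the flag clear is visible. A userspace sketch of that publish/observe pairing, using C11 release/acquire ordering as a stand-in for the kernel barriers (struct vm_area, publish and show are illustrative names):

#include <stdatomic.h>
#include <stdio.h>

#define VM_UNINITIALIZED 0x1UL

struct vm_area {
        _Atomic unsigned long flags;
        unsigned long size;     /* payload readers must not trust too early */
};

/*
 * Writer: fill in the payload, then clear the flag with release semantics
 * (the analogue of the smp_wmb() before clearing VM_UNINITIALIZED).
 */
static void publish(struct vm_area *v, unsigned long size)
{
        v->size = size;
        atomic_fetch_and_explicit(&v->flags, ~VM_UNINITIALIZED,
                                  memory_order_release);
}

/*
 * Reader: load the flag with acquire semantics (the smp_rmb() side) and
 * bail out if the area is still being set up.
 */
static int show(struct vm_area *v)
{
        if (atomic_load_explicit(&v->flags, memory_order_acquire)
            & VM_UNINITIALIZED)
                return 0;       /* not published yet, skip */
        printf("area size: %lu\n", v->size);
        return 1;
}

int main(void)
{
        struct vm_area v = { .flags = VM_UNINITIALIZED };

        show(&v);               /* skipped */
        publish(&v, 4096);
        show(&v);               /* now visible */
        return 0;
}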
22
mm/vmstat.c
@@ -812,6 +812,7 @@ const char * const vmstat_text[] = {

#ifdef CONFIG_NUMA_BALANCING
"numa_pte_updates",
"numa_huge_pte_updates",
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
@@ -1229,6 +1230,20 @@ static void start_cpu_timer(int cpu)
schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}

static void vmstat_cpu_dead(int node)
{
int cpu;

get_online_cpus();
for_each_online_cpu(cpu)
if (cpu_to_node(cpu) == node)
goto end;

node_clear_state(node, N_CPU);
end:
put_online_cpus();
}

/*
* Use the cpu notifier to insure that the thresholds are recalculated
* when necessary.
@@ -1258,6 +1273,7 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_DEAD:
case CPU_DEAD_FROZEN:
refresh_zone_stat_thresholds();
vmstat_cpu_dead(cpu_to_node(cpu));
break;
default:
break;
@@ -1276,8 +1292,12 @@ static int __init setup_vmstat(void)

register_cpu_notifier(&vmstat_notifier);

for_each_online_cpu(cpu)
get_online_cpus();
for_each_online_cpu(cpu) {
start_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
}
put_online_cpus();
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
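vmstat_cpu_dead() clears a node's N_CPU state only when no online CPU still maps to that node, matching how setup_vmstat() now sets the state with CPUs pinned online. A small userspace model of that last-CPU-of-the-node check (the topology arrays below are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS  8
#define NR_NODES 2

/* Invented topology: which node each CPU belongs to, and whether it is online. */
static const int cpu_to_node[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
static bool cpu_online[NR_CPUS] = { true, true, true, true, true, true, true, true };
static bool node_has_cpu[NR_NODES] = { true, true };

/*
 * Analogue of vmstat_cpu_dead(): after a CPU goes offline, drop the node's
 * "has CPUs" state only if no online CPU still maps to that node.
 */
static void vmstat_cpu_dead(int node)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_online[cpu] && cpu_to_node[cpu] == node)
                        return;         /* node still has an online CPU */
        node_has_cpu[node] = false;
}

int main(void)
{
        for (int cpu = 4; cpu < NR_CPUS; cpu++) {       /* offline all of node 1 */
                cpu_online[cpu] = false;
                vmstat_cpu_dead(cpu_to_node[cpu]);
        }
        printf("node 0 has cpus: %d, node 1 has cpus: %d\n",
               node_has_cpu[0], node_has_cpu[1]);
        return 0;
}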
195
mm/zswap.c
@@ -217,6 +217,7 @@ static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
if (!entry)
return NULL;
entry->refcount = 1;
RB_CLEAR_NODE(&entry->rbnode);
return entry;
}

@@ -225,19 +226,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
kmem_cache_free(zswap_entry_cache, entry);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
entry->refcount++;
}

/* caller must hold the tree lock */
static int zswap_entry_put(struct zswap_entry *entry)
{
entry->refcount--;
return entry->refcount;
}

/*********************************
* rbtree functions
**********************************/
@@ -285,6 +273,61 @@ static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
if (!RB_EMPTY_NODE(&entry->rbnode)) {
rb_erase(&entry->rbnode, root);
RB_CLEAR_NODE(&entry->rbnode);
}
}

/*
* Carries out the common pattern of freeing and entry's zsmalloc allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
static void zswap_free_entry(struct zswap_tree *tree,
struct zswap_entry *entry)
{
zbud_free(tree->pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
zswap_pool_pages = zbud_get_pool_size(tree->pool);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
entry->refcount++;
}

/* caller must hold the tree lock
* remove from the tree and free it, if nobody reference the entry
*/
static void zswap_entry_put(struct zswap_tree *tree,
struct zswap_entry *entry)
{
int refcount = --entry->refcount;

BUG_ON(refcount < 0);
if (refcount == 0) {
zswap_rb_erase(&tree->rbroot, entry);
zswap_free_entry(tree, entry);
}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
pgoff_t offset)
{
struct zswap_entry *entry = NULL;

entry = zswap_rb_search(root, offset);
if (entry)
zswap_entry_get(entry);

return entry;
}

/*********************************
* per-cpu code
**********************************/
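With this hunk, a zswap entry is created holding one reference for the rbtree, lookups take an extra reference through zswap_entry_find_get(), and zswap_entry_put() unlinks the entry and frees it once the count reaches zero. A userspace sketch of that ownership scheme, with a plain linked list standing in for the rbtree and invented helper names:

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for a zswap entry: the "tree" owns one reference,
 * temporary users take and drop their own. */
struct entry {
        int refcount;
        long offset;
        struct entry *next;
};

static struct entry *tree;      /* list head standing in for the rbtree */

static struct entry *entry_create(long offset)
{
        struct entry *e = calloc(1, sizeof(*e));

        e->refcount = 1;        /* initial reference held by the tree */
        e->offset = offset;
        e->next = tree;
        tree = e;
        return e;
}

static void tree_erase(struct entry *victim)
{
        for (struct entry **p = &tree; *p; p = &(*p)->next)
                if (*p == victim) {
                        *p = victim->next;
                        return;
                }
}

/* Analogue of zswap_entry_find_get(): look up and take a reference. */
static struct entry *find_get(long offset)
{
        for (struct entry *e = tree; e; e = e->next)
                if (e->offset == offset) {
                        e->refcount++;
                        return e;
                }
        return NULL;
}

/* Analogue of zswap_entry_put(): the last put unlinks and frees. */
static void entry_put(struct entry *e)
{
        if (--e->refcount == 0) {
                tree_erase(e);
                free(e);
        }
}

int main(void)
{
        struct entry *e = entry_create(42);
        struct entry *u = find_get(42); /* refcount 2 */

        entry_put(u);                   /* user done, back to 1 */
        entry_put(e);                   /* drop the tree's reference: freed */
        printf("lookup after free: %p\n", (void *)find_get(42));
        return 0;
}

Routing every drop through a single put helper is what lets the writeback, load and invalidate paths below stop reasoning about who unlinks the node from the tree.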
@@ -368,18 +411,6 @@ static bool zswap_is_full(void)
zswap_pool_pages);
}

/*
* Carries out the common pattern of freeing and entry's zsmalloc allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
static void zswap_free_entry(struct zswap_tree *tree, struct zswap_entry *entry)
{
zbud_free(tree->pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
zswap_pool_pages = zbud_get_pool_size(tree->pool);
}

/*********************************
* writeback code
**********************************/
@@ -387,7 +418,7 @@ static void zswap_free_entry(struct zswap_tree *tree, struct zswap_entry *entry)
enum zswap_get_swap_ret {
ZSWAP_SWAPCACHE_NEW,
ZSWAP_SWAPCACHE_EXIST,
ZSWAP_SWAPCACHE_NOMEM
ZSWAP_SWAPCACHE_FAIL,
};

/*
@@ -401,9 +432,10 @@ enum zswap_get_swap_ret {
* added to the swap cache, and returned in retpage.
*
* If success, the swap cache page is returned in retpage
* Returns 0 if page was already in the swap cache, page is not locked
* Returns 1 if the new page needs to be populated, page is locked
* Returns <0 on error
* Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
* Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
* the new page is added to swapcache and locked
* Returns ZSWAP_SWAPCACHE_FAIL on error
*/
static int zswap_get_swap_cache_page(swp_entry_t entry,
struct page **retpage)
@@ -475,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
if (new_page)
page_cache_release(new_page);
if (!found_page)
return ZSWAP_SWAPCACHE_NOMEM;
return ZSWAP_SWAPCACHE_FAIL;
*retpage = found_page;
return ZSWAP_SWAPCACHE_EXIST;
}
@@ -502,7 +534,7 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
struct page *page;
u8 *src, *dst;
unsigned int dlen;
int ret, refcount;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
@@ -517,23 +549,22 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)

/* find and ref zswap entry */
spin_lock(&tree->lock);
entry = zswap_rb_search(&tree->rbroot, offset);
entry = zswap_entry_find_get(&tree->rbroot, offset);
if (!entry) {
/* entry was invalidated */
spin_unlock(&tree->lock);
return 0;
}
zswap_entry_get(entry);
spin_unlock(&tree->lock);
BUG_ON(offset != entry->offset);

/* try to allocate swap cache page */
switch (zswap_get_swap_cache_page(swpentry, &page)) {
case ZSWAP_SWAPCACHE_NOMEM: /* no memory */
case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
ret = -ENOMEM;
goto fail;

case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
case ZSWAP_SWAPCACHE_EXIST:
/* page is already in the swap cache, ignore for now */
page_cache_release(page);
ret = -EEXIST;
@@ -556,43 +587,44 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
SetPageUptodate(page);
}

/* move it to the tail of the inactive list after end_writeback */
SetPageReclaim(page);

/* start writeback */
__swap_writepage(page, &wbc, end_swap_bio_write);
page_cache_release(page);
zswap_written_back_pages++;

spin_lock(&tree->lock);

/* drop local reference */
zswap_entry_put(entry);
/* drop the initial reference from entry creation */
refcount = zswap_entry_put(entry);
zswap_entry_put(tree, entry);

/*
* There are three possible values for refcount here:
* (1) refcount is 1, load is in progress, unlink from rbtree,
* load will free
* (2) refcount is 0, (normal case) entry is valid,
* remove from rbtree and free entry
* (3) refcount is -1, invalidate happened during writeback,
* free entry
*/
if (refcount >= 0) {
/* no invalidate yet, remove from rbtree */
rb_erase(&entry->rbnode, &tree->rbroot);
}
* There are two possible situations for entry here:
* (1) refcount is 1(normal case), entry is valid and on the tree
* (2) refcount is 0, entry is freed and not on the tree
* because invalidate happened during writeback
* search the tree and free the entry if find entry
*/
if (entry == zswap_rb_search(&tree->rbroot, offset))
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);
if (refcount <= 0) {
/* free the entry */
zswap_free_entry(tree, entry);
return 0;
}
return -EAGAIN;

goto end;

/*
* if we get here due to ZSWAP_SWAPCACHE_EXIST
* a load may happening concurrently
* it is safe and okay to not free the entry
* if we free the entry in the following put
* it it either okay to return !0
*/
fail:
spin_lock(&tree->lock);
zswap_entry_put(entry);
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);

end:
return ret;
}

@@ -676,11 +708,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
if (ret == -EEXIST) {
zswap_duplicate_entry++;
/* remove from rbtree */
rb_erase(&dupentry->rbnode, &tree->rbroot);
if (!zswap_entry_put(dupentry)) {
/* free */
zswap_free_entry(tree, dupentry);
}
zswap_rb_erase(&tree->rbroot, dupentry);
zswap_entry_put(tree, dupentry);
}
} while (ret == -EEXIST);
spin_unlock(&tree->lock);
@@ -709,17 +738,16 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
struct zswap_entry *entry;
u8 *src, *dst;
unsigned int dlen;
int refcount, ret;
int ret;

/* find */
spin_lock(&tree->lock);
entry = zswap_rb_search(&tree->rbroot, offset);
entry = zswap_entry_find_get(&tree->rbroot, offset);
if (!entry) {
/* entry was written back */
spin_unlock(&tree->lock);
return -1;
}
zswap_entry_get(entry);
spin_unlock(&tree->lock);

/* decompress */
@@ -734,22 +762,9 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
BUG_ON(ret);

spin_lock(&tree->lock);
refcount = zswap_entry_put(entry);
if (likely(refcount)) {
spin_unlock(&tree->lock);
return 0;
}
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);

/*
* We don't have to unlink from the rbtree because
* zswap_writeback_entry() or zswap_frontswap_invalidate page()
* has already done this for us if we are the last reference.
*/
/* free */

zswap_free_entry(tree, entry);

return 0;
}

@@ -758,7 +773,6 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
struct zswap_tree *tree = zswap_trees[type];
struct zswap_entry *entry;
int refcount;

/* find */
spin_lock(&tree->lock);
@@ -770,20 +784,12 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
}

/* remove from rbtree */
rb_erase(&entry->rbnode, &tree->rbroot);
zswap_rb_erase(&tree->rbroot, entry);

/* drop the initial reference from entry creation */
refcount = zswap_entry_put(entry);
zswap_entry_put(tree, entry);

spin_unlock(&tree->lock);

if (refcount) {
/* writeback in progress, writeback will free */
return;
}

/* free */
zswap_free_entry(tree, entry);
}

/* frees all zswap entries for the given swap type */
@@ -797,11 +803,8 @@ static void zswap_frontswap_invalidate_area(unsigned type)

/* walk the tree and free everything */
spin_lock(&tree->lock);
rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) {
zbud_free(tree->pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
}
rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
zswap_free_entry(tree, entry);
tree->rbroot = RB_ROOT;
spin_unlock(&tree->lock);