Merge branch 'slab/urgent' into slab/next
mm/failslab.c

@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 static int __init failslab_debugfs_init(void)
 {
 	struct dentry *dir;
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-	int err;
 
-	err = init_fault_attr_dentries(&failslab.attr, "failslab");
-	if (err)
-		return err;
+	dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
 
-	if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir,
+	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
 				&failslab.ignore_gfp_wait))
 		goto fail;
-	if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir,
+	if (!debugfs_create_bool("cache-filter", mode, dir,
 				&failslab.cache_filter))
 		goto fail;
 
 	return 0;
 fail:
-	cleanup_fault_attr_dentries(&failslab.attr);
+	debugfs_remove_recursive(dir);
 
 	return -ENOMEM;
 }
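Note: this hunk (and the matching mm/page_alloc.c hunk below) moves fault-injection debugfs setup from init_fault_attr_dentries()/cleanup_fault_attr_dentries() over to fault_create_debugfs_attr(), which returns the directory dentry or an ERR_PTR, so teardown becomes a single debugfs_remove_recursive(). A minimal sketch of a converted user, with an invented "fail_foo" attribute (illustration only, not code from this merge):

#include <linux/fault-inject.h>
#include <linux/debugfs.h>
#include <linux/init.h>

static DECLARE_FAULT_ATTR(fail_foo);	/* hypothetical fault attr */

static int __init fail_foo_debugfs_init(void)
{
	struct dentry *dir;

	/* one call creates the "fail_foo" directory plus the standard attr files */
	dir = fault_create_debugfs_attr("fail_foo", NULL, &fail_foo);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	/* extra per-user files would hang off "dir"; on failure, a single */
	/* debugfs_remove_recursive(dir) cleans everything up.             */
	return 0;
}
late_initcall(fail_foo_debugfs_init);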
mm/filemap.c | 106
@@ -33,7 +33,6 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include <linux/cleancache.h>
 #include "internal.h"
 
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	int error;
 
 	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageSwapBacked(page));
 
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	if (likely(!error)) {
 		mapping->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
-		if (PageSwapBacked(page))
-			__inc_zone_page_state(page, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
-	 * before shmem_readpage has a chance to mark them as SwapBacked: they
-	 * need to go on the anon lru below, and mem_cgroup_cache_charge
-	 * (called in add_to_page_cache) needs to know where they're going too.
-	 */
-	if (mapping_cap_swap_backed(mapping))
-		SetPageSwapBacked(page);
-
 	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0) {
-		if (page_is_file_cache(page))
-			lru_cache_add_file(page);
-		else
-			lru_cache_add_anon(page);
-	}
+	if (ret == 0)
+		lru_cache_add_file(page);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -714,9 +699,16 @@ repeat:
 		page = radix_tree_deref_slot(pagep);
 		if (unlikely(!page))
 			goto out;
-		if (radix_tree_deref_retry(page))
-			goto repeat;
-
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto repeat;
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so return it without
+			 * attempting to raise page count.
+			 */
+			goto out;
+		}
 		if (!page_cache_get_speculative(page))
 			goto repeat;
 
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 
 repeat:
 	page = find_get_page(mapping, offset);
-	if (page) {
+	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, start, nr_pages);
+				(void ***)pages, NULL, start, nr_pages);
 	ret = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
@@ -849,13 +841,22 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page)) {
-			WARN_ON(start | i);
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				WARN_ON(start | i);
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so skip over it -
+			 * we only reach this from invalidate_mapping_pages().
+			 */
+			continue;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, index, nr_pages);
+				(void ***)pages, NULL, index, nr_pages);
 	ret = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
@@ -912,12 +913,22 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so stop looking for
+			 * contiguous pages.
+			 */
+			break;
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -977,12 +988,21 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * This function is never used on a shmem/tmpfs
+			 * mapping, so a swap entry won't be found here.
+			 */
+			BUG();
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
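Note: after these hunks a page-cache lookup on a shmem/tmpfs mapping can return an exceptional radix-tree entry (an encoded swap entry) instead of a struct page, so callers that may see such mappings have to test for it before dereferencing the result. A sketch of the expected caller pattern, distilled from the mm/mincore.c and mm/memcontrol.c hunks further down (the helper name is invented):

static bool page_resident(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_page(mapping, index);

	if (radix_tree_exceptional_entry(page)) {
		/* not a page: shmem/tmpfs stored a swap entry in this slot */
		swp_entry_t swap = radix_to_swp_entry(page);

		page = find_get_page(&swapper_space, swap.val);
	}
	if (!page)
		return false;
	page_cache_release(page);	/* drop the reference find_get_page took */
	return true;
}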
mm/memcontrol.c

@@ -35,7 +35,6 @@
 #include <linux/limits.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 		return 0;
 	if (PageCompound(page))
 		return 0;
-	/*
-	 * Corner case handling. This is called from add_to_page_cache()
-	 * in usual. But some FS (shmem) precharges this page before calling it
-	 * and call add_to_page_cache() with GFP_NOWAIT.
-	 *
-	 * For GFP_NOWAIT case, the page may be pre-charged before calling
-	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
-	 * charge twice. (It works but has to pay a bit larger cost.)
-	 * And when the page is SwapCache, it should take swap information
-	 * into account. This is under lock_page() now.
-	 */
-	if (!(gfp_mask & __GFP_WAIT)) {
-		struct page_cgroup *pc;
-
-		pc = lookup_page_cgroup(page);
-		if (!pc)
-			return 0;
-		lock_page_cgroup(pc);
-		if (PageCgroupUsed(pc)) {
-			unlock_page_cgroup(pc);
-			return 0;
-		}
-		unlock_page_cgroup(pc);
-	}
 
 	if (unlikely(!mm))
 		mm = &init_mm;
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 	cgroup_release_and_wakeup_rmdir(&mem->css);
 }
 
-/*
- * A call to try to shrink memory usage on charge failure at shmem's swapin.
- * Calling hierarchical_reclaim is not enough because we should update
- * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
- * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
- * not from the memcg which this page would be charged to.
- * try_charge_swapin does all of these works properly.
- */
-int mem_cgroup_shmem_charge_fallback(struct page *page,
-			    struct mm_struct *mm,
-			    gfp_t gfp_mask)
-{
-	struct mem_cgroup *mem;
-	int ret;
-
-	if (mem_cgroup_disabled())
-		return 0;
-
-	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
-	if (!ret)
-		mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
-
-	return ret;
-}
-
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 	pgoff = pte_to_pgoff(ptent);
 
 	/* page is moved even if it's not RSS of this task(page-faulted). */
-	if (!mapping_cap_swap_backed(mapping)) { /* normal file */
-		page = find_get_page(mapping, pgoff);
-	} else { /* shmem/tmpfs file. we should take account of swap too. */
-		swp_entry_t ent;
-		mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
-		if (do_swap_account)
-			entry->val = ent.val;
-	}
+	page = find_get_page(mapping, pgoff);
 
+#ifdef CONFIG_SWAP
+	/* shmem/tmpfs may report page out on swap: account for that too. */
+	if (radix_tree_exceptional_entry(page)) {
+		swp_entry_t swap = radix_to_swp_entry(page);
+		if (do_swap_account)
+			*entry = swap;
+		page = find_get_page(&swapper_space, swap.val);
+	}
+#endif
 	return page;
 }
mm/memory-failure.c

@@ -53,6 +53,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memory_hotplug.h>
 #include <linux/mm_inline.h>
+#include <linux/kfifo.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
 	__memory_failure(pfn, trapno, 0);
 }
 
+#define MEMORY_FAILURE_FIFO_ORDER	4
+#define MEMORY_FAILURE_FIFO_SIZE	(1 << MEMORY_FAILURE_FIFO_ORDER)
+
+struct memory_failure_entry {
+	unsigned long pfn;
+	int trapno;
+	int flags;
+};
+
+struct memory_failure_cpu {
+	DECLARE_KFIFO(fifo, struct memory_failure_entry,
+		      MEMORY_FAILURE_FIFO_SIZE);
+	spinlock_t lock;
+	struct work_struct work;
+};
+
+static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
+
+/**
+ * memory_failure_queue - Schedule handling memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: Flags for memory failure handling
+ *
+ * This function is called by the low level hardware error handler
+ * when it detects hardware memory corruption of a page. It schedules
+ * the recovering of error page, including dropping pages, killing
+ * processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Can run in IRQ context.
+ */
+void memory_failure_queue(unsigned long pfn, int trapno, int flags)
+{
+	struct memory_failure_cpu *mf_cpu;
+	unsigned long proc_flags;
+	struct memory_failure_entry entry = {
+		.pfn = pfn,
+		.trapno = trapno,
+		.flags = flags,
+	};
+
+	mf_cpu = &get_cpu_var(memory_failure_cpu);
+	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+	if (kfifo_put(&mf_cpu->fifo, &entry))
+		schedule_work_on(smp_processor_id(), &mf_cpu->work);
+	else
+		pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n",
+		       pfn);
+	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+	put_cpu_var(memory_failure_cpu);
+}
+EXPORT_SYMBOL_GPL(memory_failure_queue);
+
+static void memory_failure_work_func(struct work_struct *work)
+{
+	struct memory_failure_cpu *mf_cpu;
+	struct memory_failure_entry entry = { 0, };
+	unsigned long proc_flags;
+	int gotten;
+
+	mf_cpu = &__get_cpu_var(memory_failure_cpu);
+	for (;;) {
+		spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+		gotten = kfifo_get(&mf_cpu->fifo, &entry);
+		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+		if (!gotten)
+			break;
+		__memory_failure(entry.pfn, entry.trapno, entry.flags);
+	}
+}
+
+static int __init memory_failure_init(void)
+{
+	struct memory_failure_cpu *mf_cpu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		mf_cpu = &per_cpu(memory_failure_cpu, cpu);
+		spin_lock_init(&mf_cpu->lock);
+		INIT_KFIFO(mf_cpu->fifo);
+		INIT_WORK(&mf_cpu->work, memory_failure_work_func);
+	}
+
+	return 0;
+}
+core_initcall(memory_failure_init);
+
 /**
  * unpoison_memory - Unpoison a previously poisoned page
  * @pfn: Page number of the to be unpoisoned page
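Note: per the kerneldoc above, memory_failure_queue() only pushes the pfn into a per-CPU kfifo and schedules the work item, so it can be called from contexts that cannot run the full recovery themselves, including IRQ context. A hypothetical caller — the driver and its foo_read_bad_pfn() helper are invented for illustration:

static irqreturn_t foo_scrubber_irq(int irq, void *dev)
{
	/* pfn of the corrupted page, as reported by hypothetical hardware */
	unsigned long pfn = foo_read_bad_pfn(dev);

	memory_failure_queue(pfn, 0 /* trapno */, 0 /* flags */);
	return IRQ_HANDLED;
}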
mm/mincore.c | 11
@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	 * file will not get a swp_entry_t in its pte, but rather it is like
 	 * any other file mapping (ie. marked !present and faulted in with
 	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
-	 *
-	 * However when tmpfs moves the page from pagecache and into swapcache,
-	 * it is still in core, but the find_get_page below won't find it.
-	 * No big deal, but make a note of it.
 	 */
 	page = find_get_page(mapping, pgoff);
+#ifdef CONFIG_SWAP
+	/* shmem/tmpfs may return swap: account for swapcache page too. */
+	if (radix_tree_exceptional_entry(page)) {
+		swp_entry_t swap = radix_to_swp_entry(page);
+		page = find_get_page(&swapper_space, swap.val);
+	}
+#endif
 	if (page) {
 		present = PageUptodate(page);
 		page_cache_release(page);
mm/oom_kill.c

@@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	do_each_thread(g, p) {
 		unsigned int points;
 
-		if (!p->mm)
+		if (p->exit_state)
 			continue;
 		if (oom_unkillable_task(p, mem, nodemask))
 			continue;
@@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		 */
 		if (test_tsk_thread_flag(p, TIF_MEMDIE))
 			return ERR_PTR(-1UL);
+		if (!p->mm)
+			continue;
 
 		if (p->flags & PF_EXITING) {
 			/*
mm/page_alloc.c

@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
 {
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
-	int err;
 
-	err = init_fault_attr_dentries(&fail_page_alloc.attr,
-				       "fail_page_alloc");
-	if (err)
-		return err;
-
-	dir = fail_page_alloc.attr.dir;
+	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+					&fail_page_alloc.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
 
 	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
 				&fail_page_alloc.ignore_gfp_wait))
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
 
 	return 0;
 fail:
-	cleanup_fault_attr_dentries(&fail_page_alloc.attr);
+	debugfs_remove_recursive(dir);
 
 	return -ENOMEM;
 }
mm/shmem.c | 1527 (file diff suppressed because it is too large)
mm/slab.c | 99
@@ -622,6 +622,51 @@ int slab_is_available(void)
 static struct lock_class_key on_slab_l3_key;
 static struct lock_class_key on_slab_alc_key;
 
+static struct lock_class_key debugobj_l3_key;
+static struct lock_class_key debugobj_alc_key;
+
+static void slab_set_lock_classes(struct kmem_cache *cachep,
+	struct lock_class_key *l3_key, struct lock_class_key *alc_key,
+	int q)
+{
+	struct array_cache **alc;
+	struct kmem_list3 *l3;
+	int r;
+
+	l3 = cachep->nodelists[q];
+	if (!l3)
+		return;
+
+	lockdep_set_class(&l3->list_lock, l3_key);
+	alc = l3->alien;
+	/*
+	 * FIXME: This check for BAD_ALIEN_MAGIC
+	 * should go away when common slab code is taught to
+	 * work even without alien caches.
+	 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+	 * for alloc_alien_cache,
+	 */
+	if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+		return;
+	for_each_node(r) {
+		if (alc[r])
+			lockdep_set_class(&alc[r]->lock, alc_key);
+	}
+}
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+	slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{
+	int node;
+
+	for_each_online_node(node)
+		slab_set_debugobj_lock_classes_node(cachep, node);
+}
+
 static void init_node_lock_keys(int q)
 {
 	struct cache_sizes *s = malloc_sizes;
@@ -630,29 +675,14 @@ static void init_node_lock_keys(int q)
 		return;
 
 	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
-		struct array_cache **alc;
 		struct kmem_list3 *l3;
-		int r;
 
 		l3 = s->cs_cachep->nodelists[q];
 		if (!l3 || OFF_SLAB(s->cs_cachep))
 			continue;
-		lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
-		alc = l3->alien;
-		/*
-		 * FIXME: This check for BAD_ALIEN_MAGIC
-		 * should go away when common slab code is taught to
-		 * work even without alien caches.
-		 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-		 * for alloc_alien_cache,
-		 */
-		if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-			continue;
-		for_each_node(r) {
-			if (alc[r])
-				lockdep_set_class(&alc[r]->lock,
-						&on_slab_alc_key);
-		}
+
+		slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
+				&on_slab_alc_key, q);
 	}
 }
 
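Note: slab_set_lock_classes() above wraps the usual lockdep idiom of giving a group of locks their own class key, so lockdep can tell the on-slab and debug-objects cases apart from other locks embedded in the same structure type. A minimal sketch of that idiom with invented names (not code from this merge):

struct my_node {
	spinlock_t list_lock;
};

static struct lock_class_key my_list_lock_key;

static void my_node_init(struct my_node *n)
{
	spin_lock_init(&n->list_lock);
	/* reclassify: all my_node list_locks now share one distinct lockdep class */
	lockdep_set_class(&n->list_lock, &my_list_lock_key);
}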
@@ -671,6 +701,14 @@ static void init_node_lock_keys(int q)
 static inline void init_lock_keys(void)
 {
 }
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{
+}
 #endif
 
 /*
@@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu)
 		spin_unlock_irq(&l3->list_lock);
 		kfree(shared);
 		free_alien_cache(alien);
+		if (cachep->flags & SLAB_DEBUG_OBJECTS)
+			slab_set_debugobj_lock_classes_node(cachep, node);
 	}
 	init_node_lock_keys(node);
 
@@ -1626,6 +1666,9 @@ void __init kmem_cache_init_late(void)
 {
 	struct kmem_cache *cachep;
 
+	/* Annotate slab for lockdep -- annotate the malloc caches */
+	init_lock_keys();
+
 	/* 6) resize the head arrays to their final sizes */
 	mutex_lock(&cache_chain_mutex);
 	list_for_each_entry(cachep, &cache_chain, next)
@@ -1636,9 +1679,6 @@ void __init kmem_cache_init_late(void)
 	/* Done! */
 	g_cpucache_up = FULL;
 
-	/* Annotate slab for lockdep -- annotate the malloc caches */
-	init_lock_keys();
-
 	/*
 	 * Register a cpu startup notifier callback that initializes
 	 * cpu_cache_get for all new cpus
@@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		goto oops;
 	}
 
+	if (flags & SLAB_DEBUG_OBJECTS) {
+		/*
+		 * Would deadlock through slab_destroy()->call_rcu()->
+		 * debug_object_activate()->kmem_cache_alloc().
+		 */
+		WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
+
+		slab_set_debugobj_lock_classes(cachep);
+	}
+
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
 oops:
@@ -3398,7 +3448,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
-	if (nodeid == -1)
+	if (nodeid == NUMA_NO_NODE)
 		nodeid = slab_node;
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3929,7 +3979,7 @@ fail:
 
 struct ccupdate_struct {
 	struct kmem_cache *cachep;
-	struct array_cache *new[NR_CPUS];
+	struct array_cache *new[0];
 };
 
 static void do_ccupdate_local(void *info)
@@ -3951,7 +4001,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	struct ccupdate_struct *new;
 	int i;
 
-	new = kzalloc(sizeof(*new), gfp);
+	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
+		      gfp);
 	if (!new)
 		return -ENOMEM;
 
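Note: the last two hunks shrink ccupdate_struct by replacing the fixed struct array_cache *new[NR_CPUS] tail with a zero-sized trailing array and sizing the kzalloc() by nr_cpu_ids, so the allocation tracks the running machine rather than the compile-time maximum. The general pattern, as a plain-C sketch with invented names (illustration only):

#include <stdlib.h>

struct percpu_ptrs {
	int nr;
	void *ptrs[];			/* written as [0] in older kernel style */
};

static struct percpu_ptrs *alloc_percpu_ptrs(int nr_possible)
{
	/* one allocation: header plus exactly nr_possible pointer slots */
	struct percpu_ptrs *p = calloc(1, sizeof(*p) + nr_possible * sizeof(void *));

	if (p)
		p->nr = nr_possible;
	return p;
}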
mm/slub.c | 22
@@ -675,7 +675,7 @@ static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
 		return check_bytes8(start, value, bytes);
 
 	value64 = value | value << 8 | value << 16 | value << 24;
-	value64 = value64 | value64 << 32;
+	value64 = (value64 & 0xffffffff) | value64 << 32;
 	prefix = 8 - ((unsigned long)start) % 8;
 
 	if (prefix) {
@@ -1508,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
 				struct page *page, int tail)
 {
 	n->nr_partial++;
-	if (tail)
+	if (tail == DEACTIVATE_TO_TAIL)
 		list_add_tail(&page->lru, &n->partial);
 	else
 		list_add(&page->lru, &n->partial);
@@ -1755,13 +1755,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 	enum slab_modes l = M_NONE, m = M_NONE;
 	void *freelist;
 	void *nextfree;
-	int tail = 0;
+	int tail = DEACTIVATE_TO_HEAD;
 	struct page new;
 	struct page old;
 
 	if (page->freelist) {
 		stat(s, DEACTIVATE_REMOTE_FREES);
-		tail = 1;
+		tail = DEACTIVATE_TO_TAIL;
 	}
 
 	c->tid = next_tid(c->tid);
@@ -1828,7 +1828,7 @@ redo:
 
 	new.frozen = 0;
 
-	if (!new.inuse && n->nr_partial < s->min_partial)
+	if (!new.inuse && n->nr_partial > s->min_partial)
 		m = M_FREE;
 	else if (new.freelist) {
 		m = M_PARTIAL;
@@ -1867,7 +1867,7 @@ redo:
 	if (m == M_PARTIAL) {
 
 		add_partial(n, page, tail);
-		stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+		stat(s, tail);
 
 	} else if (m == M_FULL) {
 
@@ -2351,7 +2351,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		 */
 		if (unlikely(!prior)) {
 			remove_full(s, page);
-			add_partial(n, page, 0);
+			add_partial(n, page, DEACTIVATE_TO_TAIL);
 			stat(s, FREE_ADD_PARTIAL);
 		}
 	}
@@ -2361,11 +2361,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 slab_empty:
 	if (prior) {
 		/*
-		 * Slab still on the partial list.
+		 * Slab on the partial list.
 		 */
 		remove_partial(n, page);
 		stat(s, FREE_REMOVE_PARTIAL);
-	}
+	} else
+		/* Slab must be on the full list */
+		remove_full(s, page);
 
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
@@ -2667,7 +2669,7 @@ static void early_kmem_cache_node_alloc(int node)
 	init_kmem_cache_node(n, kmem_cache_node);
 	inc_slabs_node(kmem_cache_node, node, page->objects);
 
-	add_partial(n, page, 0);
+	add_partial(n, page, DEACTIVATE_TO_HEAD);
 }
 
 static void free_kmem_cache_nodes(struct kmem_cache *s)
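Note: the mm/slub.c hunks stop passing a 0/1 "tail" flag into add_partial() and pass the statistics item itself (DEACTIVATE_TO_HEAD / DEACTIVATE_TO_TAIL), so one value both selects the list end and feeds stat(s, tail) without a conditional. Sketch of the idiom with invented names (kernel-style, not code from this merge):

enum place_item { TO_HEAD, TO_TAIL, NR_PLACE_ITEMS };

static unsigned long place_stats[NR_PLACE_ITEMS];

static void add_entry(struct list_head *head, struct list_head *entry, int where)
{
	if (where == TO_TAIL)
		list_add_tail(entry, head);
	else
		list_add(entry, head);
	place_stats[where]++;		/* the flag doubles as the stat index */
}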
mm/swapfile.c

@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 
 	/*
 	 * Find out how many pages are allowed for a single swap
-	 * device. There are two limiting factors: 1) the number of
-	 * bits for the swap offset in the swp_entry_t type and
-	 * 2) the number of bits in the a swap pte as defined by
-	 * the different architectures. In order to find the
-	 * largest possible bit mask a swap entry with swap type 0
+	 * device. There are three limiting factors: 1) the number
+	 * of bits for the swap offset in the swp_entry_t type, and
+	 * 2) the number of bits in the swap pte as defined by the
+	 * the different architectures, and 3) the number of free bits
+	 * in an exceptional radix_tree entry. In order to find the
+	 * largest possible bit mask, a swap entry with swap type 0
 	 * and swap offset ~0UL is created, encoded to a swap pte,
-	 * decoded to a swp_entry_t again and finally the swap
+	 * decoded to a swp_entry_t again, and finally the swap
 	 * offset is extracted. This will mask all the bits from
 	 * the initial ~0UL mask that can't be encoded in either
 	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.
+	 * swap pte. Then the same is done for a radix_tree entry.
 	 */
 	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+			swp_entry_to_pte(swp_entry(0, ~0UL))));
+	maxpages = swp_offset(radix_to_swp_entry(
+			swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
 
 	if (maxpages > swap_header->info.last_page) {
 		maxpages = swap_header->info.last_page + 1;
 		/* p->max is an unsigned int: don't overflow it */
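Note: the comment above derives the largest usable swap offset by round-tripping an all-ones offset through each encoding; every pass drops the bits that encoding cannot represent, so what survives fits them all. The idea, sketched with hypothetical encode/decode helpers (illustration only):

/* hypothetical stand-ins for swp_entry_to_pte()/pte_to_swp_entry() etc. */
unsigned long encode_pte_offset(unsigned long off);
unsigned long decode_pte_offset(unsigned long off);
unsigned long encode_radix_offset(unsigned long off);
unsigned long decode_radix_offset(unsigned long off);

static unsigned long max_swap_pages(void)
{
	unsigned long offset = ~0UL;

	offset = decode_pte_offset(encode_pte_offset(offset));		/* arch pte limit */
	offset = decode_radix_offset(encode_radix_offset(offset));	/* exceptional entry limit */
	return offset + 1;						/* offsets are 0-based */
}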
mm/truncate.c

@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	unsigned long count = 0;
 	int i;
 
+	/*
+	 * Note: this function may get called on a shmem/tmpfs mapping:
+	 * pagevec_lookup() might then return 0 prematurely (because it
+	 * got a gangful of swap entries); but it's hardly worth worrying
+	 * about - it can rarely have anything to free from such a mapping
+	 * (most pages are dirty), and already skips over any difficulties.
+	 */
+
 	pagevec_init(&pvec, 0);
 	while (index <= end && pagevec_lookup(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {