Merge branch 'slab/urgent' into slab/next

Pekka Enberg
2011-09-19 17:46:07 +03:00
880 changed files with 40706 additions and 13347 deletions

View File

@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
static int __init failslab_debugfs_init(void)
{
struct dentry *dir;
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
int err;
err = init_fault_attr_dentries(&failslab.attr, "failslab");
if (err)
return err;
dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
if (IS_ERR(dir))
return PTR_ERR(dir);
if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir,
if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
&failslab.ignore_gfp_wait))
goto fail;
if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir,
if (!debugfs_create_bool("cache-filter", mode, dir,
&failslab.cache_filter))
goto fail;
return 0;
fail:
cleanup_fault_attr_dentries(&failslab.attr);
debugfs_remove_recursive(dir);
return -ENOMEM;
}
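For reference, a minimal sketch (not part of this commit) of how a caller is expected to use the new helper; the fault_attr name "fail_example" and the initcall are hypothetical:

#include <linux/fault-inject.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/init.h>

static DECLARE_FAULT_ATTR(fail_example);	/* hypothetical fault attribute */

static int __init fail_example_debugfs_init(void)
{
	/*
	 * One call creates the debugfs directory and the generic
	 * fault-injection control files, replacing the old
	 * init_fault_attr_dentries()/cleanup_fault_attr_dentries() pair.
	 */
	struct dentry *dir = fault_create_debugfs_attr("fail_example",
						       NULL, &fail_example);

	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_example_debugfs_init);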

View File

@@ -33,7 +33,6 @@
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
#include <linux/mm_inline.h> /* for page_is_file_cache() */
#include <linux/cleancache.h>
#include "internal.h"
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
int error;
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(PageSwapBacked(page));
error = mem_cgroup_cache_charge(page, current->mm,
gfp_mask & GFP_RECLAIM_MASK);
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
if (PageSwapBacked(page))
__inc_zone_page_state(page, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
{
int ret;
/*
* Splice_read and readahead add shmem/tmpfs pages into the page cache
* before shmem_readpage has a chance to mark them as SwapBacked: they
* need to go on the anon lru below, and mem_cgroup_cache_charge
* (called in add_to_page_cache) needs to know where they're going too.
*/
if (mapping_cap_swap_backed(mapping))
SetPageSwapBacked(page);
ret = add_to_page_cache(page, mapping, offset, gfp_mask);
if (ret == 0) {
if (page_is_file_cache(page))
lru_cache_add_file(page);
else
lru_cache_add_anon(page);
}
if (ret == 0)
lru_cache_add_file(page);
return ret;
}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -714,9 +699,16 @@ repeat:
page = radix_tree_deref_slot(pagep);
if (unlikely(!page))
goto out;
if (radix_tree_deref_retry(page))
goto repeat;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page))
goto repeat;
/*
* Otherwise, shmem/tmpfs must be storing a swap entry
* here as an exceptional entry: so return it without
* attempting to raise page count.
*/
goto out;
}
if (!page_cache_get_speculative(page))
goto repeat;
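A sketch (not from this commit) of how a caller that can see shmem/tmpfs mappings might handle the two kinds of value find_get_page() now returns; the helper name is hypothetical, and the pattern mirrors the mincore and memcontrol hunks later in this merge:

static struct page *get_page_or_swapcache(struct address_space *mapping,
					  pgoff_t index)
{
	struct page *page = find_get_page(mapping, index);

	if (radix_tree_exceptional_entry(page)) {
		/* Not a page: shmem stashed the swap slot in the radix tree. */
		swp_entry_t swap = radix_to_swp_entry(page);

		page = find_get_page(&swapper_space, swap.val);
	}
	return page;	/* NULL, or a page with a reference held */
}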
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
repeat:
page = find_get_page(mapping, offset);
if (page) {
if (page && !radix_tree_exception(page)) {
lock_page(page);
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
(void ***)pages, start, nr_pages);
(void ***)pages, NULL, start, nr_pages);
ret = 0;
for (i = 0; i < nr_found; i++) {
struct page *page;
@@ -849,13 +841,22 @@ repeat:
if (unlikely(!page))
continue;
/*
* This can only trigger when the entry at index 0 moves out
* of or back to the root: none yet gotten, safe to restart.
*/
if (radix_tree_deref_retry(page)) {
WARN_ON(start | i);
goto restart;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
/*
* Transient condition which can only trigger
* when entry at index 0 moves out of or back
* to root: none yet gotten, safe to restart.
*/
WARN_ON(start | i);
goto restart;
}
/*
* Otherwise, shmem/tmpfs must be storing a swap entry
* here as an exceptional entry: so skip over it -
* we only reach this from invalidate_mapping_pages().
*/
continue;
}
if (!page_cache_get_speculative(page))
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
(void ***)pages, index, nr_pages);
(void ***)pages, NULL, index, nr_pages);
ret = 0;
for (i = 0; i < nr_found; i++) {
struct page *page;
@@ -912,12 +913,22 @@ repeat:
if (unlikely(!page))
continue;
/*
* This can only trigger when the entry at index 0 moves out
* of or back to the root: none yet gotten, safe to restart.
*/
if (radix_tree_deref_retry(page))
goto restart;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
/*
* Transient condition which can only trigger
* when entry at index 0 moves out of or back
* to root: none yet gotten, safe to restart.
*/
goto restart;
}
/*
* Otherwise, shmem/tmpfs must be storing a swap entry
* here as an exceptional entry: so stop looking for
* contiguous pages.
*/
break;
}
if (!page_cache_get_speculative(page))
goto repeat;
@@ -977,12 +988,21 @@ repeat:
if (unlikely(!page))
continue;
/*
* This can only trigger when the entry at index 0 moves out
* of or back to the root: none yet gotten, safe to restart.
*/
if (radix_tree_deref_retry(page))
goto restart;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
/*
* Transient condition which can only trigger
* when entry at index 0 moves out of or back
* to root: none yet gotten, safe to restart.
*/
goto restart;
}
/*
* This function is never used on a shmem/tmpfs
* mapping, so a swap entry won't be found here.
*/
BUG();
}
if (!page_cache_get_speculative(page))
goto repeat;
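For context, a simplified sketch of the slot encoding these checks rely on (assumed from the radix-tree exceptional-entry patches this merge pulls in): bit 0 marks an indirect internal-node pointer, which radix_tree_deref_retry() catches, and bit 1 marks an exceptional entry, which shmem uses to store a swap value shifted up by RADIX_TREE_EXCEPTIONAL_SHIFT. A hypothetical helper showing the distinction:

static bool slot_is_shmem_swap(void *entry, swp_entry_t *swapp)
{
	if (!radix_tree_exception(entry))	/* NULL or an ordinary page pointer */
		return false;
	if (radix_tree_deref_retry(entry))	/* indirect pointer: caller must restart */
		return false;
	/*
	 * Exceptional entry: shmem stored swp_to_radix_entry(swap) here,
	 * so shifting back down recovers the original swp_entry_t.
	 */
	*swapp = radix_to_swp_entry(entry);
	return true;
}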

View File

@@ -35,7 +35,6 @@
#include <linux/limits.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
return 0;
if (PageCompound(page))
return 0;
/*
* Corner case handling. This is called from add_to_page_cache()
* in usual. But some FS (shmem) precharges this page before calling it
* and call add_to_page_cache() with GFP_NOWAIT.
*
* For GFP_NOWAIT case, the page may be pre-charged before calling
* add_to_page_cache(). (See shmem.c) check it here and avoid to call
* charge twice. (It works but has to pay a bit larger cost.)
* And when the page is SwapCache, it should take swap information
* into account. This is under lock_page() now.
*/
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
if (!pc)
return 0;
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
unlock_page_cgroup(pc);
return 0;
}
unlock_page_cgroup(pc);
}
if (unlikely(!mm))
mm = &init_mm;
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
cgroup_release_and_wakeup_rmdir(&mem->css);
}
/*
* A call to try to shrink memory usage on charge failure at shmem's swapin.
* Calling hierarchical_reclaim is not enough because we should update
* last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
* Moreover considering hierarchy, we should reclaim from the mem_over_limit,
* not from the memcg which this page would be charged to.
* try_charge_swapin does all of these works properly.
*/
int mem_cgroup_shmem_charge_fallback(struct page *page,
struct mm_struct *mm,
gfp_t gfp_mask)
{
struct mem_cgroup *mem;
int ret;
if (mem_cgroup_disabled())
return 0;
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
if (!ret)
mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
return ret;
}
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
pgoff = pte_to_pgoff(ptent);
/* page is moved even if it's not RSS of this task(page-faulted). */
if (!mapping_cap_swap_backed(mapping)) { /* normal file */
page = find_get_page(mapping, pgoff);
} else { /* shmem/tmpfs file. we should take account of swap too. */
swp_entry_t ent;
mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
if (do_swap_account)
entry->val = ent.val;
}
page = find_get_page(mapping, pgoff);
#ifdef CONFIG_SWAP
/* shmem/tmpfs may report page out on swap: account for that too. */
if (radix_tree_exceptional_entry(page)) {
swp_entry_t swap = radix_to_swp_entry(page);
if (do_swap_account)
*entry = swap;
page = find_get_page(&swapper_space, swap.val);
}
#endif
return page;
}

View File

@@ -53,6 +53,7 @@
#include <linux/hugetlb.h>
#include <linux/memory_hotplug.h>
#include <linux/mm_inline.h>
#include <linux/kfifo.h>
#include "internal.h"
int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
__memory_failure(pfn, trapno, 0);
}
#define MEMORY_FAILURE_FIFO_ORDER 4
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
struct memory_failure_entry {
unsigned long pfn;
int trapno;
int flags;
};
struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
spinlock_t lock;
struct work_struct work;
};
static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
/**
* memory_failure_queue - Schedule handling memory failure of a page.
* @pfn: Page Number of the corrupted page
* @trapno: Trap number reported in the signal to user space.
* @flags: Flags for memory failure handling
*
* This function is called by the low level hardware error handler
* when it detects hardware memory corruption of a page. It schedules
* the recovery of the error page, including dropping pages, killing
* processes, etc.
*
* The function is primarily of use for corruptions that
* happen outside the current execution context (e.g. when
* detected by a background scrubber)
*
* Can run in IRQ context.
*/
void memory_failure_queue(unsigned long pfn, int trapno, int flags)
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
struct memory_failure_entry entry = {
.pfn = pfn,
.trapno = trapno,
.flags = flags,
};
mf_cpu = &get_cpu_var(memory_failure_cpu);
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
if (kfifo_put(&mf_cpu->fifo, &entry))
schedule_work_on(smp_processor_id(), &mf_cpu->work);
else
pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n",
pfn);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
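A usage sketch (hypothetical caller, not from this commit) showing how a platform error handler might report a corrupted page from interrupt context:

static void example_report_bad_page(u64 physical_addr)
{
	unsigned long pfn = physical_addr >> PAGE_SHIFT;

	/* trapno = 0 and flags = 0: no trap to report, default handling */
	memory_failure_queue(pfn, 0, 0);
}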
static void memory_failure_work_func(struct work_struct *work)
{
struct memory_failure_cpu *mf_cpu;
struct memory_failure_entry entry = { 0, };
unsigned long proc_flags;
int gotten;
mf_cpu = &__get_cpu_var(memory_failure_cpu);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
__memory_failure(entry.pfn, entry.trapno, entry.flags);
}
}
static int __init memory_failure_init(void)
{
struct memory_failure_cpu *mf_cpu;
int cpu;
for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}
return 0;
}
core_initcall(memory_failure_init);
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page

View File

@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
* file will not get a swp_entry_t in its pte, but rather it is like
* any other file mapping (ie. marked !present and faulted in with
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
*
* However when tmpfs moves the page from pagecache and into swapcache,
* it is still in core, but the find_get_page below won't find it.
* No big deal, but make a note of it.
*/
page = find_get_page(mapping, pgoff);
#ifdef CONFIG_SWAP
/* shmem/tmpfs may return swap: account for swapcache page too. */
if (radix_tree_exceptional_entry(page)) {
swp_entry_t swap = radix_to_swp_entry(page);
page = find_get_page(&swapper_space, swap.val);
}
#endif
if (page) {
present = PageUptodate(page);
page_cache_release(page);

View File

@@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
do_each_thread(g, p) {
unsigned int points;
if (!p->mm)
if (p->exit_state)
continue;
if (oom_unkillable_task(p, mem, nodemask))
continue;
@@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
*/
if (test_tsk_thread_flag(p, TIF_MEMDIE))
return ERR_PTR(-1UL);
if (!p->mm)
continue;
if (p->flags & PF_EXITING) {
/*

View File

@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
{
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
int err;
err = init_fault_attr_dentries(&fail_page_alloc.attr,
"fail_page_alloc");
if (err)
return err;
dir = fail_page_alloc.attr.dir;
dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
&fail_page_alloc.attr);
if (IS_ERR(dir))
return PTR_ERR(dir);
if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
&fail_page_alloc.ignore_gfp_wait))
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
return 0;
fail:
cleanup_fault_attr_dentries(&fail_page_alloc.attr);
debugfs_remove_recursive(dir);
return -ENOMEM;
}

mm/shmem.c: 1527 changed lines (file diff suppressed because it is too large)

View File

@@ -622,6 +622,51 @@ int slab_is_available(void)
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;
static struct lock_class_key debugobj_l3_key;
static struct lock_class_key debugobj_alc_key;
static void slab_set_lock_classes(struct kmem_cache *cachep,
struct lock_class_key *l3_key, struct lock_class_key *alc_key,
int q)
{
struct array_cache **alc;
struct kmem_list3 *l3;
int r;
l3 = cachep->nodelists[q];
if (!l3)
return;
lockdep_set_class(&l3->list_lock, l3_key);
alc = l3->alien;
/*
* FIXME: This check for BAD_ALIEN_MAGIC
* should go away when common slab code is taught to
* work even without alien caches.
* Currently, non NUMA code returns BAD_ALIEN_MAGIC
* for alloc_alien_cache,
*/
if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
return;
for_each_node(r) {
if (alc[r])
lockdep_set_class(&alc[r]->lock, alc_key);
}
}
static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
}
static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
int node;
for_each_online_node(node)
slab_set_debugobj_lock_classes_node(cachep, node);
}
static void init_node_lock_keys(int q)
{
struct cache_sizes *s = malloc_sizes;
@@ -630,29 +675,14 @@ static void init_node_lock_keys(int q)
return;
for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
struct array_cache **alc;
struct kmem_list3 *l3;
int r;
l3 = s->cs_cachep->nodelists[q];
if (!l3 || OFF_SLAB(s->cs_cachep))
continue;
lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
alc = l3->alien;
/*
* FIXME: This check for BAD_ALIEN_MAGIC
* should go away when common slab code is taught to
* work even without alien caches.
* Currently, non NUMA code returns BAD_ALIEN_MAGIC
* for alloc_alien_cache,
*/
if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
continue;
for_each_node(r) {
if (alc[r])
lockdep_set_class(&alc[r]->lock,
&on_slab_alc_key);
}
slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
&on_slab_alc_key, q);
}
}
@@ -671,6 +701,14 @@ static void init_node_lock_keys(int q)
static inline void init_lock_keys(void)
{
}
static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}
static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
}
#endif
/*
@@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu)
spin_unlock_irq(&l3->list_lock);
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
slab_set_debugobj_lock_classes_node(cachep, node);
}
init_node_lock_keys(node);
@@ -1626,6 +1666,9 @@ void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/* 6) resize the head arrays to their final sizes */
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
@@ -1636,9 +1679,6 @@ void __init kmem_cache_init_late(void)
/* Done! */
g_cpucache_up = FULL;
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/*
* Register a cpu startup notifier callback that initializes
* cpu_cache_get for all new cpus
@@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align,
goto oops;
}
if (flags & SLAB_DEBUG_OBJECTS) {
/*
* Would deadlock through slab_destroy()->call_rcu()->
* debug_object_activate()->kmem_cache_alloc().
*/
WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
slab_set_debugobj_lock_classes(cachep);
}
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
oops:
@@ -3398,7 +3448,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
if (nodeid == -1)
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3929,7 +3979,7 @@ fail:
struct ccupdate_struct {
struct kmem_cache *cachep;
struct array_cache *new[NR_CPUS];
struct array_cache *new[0];
};
static void do_ccupdate_local(void *info)
@@ -3951,7 +4001,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct ccupdate_struct *new;
int i;
new = kzalloc(sizeof(*new), gfp);
new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
gfp);
if (!new)
return -ENOMEM;
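The allocation idiom being adopted here, shown in isolation (struct and function names hypothetical): a zero-length trailing array sized at allocation time, so the per-CPU pointer block scales with nr_cpu_ids rather than the compile-time NR_CPUS maximum:

struct cpu_ptr_block {
	struct kmem_cache *cachep;
	struct array_cache *new[0];	/* flexible trailing array */
};

static struct cpu_ptr_block *alloc_cpu_ptr_block(gfp_t gfp)
{
	return kzalloc(sizeof(struct cpu_ptr_block) +
		       nr_cpu_ids * sizeof(struct array_cache *), gfp);
}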

View File

@@ -675,7 +675,7 @@ static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
return check_bytes8(start, value, bytes);
value64 = value | value << 8 | value << 16 | value << 24;
value64 = value64 | value64 << 32;
value64 = (value64 & 0xffffffff) | value64 << 32;
prefix = 8 - ((unsigned long)start) % 8;
if (prefix) {
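An illustration (not from the patch) of why the masking matters: for a fill byte of 0x80 or above, the int expression is negative, so assigning it to a u64 sign-extends into the upper 32 bits:

static u64 example_fill_pattern(void)
{
	u8 value = 0xa5;
	u64 value64 = value | value << 8 | value << 16 | value << 24;

	/*
	 * value64 is 0xffffffffa5a5a5a5 at this point: "value << 24" is a
	 * negative int, and the assignment above sign-extends it.
	 */
	value64 = (value64 & 0xffffffff) | value64 << 32;
	return value64;	/* 0xa5a5a5a5a5a5a5a5, as intended */
}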
@@ -1508,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
struct page *page, int tail)
{
n->nr_partial++;
if (tail)
if (tail == DEACTIVATE_TO_TAIL)
list_add_tail(&page->lru, &n->partial);
else
list_add(&page->lru, &n->partial);
@@ -1755,13 +1755,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
enum slab_modes l = M_NONE, m = M_NONE;
void *freelist;
void *nextfree;
int tail = 0;
int tail = DEACTIVATE_TO_HEAD;
struct page new;
struct page old;
if (page->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = 1;
tail = DEACTIVATE_TO_TAIL;
}
c->tid = next_tid(c->tid);
@@ -1828,7 +1828,7 @@ redo:
new.frozen = 0;
if (!new.inuse && n->nr_partial < s->min_partial)
if (!new.inuse && n->nr_partial > s->min_partial)
m = M_FREE;
else if (new.freelist) {
m = M_PARTIAL;
@@ -1867,7 +1867,7 @@ redo:
if (m == M_PARTIAL) {
add_partial(n, page, tail);
stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
stat(s, tail);
} else if (m == M_FULL) {
@@ -2351,7 +2351,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
*/
if (unlikely(!prior)) {
remove_full(s, page);
add_partial(n, page, 0);
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
@@ -2361,11 +2361,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
slab_empty:
if (prior) {
/*
* Slab still on the partial list.
* Slab on the partial list.
*/
remove_partial(n, page);
stat(s, FREE_REMOVE_PARTIAL);
}
} else
/* Slab must be on the full list */
remove_full(s, page);
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
@@ -2667,7 +2669,7 @@ static void early_kmem_cache_node_alloc(int node)
init_kmem_cache_node(n, kmem_cache_node);
inc_slabs_node(kmem_cache_node, node, page->objects);
add_partial(n, page, 0);
add_partial(n, page, DEACTIVATE_TO_HEAD);
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
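Taken together, these hunks turn add_partial()'s third argument from a 0/1 flag into one of the DEACTIVATE_TO_HEAD / DEACTIVATE_TO_TAIL stat items, so the same value both selects the list end and can be passed straight to stat(). A sketch of the resulting calling convention (illustrative helper, not from the patch):

static void example_requeue(struct kmem_cache *s, struct kmem_cache_node *n,
			    struct page *page, int tail)
{
	/* tail is DEACTIVATE_TO_HEAD or DEACTIVATE_TO_TAIL */
	add_partial(n, page, tail);
	stat(s, tail);	/* the same value doubles as the stat item */
}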

View File

@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
/*
* Find out how many pages are allowed for a single swap
* device. There are two limiting factors: 1) the number of
* bits for the swap offset in the swp_entry_t type and
* 2) the number of bits in the a swap pte as defined by
* the different architectures. In order to find the
* largest possible bit mask a swap entry with swap type 0
* device. There are three limiting factors: 1) the number
* of bits for the swap offset in the swp_entry_t type, and
* 2) the number of bits in the swap pte as defined by
* the different architectures, and 3) the number of free bits
* in an exceptional radix_tree entry. In order to find the
* largest possible bit mask, a swap entry with swap type 0
* and swap offset ~0UL is created, encoded to a swap pte,
* decoded to a swp_entry_t again and finally the swap
* decoded to a swp_entry_t again, and finally the swap
* offset is extracted. This will mask all the bits from
* the initial ~0UL mask that can't be encoded in either
* the swp_entry_t or the architecture definition of a
* swap pte.
* swap pte. Then the same is done for a radix_tree entry.
*/
maxpages = swp_offset(pte_to_swp_entry(
swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
swp_entry_to_pte(swp_entry(0, ~0UL))));
maxpages = swp_offset(radix_to_swp_entry(
swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
if (maxpages > swap_header->info.last_page) {
maxpages = swap_header->info.last_page + 1;
/* p->max is an unsigned int: don't overflow it */

View File

@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
unsigned long count = 0;
int i;
/*
* Note: this function may get called on a shmem/tmpfs mapping:
* pagevec_lookup() might then return 0 prematurely (because it
* got a gangful of swap entries); but it's hardly worth worrying
* about - it can rarely have anything to free from such a mapping
* (most pages are dirty), and already skips over any difficulties.
*/
pagevec_init(&pvec, 0);
while (index <= end && pagevec_lookup(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {