Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
All conflicts seemed rather trivial, with some guidance from Saeed Mahameed on the tc_ct.c one.

Signed-off-by: David S. Miller <davem@davemloft.net>
mm/cma.c | 4
@@ -339,13 +339,13 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
     */
    if (base < highmem_start && limit > highmem_start) {
        addr = memblock_alloc_range_nid(size, alignment,
-               highmem_start, limit, nid, false);
+               highmem_start, limit, nid, true);
        limit = highmem_start;
    }

    if (!addr) {
        addr = memblock_alloc_range_nid(size, alignment, base,
-               limit, nid, false);
+               limit, nid, true);
        if (!addr) {
            ret = -ENOMEM;
            goto err;
mm/compaction.c
@@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
        .page = NULL,
    };

-   current->capture_control = &capc;
+   /*
+    * Make sure the structs are really initialized before we expose the
+    * capture control, in case we are interrupted and the interrupt handler
+    * frees a page.
+    */
+   barrier();
+   WRITE_ONCE(current->capture_control, &capc);

    ret = compact_zone(&cc, &capc);

    VM_BUG_ON(!list_empty(&cc.freepages));
    VM_BUG_ON(!list_empty(&cc.migratepages));

-   *capture = capc.page;
-   current->capture_control = NULL;
+   /*
+    * Make sure we hide capture control first before we read the captured
+    * page pointer, otherwise an interrupt could free and capture a page
+    * and we would leak it.
+    */
+   WRITE_ONCE(current->capture_control, NULL);
+   *capture = READ_ONCE(capc.page);

    return ret;
 }
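Note on the mm/compaction.c hunk: the plain assignments are replaced with barrier()/WRITE_ONCE()/READ_ONCE() so an interrupt on the same CPU can neither observe a half-initialized capture_control nor hand back a page after the slot has already been read. A rough userspace sketch of the same publish/unpublish ordering (the macros and names below are illustrative stand-ins, not the kernel's definitions):

#include <stdio.h>

#define barrier()          __asm__ __volatile__("" ::: "memory")
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct capture_control { void *page; };

/* Slot an asynchronous handler would look at (think: interrupt on this CPU). */
static struct capture_control *capture_slot;

int main(void)
{
    struct capture_control capc = { .page = NULL };

    /* Publish the slot only after capc is fully initialized. */
    barrier();
    WRITE_ONCE(capture_slot, &capc);

    /* ... the asynchronous side may now store a page into capc.page ... */

    /* Hide the slot first, then read the result, so nothing is leaked. */
    WRITE_ONCE(capture_slot, NULL);
    void *page = READ_ONCE(capc.page);

    printf("captured: %p\n", page);
    return 0;
}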
mm/debug_vm_pgtable.c
@@ -246,13 +246,13 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
 static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
                    unsigned long vaddr)
 {
-   pte_t pte = READ_ONCE(*ptep);
+   pte_t pte = ptep_get(ptep);

    pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
    set_pte_at(mm, vaddr, ptep, pte);
    barrier();
    pte_clear(mm, vaddr, ptep);
-   pte = READ_ONCE(*ptep);
+   pte = ptep_get(ptep);
    WARN_ON(!pte_none(pte));
 }
mm/filemap.c | 23
@@ -2028,7 +2028,7 @@ find_page:

        page = find_get_page(mapping, index);
        if (!page) {
-           if (iocb->ki_flags & IOCB_NOWAIT)
+           if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
                goto would_block;
            page_cache_sync_readahead(mapping,
                    ra, filp,
@@ -2038,6 +2038,10 @@ find_page:
                goto no_cached_page;
        }
        if (PageReadahead(page)) {
+           if (iocb->ki_flags & IOCB_NOIO) {
+               put_page(page);
+               goto out;
+           }
            page_cache_async_readahead(mapping,
                    ra, filp, page,
                    index, last_index - index);
@@ -2160,6 +2164,11 @@ page_not_up_to_date_locked:
        }

readpage:
+       if (iocb->ki_flags & IOCB_NOIO) {
+           unlock_page(page);
+           put_page(page);
+           goto would_block;
+       }
        /*
         * A previous I/O error may have been due to temporary
         * failures, eg. multipath errors.
@@ -2249,9 +2258,19 @@ EXPORT_SYMBOL_GPL(generic_file_buffered_read);
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
+ * be returned when no data can be read without waiting for I/O requests
+ * to complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
+ * requests shall be made for the read or for readahead. When no data
+ * can be read, -EAGAIN shall be returned. When readahead would be
+ * triggered, a partial, possibly empty read shall be returned.
+ *
 * Return:
 * * number of bytes copied, even for partial reads
-* * negative error code if nothing was read
+* * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
 ssize_t
 generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
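For the IOCB_NOIO documentation added above: IOCB_NOWAIT only forbids waiting for I/O, while IOCB_NOIO also forbids starting I/O or readahead. A toy userspace model of the documented return rules (the flag values, enum and function are invented for illustration and are not the kernel's code path):

#include <stdio.h>
#include <errno.h>

#define IOCB_NOWAIT (1 << 0)
#define IOCB_NOIO   (1 << 1)

/* What the page cache would have to do to satisfy the next chunk. */
enum cache_state { DATA_CACHED, NEEDS_READAHEAD, NEEDS_IO };

/* Returns bytes copied so far, or -EAGAIN, following the documented rules. */
static long toy_buffered_read(int ki_flags, enum cache_state state, long copied)
{
    switch (state) {
    case DATA_CACHED:
        return copied;          /* data already resident, nothing special */
    case NEEDS_READAHEAD:
        if (ki_flags & IOCB_NOIO)
            return copied;      /* partial, possibly empty read */
        return copied;          /* would kick readahead and keep going */
    case NEEDS_IO:
        if (ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
            return copied ? copied : -EAGAIN;
        return copied;          /* would block on the read */
    }
    return -EINVAL;
}

int main(void)
{
    printf("%ld\n", toy_buffered_read(IOCB_NOIO, NEEDS_IO, 0));           /* -EAGAIN */
    printf("%ld\n", toy_buffered_read(IOCB_NOIO, NEEDS_READAHEAD, 4096)); /* 4096 */
    return 0;
}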
mm/hugetlb.c
@@ -1593,7 +1593,7 @@ static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)

    /* Use first found vma */
    pgoff_start = page_to_pgoff(hpage);
-   pgoff_end = pgoff_start + hpage_nr_pages(hpage) - 1;
+   pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
    anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
            pgoff_start, pgoff_end) {
        struct vm_area_struct *vma = avc->vma;
mm/memcontrol.c
@@ -2772,8 +2772,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
        return;

    cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
-   if (!cw)
+   if (!cw) {
+       css_put(&memcg->css);
        return;
+   }

    cw->memcg = memcg;
    cw->cachep = cachep;
@@ -6360,11 +6362,16 @@ static unsigned long effective_protection(unsigned long usage,
     * We're using unprotected memory for the weight so that if
     * some cgroups DO claim explicit protection, we don't protect
     * the same bytes twice.
+    *
+    * Check both usage and parent_usage against the respective
+    * protected values. One should imply the other, but they
+    * aren't read atomically - make sure the division is sane.
     */
    if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
        return ep;

-   if (parent_effective > siblings_protected && usage > protected) {
+   if (parent_effective > siblings_protected &&
+       parent_usage > siblings_protected &&
+       usage > protected) {
        unsigned long unclaimed;

        unclaimed = parent_effective - siblings_protected;
@@ -6416,7 +6423,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,

    if (parent == root) {
        memcg->memory.emin = READ_ONCE(memcg->memory.min);
-       memcg->memory.elow = memcg->memory.low;
+       memcg->memory.elow = READ_ONCE(memcg->memory.low);
        goto out;
    }

@@ -6428,7 +6435,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
            atomic_long_read(&parent->memory.children_min_usage)));

    WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
-           memcg->memory.low, READ_ONCE(parent->memory.elow),
+           READ_ONCE(memcg->memory.low),
+           READ_ONCE(parent->memory.elow),
            atomic_long_read(&parent->memory.children_low_usage)));

 out:
mm/memory.c | 33
@@ -1498,7 +1498,7 @@ out:
 }

 #ifdef pte_index
-static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
+static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
            unsigned long addr, struct page *page, pgprot_t prot)
 {
    int err;
@@ -1506,8 +1506,9 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
    if (!page_count(page))
        return -EINVAL;
    err = validate_page_before_insert(page);
-   return err ? err : insert_page_into_pte_locked(
-       mm, pte_offset_map(pmd, addr), addr, page, prot);
+   if (err)
+       return err;
+   return insert_page_into_pte_locked(mm, pte, addr, page, prot);
 }

 /* insert_pages() amortizes the cost of spinlock operations
@@ -1517,7 +1518,8 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
            struct page **pages, unsigned long *num, pgprot_t prot)
 {
    pmd_t *pmd = NULL;
-   spinlock_t *pte_lock = NULL;
+   pte_t *start_pte, *pte;
+   spinlock_t *pte_lock;
    struct mm_struct *const mm = vma->vm_mm;
    unsigned long curr_page_idx = 0;
    unsigned long remaining_pages_total = *num;
@@ -1536,18 +1538,17 @@ more:
    ret = -ENOMEM;
    if (pte_alloc(mm, pmd))
        goto out;
-   pte_lock = pte_lockptr(mm, pmd);

    while (pages_to_write_in_pmd) {
        int pte_idx = 0;
        const int batch_size = min_t(int, pages_to_write_in_pmd, 8);

-       spin_lock(pte_lock);
-       for (; pte_idx < batch_size; ++pte_idx) {
-           int err = insert_page_in_batch_locked(mm, pmd,
+       start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
+       for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
+           int err = insert_page_in_batch_locked(mm, pte,
                addr, pages[curr_page_idx], prot);
            if (unlikely(err)) {
-               spin_unlock(pte_lock);
+               pte_unmap_unlock(start_pte, pte_lock);
                ret = err;
                remaining_pages_total -= pte_idx;
                goto out;
@@ -1555,7 +1556,7 @@ more:
            addr += PAGE_SIZE;
            ++curr_page_idx;
        }
-       spin_unlock(pte_lock);
+       pte_unmap_unlock(start_pte, pte_lock);
        pages_to_write_in_pmd -= batch_size;
        remaining_pages_total -= batch_size;
    }
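The insert_pages() hunks keep the batching described in the comment above ("amortizes the cost of spinlock operations") while switching from a bare spinlock to pte_offset_map_lock()/pte_unmap_unlock(). The batching idea itself, as a plain userspace sketch with invented names (one lock round-trip per batch of up to 8 items instead of one per item):

#include <pthread.h>
#include <stdio.h>

#define BATCH 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long inserted;

static int insert_one_locked(int item)
{
    inserted += item;   /* stands in for the per-PTE insert */
    return 0;
}

static int insert_batched(const int *items, int n)
{
    int i = 0;

    while (i < n) {
        int batch = n - i < BATCH ? n - i : BATCH;

        pthread_mutex_lock(&lock);  /* one lock per batch */
        for (int j = 0; j < batch; j++, i++) {
            if (insert_one_locked(items[i])) {
                pthread_mutex_unlock(&lock);
                return -1;
            }
        }
        pthread_mutex_unlock(&lock);
    }
    return 0;
}

int main(void)
{
    int items[20];

    for (int i = 0; i < 20; i++)
        items[i] = 1;
    insert_batched(items, 20);
    printf("%ld items inserted\n", inserted);
    return 0;
}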
@@ -3140,8 +3141,18 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
            err = mem_cgroup_charge(page, vma->vm_mm,
                        GFP_KERNEL);
            ClearPageSwapCache(page);
-           if (err)
+           if (err) {
                ret = VM_FAULT_OOM;
                goto out_page;
+           }

+           /*
+            * XXX: Move to lru_cache_add() when it
+            * supports new vs putback
+            */
+           spin_lock_irq(&page_pgdat(page)->lru_lock);
+           lru_note_cost_page(page);
+           spin_unlock_irq(&page_pgdat(page)->lru_lock);
+
+           lru_cache_add(page);
            swap_readpage(page, true);

mm/memory_hotplug.c
@@ -471,11 +471,20 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
                    unsigned long start_pfn,
                    unsigned long nr_pages)
 {
+   const unsigned long end_pfn = start_pfn + nr_pages;
    struct pglist_data *pgdat = zone->zone_pgdat;
-   unsigned long flags;
+   unsigned long pfn, cur_nr_pages, flags;

    /* Poison struct pages because they are now uninitialized again. */
-   page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+   for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
+       cond_resched();
+
+       /* Select all remaining pages up to the next section boundary */
+       cur_nr_pages =
+           min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
+       page_init_poison(pfn_to_page(pfn),
+                sizeof(struct page) * cur_nr_pages);
+   }

 #ifdef CONFIG_ZONE_DEVICE
    /*
mm/migrate.c | 13
@@ -1160,22 +1160,11 @@ out:
    return rc;
 }

-/*
- * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move(). Work
- * around it.
- */
-#if defined(CONFIG_ARM) && \
-   defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700
-#define ICE_noinline noinline
-#else
-#define ICE_noinline
-#endif
-
 /*
  * Obtain the lock on page, remove all ptes and migrate the page
  * to the newly allocated page in newpage.
  */
-static ICE_noinline int unmap_and_move(new_page_t get_new_page,
+static int unmap_and_move(new_page_t get_new_page,
                   free_page_t put_new_page,
                   unsigned long private, struct page *page,
                   int force, enum migrate_mode mode,
mm/nommu.c | 17
@@ -290,23 +290,6 @@ void *vzalloc_node(unsigned long size, int node)
 }
 EXPORT_SYMBOL(vzalloc_node);

-/**
- * vmalloc_exec - allocate virtually contiguous, executable memory
- * @size: allocation size
- *
- * Kernel-internal function to allocate enough pages to cover @size
- * the page level allocator and map them into contiguous and
- * executable kernel virtual space.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- */
-
-void *vmalloc_exec(unsigned long size)
-{
-   return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
-}
-
 /**
  * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
  * @size: allocation size
mm/page_alloc.c
@@ -7832,7 +7832,7 @@ void setup_per_zone_wmarks(void)
 * Initialise min_free_kbytes.
 *
 * For small machines we want it small (128k min). For large machines
- * we want it large (64MB max). But it is not linear, because network
+ * we want it large (256MB max). But it is not linear, because network
 * bandwidth does not increase linearly with machine size. We use
 *
 * min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
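The comment above describes how min_free_kbytes is sized: 4 * sqrt(lowmem_kbytes), clamped to a 128 kB floor and, per the updated text, a 256 MB ceiling. A standalone illustration of that arithmetic (userspace only, not the kernel's init path; build with -lm):

#include <math.h>
#include <stdio.h>

/* Clamp bounds follow the comment text: 128 kB minimum, 256 MB maximum. */
static long min_free_kbytes(long lowmem_kbytes)
{
    long v = (long)(4 * sqrt((double)lowmem_kbytes));

    if (v < 128)
        v = 128;
    if (v > 256 * 1024)
        v = 256 * 1024;
    return v;
}

int main(void)
{
    /* 16 MB, 4 GB and 1 TB of low memory, expressed in kbytes */
    long sizes[] = { 16 * 1024, 4L * 1024 * 1024, 1024L * 1024 * 1024 };

    for (int i = 0; i < 3; i++)
        printf("%ld kB lowmem -> %ld kB reserved\n",
               sizes[i], min_free_kbytes(sizes[i]));
    return 0;
}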
mm/slab.h
@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
                        gfp_t gfp, int order,
                        struct kmem_cache *s)
 {
-   unsigned int nr_pages = 1 << order;
+   int nr_pages = 1 << order;
    struct mem_cgroup *memcg;
    struct lruvec *lruvec;
    int ret;
@@ -388,7 +388,7 @@ out:
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
                        struct kmem_cache *s)
 {
-   unsigned int nr_pages = 1 << order;
+   int nr_pages = 1 << order;
    struct mem_cgroup *memcg;
    struct lruvec *lruvec;

mm/slab_common.c
@@ -1726,7 +1726,7 @@ void kzfree(const void *p)
    if (unlikely(ZERO_OR_NULL_PTR(mem)))
        return;
    ks = ksize(mem);
-   memset(mem, 0, ks);
+   memzero_explicit(mem, ks);
    kfree(mem);
 }
 EXPORT_SYMBOL(kzfree);
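The kzfree() hunk switches memset() to memzero_explicit() because zeroing a buffer that is immediately freed is a dead store the compiler may delete. A userspace sketch of the same idea (the helper name is invented; the kernel's memzero_explicit() relies on a compiler barrier along these lines):

#include <stdlib.h>
#include <string.h>

static void my_memzero_explicit(void *p, size_t n)
{
    memset(p, 0, n);
    /* Force the compiler to assume the zeroed memory is observed. */
    __asm__ __volatile__("" : : "r"(p) : "memory");
}

int main(void)
{
    char *secret = malloc(64);

    if (!secret)
        return 1;
    strcpy(secret, "hunter2");
    my_memzero_explicit(secret, 64);    /* survives optimization */
    free(secret);
    return 0;
}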
mm/slub.c | 19
@@ -3766,15 +3766,13 @@ error:
 }

 static void list_slab_objects(struct kmem_cache *s, struct page *page,
-                 const char *text, unsigned long *map)
+                 const char *text)
 {
 #ifdef CONFIG_SLUB_DEBUG
    void *addr = page_address(page);
+   unsigned long *map;
    void *p;

-   if (!map)
-       return;
-
    slab_err(s, page, text, s->name);
    slab_lock(page);

@@ -3786,6 +3784,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
            print_tracking(s, p);
        }
    }
+   put_map(map);
    slab_unlock(page);
 #endif
 }
@@ -3799,11 +3798,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
    LIST_HEAD(discard);
    struct page *page, *h;
-   unsigned long *map = NULL;
-
-#ifdef CONFIG_SLUB_DEBUG
-   map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
-#endif

    BUG_ON(irqs_disabled());
    spin_lock_irq(&n->list_lock);
@@ -3813,16 +3807,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
            list_add(&page->slab_list, &discard);
        } else {
            list_slab_objects(s, page,
-             "Objects remaining in %s on __kmem_cache_shutdown()",
-             map);
+             "Objects remaining in %s on __kmem_cache_shutdown()");
        }
    }
    spin_unlock_irq(&n->list_lock);

-#ifdef CONFIG_SLUB_DEBUG
-   bitmap_free(map);
-#endif
-
    list_for_each_entry_safe(page, h, &discard, slab_list)
        discard_slab(s, page);
 }
mm/swap.c
@@ -443,8 +443,7 @@ void mark_page_accessed(struct page *page)
        else
            __lru_cache_activate_page(page);
        ClearPageReferenced(page);
-       if (page_is_file_lru(page))
-           workingset_activation(page);
+       workingset_activation(page);
    }
    if (page_is_idle(page))
        clear_page_idle(page);
mm/swap_state.c
@@ -21,7 +21,7 @@
 #include <linux/vmalloc.h>
 #include <linux/swap_slots.h>
 #include <linux/huge_mm.h>
-
+#include "internal.h"

 /*
  * swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
        __SetPageSwapBacked(page);

        /* May fail (-ENOMEM) if XArray node allocation failed. */
-       if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+       if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
            put_swap_page(page, entry);
            goto fail_unlock;
        }
mm/vmalloc.c | 21
@@ -1862,7 +1862,6 @@ EXPORT_SYMBOL(vm_unmap_ram);
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
- * @prot: memory protection to use. PAGE_KERNEL for regular RAM
 *
 * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
 * faster than vmap so it's good. But if you mix long-life and short-life
@@ -2696,26 +2695,6 @@ void *vzalloc_node(unsigned long size, int node)
 }
 EXPORT_SYMBOL(vzalloc_node);

-/**
- * vmalloc_exec - allocate virtually contiguous, executable memory
- * @size: allocation size
- *
- * Kernel-internal function to allocate enough pages to cover @size
- * the page level allocator and map them into contiguous and
- * executable kernel virtual space.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- *
- * Return: pointer to the allocated memory or %NULL on error
- */
-void *vmalloc_exec(unsigned long size)
-{
-   return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-           GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
-           NUMA_NO_NODE, __builtin_return_address(0));
-}
-
 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
 #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
mm/vmscan.c
@@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        __delete_from_swap_cache(page, swap);
        xa_unlock_irqrestore(&mapping->i_pages, flags);
        put_swap_page(page, swap);
+       workingset_eviction(page, target_memcg);
    } else {
        void (*freepage)(struct page *);
        void *shadow = NULL;
@@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
            list_add(&page->lru, &pages_to_free);
        } else {
            nr_moved += nr_pages;
+           if (PageActive(page))
+               workingset_age_nonresident(lruvec, nr_pages);
        }
    }

mm/workingset.c
@@ -156,8 +156,8 @@
 *
 * Implementation
 *
- * For each node's file LRU lists, a counter for inactive evictions
- * and activations is maintained (node->inactive_age).
+ * For each node's LRU lists, a counter for inactive evictions and
+ * activations is maintained (node->nonresident_age).
 *
 * On eviction, a snapshot of this counter (along with some bits to
 * identify the node) is stored in the now empty page cache
@@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
    *workingsetp = workingset;
 }

-static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
+/**
+ * workingset_age_nonresident - age non-resident entries as LRU ages
+ * @memcg: the lruvec that was aged
+ * @nr_pages: the number of pages to count
+ *
+ * As in-memory pages are aged, non-resident pages need to be aged as
+ * well, in order for the refault distances later on to be comparable
+ * to the in-memory dimensions. This function allows reclaim and LRU
+ * operations to drive the non-resident aging along in parallel.
+ */
+void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
 {
    /*
     * Reclaiming a cgroup means reclaiming all its children in a
@@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
     * the root cgroup's, age as well.
     */
    do {
-       struct lruvec *lruvec;
-
-       lruvec = mem_cgroup_lruvec(memcg, pgdat);
-       atomic_long_inc(&lruvec->inactive_age);
-   } while (memcg && (memcg = parent_mem_cgroup(memcg)));
+       atomic_long_add(nr_pages, &lruvec->nonresident_age);
+   } while ((lruvec = parent_lruvec(lruvec)));
 }

 /**
@@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
    VM_BUG_ON_PAGE(page_count(page), page);
    VM_BUG_ON_PAGE(!PageLocked(page), page);

-   advance_inactive_age(page_memcg(page), pgdat);
-
    lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
+   workingset_age_nonresident(lruvec, hpage_nr_pages(page));
    /* XXX: target_memcg can be NULL, go through lruvec */
    memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
-   eviction = atomic_long_read(&lruvec->inactive_age);
+   eviction = atomic_long_read(&lruvec->nonresident_age);
    return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }

@@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
    if (!mem_cgroup_disabled() && !eviction_memcg)
        goto out;
    eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
-   refault = atomic_long_read(&eviction_lruvec->inactive_age);
+   refault = atomic_long_read(&eviction_lruvec->nonresident_age);

    /*
     * Calculate the refault distance
     *
     * The unsigned subtraction here gives an accurate distance
-    * across inactive_age overflows in most cases. There is a
+    * across nonresident_age overflows in most cases. There is a
     * special case: usually, shadow entries have a short lifetime
     * and are either refaulted or reclaimed along with the inode
     * before they get too old. But it is not impossible for the
-    * inactive_age to lap a shadow entry in the field, which can
-    * then result in a false small refault distance, leading to a
-    * false activation should this old entry actually refault
-    * again. However, earlier kernels used to deactivate
+    * nonresident_age to lap a shadow entry in the field, which
+    * can then result in a false small refault distance, leading
+    * to a false activation should this old entry actually
+    * refault again. However, earlier kernels used to deactivate
     * unconditionally with *every* reclaim invocation for the
     * longest time, so the occasional inappropriate activation
     * leading to pressure on the active list is not a problem.
@@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
        goto out;

    SetPageActive(page);
-   advance_inactive_age(memcg, pgdat);
+   workingset_age_nonresident(lruvec, hpage_nr_pages(page));
    inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);

    /* Page was active prior to eviction */
@@ -382,6 +388,7 @@ out:
 void workingset_activation(struct page *page)
 {
    struct mem_cgroup *memcg;
+   struct lruvec *lruvec;

    rcu_read_lock();
    /*
@@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
    memcg = page_memcg_rcu(page);
    if (!mem_cgroup_disabled() && !memcg)
        goto out;
-   advance_inactive_age(memcg, page_pgdat(page));
+   lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+   workingset_age_nonresident(lruvec, hpage_nr_pages(page));
 out:
    rcu_read_unlock();
 }
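One detail worth noting from the workingset.c comment above: the refault distance is an unsigned subtraction, so it stays accurate even when the (now renamed) nonresident_age counter wraps around. A minimal standalone illustration:

#include <stdio.h>

int main(void)
{
    unsigned long eviction = (unsigned long)-10;    /* counter just before wrapping */
    unsigned long refault  = 25;                    /* counter after the wrap */

    /* 10 ticks before the wrap plus 25 after it = 35 */
    printf("refault distance = %lu\n", refault - eviction);
    return 0;
}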