Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - a few misc bits

 - ocfs2

 - most(?) of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (125 commits)
  thp: fix comments of __pmd_trans_huge_lock()
  cgroup: remove unnecessary 0 check from css_from_id()
  cgroup: fix idr leak for the first cgroup root
  mm: memcontrol: fix documentation for compound parameter
  mm: memcontrol: remove BUG_ON in uncharge_list
  mm: fix build warnings in <linux/compaction.h>
  mm, thp: convert from optimistic swapin collapsing to conservative
  mm, thp: fix comment inconsistency for swapin readahead functions
  thp: update Documentation/{vm/transhuge,filesystems/proc}.txt
  shmem: split huge pages beyond i_size under memory pressure
  thp: introduce CONFIG_TRANSPARENT_HUGE_PAGECACHE
  khugepaged: add support of collapse for tmpfs/shmem pages
  shmem: make shmem_inode_info::lock irq-safe
  khugepaged: move up_read(mmap_sem) out of khugepaged_alloc_page()
  thp: extract khugepaged from mm/huge_memory.c
  shmem, thp: respect MADV_{NO,}HUGEPAGE for file mappings
  shmem: add huge pages support
  shmem: get_unmapped_area align huge page
  shmem: prepare huge= mount option and sysfs knob
  mm, rmap: account shmem thp pages
  ...
Tento commit je obsažen v:
Linus Torvalds
2016-07-26 19:55:54 -07:00
186 změnil soubory, kde provedl 7380 přidání a 4151 odebrání

Zobrazit soubor

@@ -439,6 +439,14 @@ choice
benefit.
endchoice
#
# We don't deposit page tables on file THP mapping,
# but Power makes use of them to address MMU quirk.
#
config TRANSPARENT_HUGE_PAGECACHE
def_bool y
depends on TRANSPARENT_HUGEPAGE && !PPC
#
# UP and nommu archs use km based percpu allocator
#

Zobrazit soubor

@@ -74,7 +74,7 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o

Zobrazit soubor

@@ -70,7 +70,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
*/
if (trylock_page(page)) {
#ifdef CONFIG_BALLOON_COMPACTION
if (!PagePrivate(page)) {
if (PageIsolated(page)) {
/* raced with isolation */
unlock_page(page);
continue;
@@ -106,110 +106,50 @@ EXPORT_SYMBOL_GPL(balloon_page_dequeue);
#ifdef CONFIG_BALLOON_COMPACTION
static inline void __isolate_balloon_page(struct page *page)
bool balloon_page_isolate(struct page *page, isolate_mode_t mode)
{
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
unsigned long flags;
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
ClearPagePrivate(page);
list_del(&page->lru);
b_dev_info->isolated_pages++;
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
return true;
}
static inline void __putback_balloon_page(struct page *page)
void balloon_page_putback(struct page *page)
{
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
unsigned long flags;
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
SetPagePrivate(page);
list_add(&page->lru, &b_dev_info->pages);
b_dev_info->isolated_pages--;
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
}
/* __isolate_lru_page() counterpart for a ballooned page */
bool balloon_page_isolate(struct page *page)
{
/*
* Avoid burning cycles with pages that are yet under __free_pages(),
* or just got freed under us.
*
* In case we 'win' a race for a balloon page being freed under us and
* raise its refcount preventing __free_pages() from doing its job
* the put_page() at the end of this block will take care of
* release this page, thus avoiding a nasty leakage.
*/
if (likely(get_page_unless_zero(page))) {
/*
* As balloon pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
* as well as race against the balloon driver releasing a page.
*
* In order to avoid having an already isolated balloon page
* being (wrongly) re-isolated while it is under migration,
* or to avoid attempting to isolate pages being released by
* the balloon driver, lets be sure we have the page lock
* before proceeding with the balloon page isolation steps.
*/
if (likely(trylock_page(page))) {
/*
* A ballooned page, by default, has PagePrivate set.
* Prevent concurrent compaction threads from isolating
* an already isolated balloon page by clearing it.
*/
if (balloon_page_movable(page)) {
__isolate_balloon_page(page);
unlock_page(page);
return true;
}
unlock_page(page);
}
put_page(page);
}
return false;
}
/* putback_lru_page() counterpart for a ballooned page */
void balloon_page_putback(struct page *page)
{
/*
* 'lock_page()' stabilizes the page and prevents races against
* concurrent isolation threads attempting to re-isolate it.
*/
lock_page(page);
if (__is_movable_balloon_page(page)) {
__putback_balloon_page(page);
/* drop the extra ref count taken for page isolation */
put_page(page);
} else {
WARN_ON(1);
dump_page(page, "not movable balloon page");
}
unlock_page(page);
}
/* move_to_new_page() counterpart for a ballooned page */
int balloon_page_migrate(struct page *newpage,
struct page *page, enum migrate_mode mode)
int balloon_page_migrate(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode)
{
struct balloon_dev_info *balloon = balloon_page_device(page);
int rc = -EAGAIN;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
if (WARN_ON(!__is_movable_balloon_page(page))) {
dump_page(page, "not movable balloon page");
return rc;
}
if (balloon && balloon->migratepage)
rc = balloon->migratepage(balloon, newpage, page, mode);
return rc;
return balloon->migratepage(balloon, newpage, page, mode);
}
const struct address_space_operations balloon_aops = {
.migratepage = balloon_page_migrate,
.isolate_page = balloon_page_isolate,
.putback_page = balloon_page_putback,
};
EXPORT_SYMBOL_GPL(balloon_aops);
#endif /* CONFIG_BALLOON_COMPACTION */

Zobrazit soubor

@@ -15,11 +15,11 @@
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/page_owner.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
@@ -65,13 +65,27 @@ static unsigned long release_freepages(struct list_head *freelist)
static void map_pages(struct list_head *list)
{
struct page *page;
unsigned int i, order, nr_pages;
struct page *page, *next;
LIST_HEAD(tmp_list);
list_for_each_entry(page, list, lru) {
arch_alloc_page(page, 0);
kernel_map_pages(page, 1, 1);
kasan_alloc_pages(page, 0);
list_for_each_entry_safe(page, next, list, lru) {
list_del(&page->lru);
order = page_private(page);
nr_pages = 1 << order;
post_alloc_hook(page, order, __GFP_MOVABLE);
if (order)
split_page(page, order);
for (i = 0; i < nr_pages; i++) {
list_add(&page->lru, &tmp_list);
page++;
}
}
list_splice(&tmp_list, list);
}
static inline bool migrate_async_suitable(int migratetype)
@@ -81,6 +95,44 @@ static inline bool migrate_async_suitable(int migratetype)
#ifdef CONFIG_COMPACTION
int PageMovable(struct page *page)
{
struct address_space *mapping;
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (!__PageMovable(page))
return 0;
mapping = page_mapping(page);
if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
return 1;
return 0;
}
EXPORT_SYMBOL(PageMovable);
void __SetPageMovable(struct page *page, struct address_space *mapping)
{
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__SetPageMovable);
void __ClearPageMovable(struct page *page)
{
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageMovable(page), page);
/*
* Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
* flag so that VM can catch up released page by driver after isolation.
* With it, VM migration doesn't try to put it back.
*/
page->mapping = (void *)((unsigned long)page->mapping &
PAGE_MAPPING_MOVABLE);
}
EXPORT_SYMBOL(__ClearPageMovable);
/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6
@@ -368,12 +420,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
unsigned long flags = 0;
bool locked = false;
unsigned long blockpfn = *start_pfn;
unsigned int order;
cursor = pfn_to_page(blockpfn);
/* Isolate free pages. */
for (; blockpfn < end_pfn; blockpfn++, cursor++) {
int isolated, i;
int isolated;
struct page *page = cursor;
/*
@@ -439,17 +492,17 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
goto isolate_fail;
}
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
/* Found a free page, will break it into order-0 pages */
order = page_order(page);
isolated = __isolate_free_page(page, order);
if (!isolated)
break;
set_page_private(page, order);
total_isolated += isolated;
cc->nr_freepages += isolated;
for (i = 0; i < isolated; i++) {
list_add(&page->lru, freelist);
page++;
}
list_add_tail(&page->lru, freelist);
if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
blockpfn += isolated;
break;
@@ -568,7 +621,7 @@ isolate_freepages_range(struct compact_control *cc,
*/
}
/* split_free_page does not map the pages */
/* __isolate_free_page() does not map the pages */
map_pages(&freelist);
if (pfn < end_pfn) {
@@ -670,7 +723,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
bool is_lru;
if (skip_on_failure && low_pfn >= next_skip_pfn) {
/*
@@ -732,21 +784,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
continue;
}
/*
* Check may be lockless but that's ok as we recheck later.
* It's possible to migrate LRU pages and balloon pages
* Skip any other type of page
*/
is_lru = PageLRU(page);
if (!is_lru) {
if (unlikely(balloon_page_movable(page))) {
if (balloon_page_isolate(page)) {
/* Successfully isolated */
goto isolate_success;
}
}
}
/*
* Regardless of being on LRU, compound pages such as THP and
* hugetlbfs are not to be compacted. We can potentially save
@@ -763,8 +800,30 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_fail;
}
if (!is_lru)
/*
* Check may be lockless but that's ok as we recheck later.
* It's possible to migrate LRU and non-lru movable pages.
* Skip any other type of page
*/
if (!PageLRU(page)) {
/*
* __PageMovable can return false positive so we need
* to verify it under page_lock.
*/
if (unlikely(__PageMovable(page)) &&
!PageIsolated(page)) {
if (locked) {
spin_unlock_irqrestore(&zone->lru_lock,
flags);
locked = false;
}
if (isolate_movable_page(page, isolate_mode))
goto isolate_success;
}
goto isolate_fail;
}
/*
* Migration will fail if an anonymous page is pinned in memory,
@@ -1059,7 +1118,7 @@ static void isolate_freepages(struct compact_control *cc)
}
}
/* split_free_page does not map the pages */
/* __isolate_free_page() does not map the pages */
map_pages(freelist);
/*

Zobrazit soubor

@@ -114,14 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
struct radix_tree_node *node;
int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
VM_BUG_ON(!PageLocked(page));
node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index,
shadow);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(nr != 1 && shadow, page);
if (shadow) {
mapping->nrexceptional++;
mapping->nrexceptional += nr;
/*
* Make sure the nrexceptional update is committed before
* the nrpages update so that final truncate racing
@@ -130,31 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping,
*/
smp_wmb();
}
mapping->nrpages--;
mapping->nrpages -= nr;
if (!node)
return;
workingset_node_pages_dec(node);
if (shadow)
workingset_node_shadows_inc(node);
else
if (__radix_tree_delete_node(&mapping->page_tree, node))
for (i = 0; i < nr; i++) {
node = radix_tree_replace_clear_tags(&mapping->page_tree,
page->index + i, shadow);
if (!node) {
VM_BUG_ON_PAGE(nr != 1, page);
return;
}
/*
* Track node that only contains shadow entries. DAX mappings contain
* no shadow entries and may contain other exceptional entries so skip
* those.
*
* Avoid acquiring the list_lru lock if already tracked. The
* list_empty() test is safe as node->private_list is
* protected by mapping->tree_lock.
*/
if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
list_empty(&node->private_list)) {
node->private_data = mapping;
list_lru_add(&workingset_shadow_nodes, &node->private_list);
workingset_node_pages_dec(node);
if (shadow)
workingset_node_shadows_inc(node);
else
if (__radix_tree_delete_node(&mapping->page_tree, node))
continue;
/*
* Track node that only contains shadow entries. DAX mappings
* contain no shadow entries and may contain other exceptional
* entries so skip those.
*
* Avoid acquiring the list_lru lock if already tracked.
* The list_empty() test is safe as node->private_list is
* protected by mapping->tree_lock.
*/
if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
list_empty(&node->private_list)) {
node->private_data = mapping;
list_lru_add(&workingset_shadow_nodes,
&node->private_list);
}
}
}
@@ -166,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
void __delete_from_page_cache(struct page *page, void *shadow)
{
struct address_space *mapping = page->mapping;
int nr = hpage_nr_pages(page);
trace_mm_filemap_delete_from_page_cache(page);
/*
@@ -178,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
else
cleancache_invalidate_page(mapping, page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(page_mapped(page), page);
if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
int mapcount;
@@ -209,9 +218,14 @@ void __delete_from_page_cache(struct page *page, void *shadow)
/* hugetlb pages do not participate in page cache accounting. */
if (!PageHuge(page))
__dec_zone_page_state(page, NR_FILE_PAGES);
if (PageSwapBacked(page))
__dec_zone_page_state(page, NR_SHMEM);
__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
if (PageSwapBacked(page)) {
__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
if (PageTransHuge(page))
__dec_zone_page_state(page, NR_SHMEM_THPS);
} else {
VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
}
/*
* At this point page must be either written or cleaned by truncate.
@@ -235,9 +249,8 @@ void __delete_from_page_cache(struct page *page, void *shadow)
*/
void delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
struct address_space *mapping = page_mapping(page);
unsigned long flags;
void (*freepage)(struct page *);
BUG_ON(!PageLocked(page));
@@ -250,7 +263,13 @@ void delete_from_page_cache(struct page *page)
if (freepage)
freepage(page);
put_page(page);
if (PageTransHuge(page) && !PageHuge(page)) {
page_ref_sub(page, HPAGE_PMD_NR);
VM_BUG_ON_PAGE(page_count(page) <= 0, page);
} else {
put_page(page);
}
}
EXPORT_SYMBOL(delete_from_page_cache);
@@ -1053,7 +1072,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
void **pagep;
struct page *page;
struct page *head, *page;
rcu_read_lock();
repeat:
@@ -1073,16 +1092,24 @@ repeat:
*/
goto out;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/*
* Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != *pagep)) {
put_page(page);
put_page(head);
goto repeat;
}
}
@@ -1118,12 +1145,12 @@ repeat:
if (page && !radix_tree_exception(page)) {
lock_page(page);
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
if (unlikely(page_mapping(page) != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
VM_BUG_ON_PAGE(page->index != offset, page);
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
}
return page;
}
@@ -1255,7 +1282,7 @@ unsigned find_get_entries(struct address_space *mapping,
rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
struct page *page;
struct page *head, *page;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1272,12 +1299,20 @@ repeat:
*/
goto export;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
export:
@@ -1318,7 +1353,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
struct page *page;
struct page *head, *page;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1337,12 +1372,19 @@ repeat:
continue;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
@@ -1379,7 +1421,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
struct page *page;
struct page *head, *page;
repeat:
page = radix_tree_deref_slot(slot);
/* The hole, there no reason to continue */
@@ -1399,12 +1441,19 @@ repeat:
break;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
@@ -1413,7 +1462,7 @@ repeat:
* otherwise we can get both false positives and false
* negatives, which is just confusing to the caller.
*/
if (page->mapping == NULL || page->index != iter.index) {
if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
put_page(page);
break;
}
@@ -1451,7 +1500,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
rcu_read_lock();
radix_tree_for_each_tagged(slot, &mapping->page_tree,
&iter, *index, tag) {
struct page *page;
struct page *head, *page;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1476,12 +1525,19 @@ repeat:
continue;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
@@ -1525,7 +1581,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
radix_tree_for_each_tagged(slot, &mapping->page_tree,
&iter, start, tag) {
struct page *page;
struct page *head, *page;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1543,12 +1599,20 @@ repeat:
*/
goto export;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
export:
@@ -2128,21 +2192,21 @@ page_not_uptodate:
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
void filemap_map_pages(struct fault_env *fe,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct radix_tree_iter iter;
void **slot;
struct file *file = vma->vm_file;
struct file *file = fe->vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
loff_t size;
struct page *page;
unsigned long address = (unsigned long) vmf->virtual_address;
unsigned long addr;
pte_t *pte;
struct page *head, *page;
rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
if (iter.index > vmf->max_pgoff)
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
start_pgoff) {
if (iter.index > end_pgoff)
break;
repeat:
page = radix_tree_deref_slot(slot);
@@ -2156,12 +2220,19 @@ repeat:
goto next;
}
if (!page_cache_get_speculative(page))
head = compound_head(page);
if (!page_cache_get_speculative(head))
goto repeat;
/* The page was split under us? */
if (compound_head(page) != head) {
put_page(head);
goto repeat;
}
/* Has the page moved? */
if (unlikely(page != *slot)) {
put_page(page);
put_page(head);
goto repeat;
}
@@ -2179,14 +2250,15 @@ repeat:
if (page->index >= size >> PAGE_SHIFT)
goto unlock;
pte = vmf->pte + page->index - vmf->pgoff;
if (!pte_none(*pte))
goto unlock;
if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--;
addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
do_set_pte(vma, addr, page, pte, false, false);
fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
if (fe->pte)
fe->pte += iter.index - last_pgoff;
last_pgoff = iter.index;
if (alloc_set_pte(fe, NULL, page))
goto unlock;
unlock_page(page);
goto next;
unlock:
@@ -2194,7 +2266,10 @@ unlock:
skip:
put_page(page);
next:
if (iter.index == vmf->max_pgoff)
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*fe->pmd))
break;
if (iter.index == end_pgoff)
break;
}
rcu_read_unlock();

Zobrazit soubor

@@ -20,6 +20,8 @@
#include <linux/frontswap.h>
#include <linux/swapfile.h>
DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key);
/*
* frontswap_ops are added by frontswap_register_ops, and provide the
* frontswap "backend" implementation functions. Multiple implementations
@@ -139,6 +141,8 @@ void frontswap_register_ops(struct frontswap_ops *ops)
ops->next = frontswap_ops;
} while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
static_branch_inc(&frontswap_enabled_key);
spin_lock(&swap_lock);
plist_for_each_entry(si, &swap_active_head, list) {
if (si->frontswap_map)
@@ -189,7 +193,7 @@ void __frontswap_init(unsigned type, unsigned long *map)
struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
BUG_ON(sis == NULL);
VM_BUG_ON(sis == NULL);
/*
* p->frontswap is a bitmap that we MUST have to figure out which page
@@ -248,15 +252,9 @@ int __frontswap_store(struct page *page)
pgoff_t offset = swp_offset(entry);
struct frontswap_ops *ops;
/*
* Return if no backend registed.
* Don't need to inc frontswap_failed_stores here.
*/
if (!frontswap_ops)
return -1;
BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL);
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(sis == NULL);
/*
* If a dup, we must remove the old page first; we can't leave the
@@ -303,11 +301,10 @@ int __frontswap_load(struct page *page)
pgoff_t offset = swp_offset(entry);
struct frontswap_ops *ops;
if (!frontswap_ops)
return -1;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(sis == NULL);
BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL);
if (!__frontswap_test(sis, offset))
return -1;
@@ -337,10 +334,9 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
if (!frontswap_ops)
return;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
BUG_ON(sis == NULL);
if (!__frontswap_test(sis, offset))
return;
@@ -360,10 +356,9 @@ void __frontswap_invalidate_area(unsigned type)
struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
if (!frontswap_ops)
return;
VM_BUG_ON(!frontswap_ops);
VM_BUG_ON(sis == NULL);
BUG_ON(sis == NULL);
if (sis->frontswap_map == NULL)
return;

Zobrazit soubor

@@ -279,6 +279,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
ret = 0;
split_huge_pmd(vma, pmd, address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
get_page(page);
spin_unlock(ptl);
@@ -286,6 +288,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
if (pmd_none(*pmd))
return no_page_table(vma, flags);
}
return ret ? ERR_PTR(ret) :
@@ -350,7 +354,6 @@ unmap:
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking)
{
struct mm_struct *mm = vma->vm_mm;
unsigned int fault_flags = 0;
int ret;
@@ -375,7 +378,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_TRIED;
}
ret = handle_mm_fault(mm, vma, address, fault_flags);
ret = handle_mm_fault(vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
@@ -690,7 +693,7 @@ retry:
if (!vma_permits_fault(vma, fault_flags))
return -EFAULT;
ret = handle_mm_fault(mm, vma, address, fault_flags);
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)

Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání

Zobrazit soubor

@@ -3179,7 +3179,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct page *ref_page)
{
int force_flush = 0;
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *ptep;
@@ -3198,19 +3197,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
tlb_start_vma(tlb, vma);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
again:
for (; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep))
goto unlock;
if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl);
continue;
}
pte = huge_ptep_get(ptep);
if (huge_pte_none(pte))
goto unlock;
if (huge_pte_none(pte)) {
spin_unlock(ptl);
continue;
}
/*
* Migrating hugepage or HWPoisoned hugepage is already
@@ -3218,7 +3220,8 @@ again:
*/
if (unlikely(!pte_present(pte))) {
huge_pte_clear(mm, address, ptep);
goto unlock;
spin_unlock(ptl);
continue;
}
page = pte_page(pte);
@@ -3228,9 +3231,10 @@ again:
* are about to unmap is the actual page of interest.
*/
if (ref_page) {
if (page != ref_page)
goto unlock;
if (page != ref_page) {
spin_unlock(ptl);
continue;
}
/*
* Mark the VMA as having unmapped its page so that
* future faults in this VMA will fail rather than
@@ -3246,30 +3250,14 @@ again:
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, true);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
address += sz;
spin_unlock(ptl);
break;
}
/* Bail out after unmapping reference page if supplied */
if (ref_page) {
spin_unlock(ptl);
break;
}
unlock:
spin_unlock(ptl);
}
/*
* mmu_gather ran out of room to batch pages, we break out of
* the PTE lock to avoid doing the potential expensive TLB invalidate
* and page-free while holding it.
*/
if (force_flush) {
force_flush = 0;
tlb_flush_mmu(tlb);
if (address < end && !ref_page)
goto again;
tlb_remove_page_size(tlb, page, huge_page_size(h));
/*
* Bail out after unmapping reference page if supplied
*/
if (ref_page)
break;
}
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
tlb_end_vma(tlb, vma);

Zobrazit soubor

@@ -36,6 +36,8 @@
/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
int do_swap_page(struct fault_env *fe, pte_t orig_pte);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
@@ -150,6 +152,8 @@ extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
unsigned int order);
extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
extern int user_min_free_kbytes;
#if defined CONFIG_COMPACTION || defined CONFIG_CMA

1922
mm/khugepaged.c Normální soubor

Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání

Zobrazit soubor

@@ -376,9 +376,8 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
if (IS_ERR_OR_NULL(page))
break;
if (PageKsm(page))
ret = handle_mm_fault(vma->vm_mm, vma, addr,
FAULT_FLAG_WRITE |
FAULT_FLAG_REMOTE);
ret = handle_mm_fault(vma, addr,
FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
else
ret = VM_FAULT_WRITE;
put_page(page);
@@ -532,8 +531,8 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
void *expected_mapping;
unsigned long kpfn;
expected_mapping = (void *)stable_node +
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
expected_mapping = (void *)((unsigned long)stable_node |
PAGE_MAPPING_KSM);
again:
kpfn = READ_ONCE(stable_node->kpfn);
page = pfn_to_page(kpfn);

Zobrazit soubor

@@ -584,6 +584,9 @@ repeat:
nid, flags);
}
if (!nr_new)
return 0;
/*
* If this was the first round, resize array and repeat for actual
* insertions; otherwise, merge and return.

Zobrazit soubor

@@ -1259,6 +1259,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
struct oom_control oc = {
.zonelist = NULL,
.nodemask = NULL,
.memcg = memcg,
.gfp_mask = gfp_mask,
.order = order,
};
@@ -1281,7 +1282,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
goto unlock;
}
check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
@@ -1289,7 +1290,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
css_task_iter_start(&iter->css, &it);
while ((task = css_task_iter_next(&it))) {
switch (oom_scan_process_thread(&oc, task, totalpages)) {
switch (oom_scan_process_thread(&oc, task)) {
case OOM_SCAN_SELECT:
if (chosen)
put_task_struct(chosen);
@@ -1329,7 +1330,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (chosen) {
points = chosen_points * 1000 / totalpages;
oom_kill_process(&oc, chosen, points, totalpages, memcg,
oom_kill_process(&oc, chosen, points, totalpages,
"Memory cgroup out of memory");
}
unlock:
@@ -2272,20 +2273,30 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
current->memcg_kmem_skip_account = 0;
}
/*
static inline bool memcg_kmem_bypass(void)
{
if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
return true;
return false;
}
/**
* memcg_kmem_get_cache: select the correct per-memcg cache for allocation
* @cachep: the original global kmem cache
*
* Return the kmem_cache we're supposed to use for a slab allocation.
* We try to use the current memcg's version of the cache.
*
* If the cache does not exist yet, if we are the first user of it,
* we either create it immediately, if possible, or create it asynchronously
* in a workqueue.
* In the latter case, we will let the current allocation go through with
* the original cache.
* If the cache does not exist yet, if we are the first user of it, we
* create it asynchronously in a workqueue and let the current allocation
* go through with the original cache.
*
* Can't be called in interrupt context or from kernel threads.
* This function needs to be called with rcu_read_lock() held.
* This function takes a reference to the cache it returns to assure it
* won't get destroyed while we are working with it. Once the caller is
* done with it, memcg_kmem_put_cache() must be called to release the
* reference.
*/
struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
{
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep;
@@ -2293,10 +2304,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
VM_BUG_ON(!is_root_cache(cachep));
if (cachep->flags & SLAB_ACCOUNT)
gfp |= __GFP_ACCOUNT;
if (!(gfp & __GFP_ACCOUNT))
if (memcg_kmem_bypass())
return cachep;
if (current->memcg_kmem_skip_account)
@@ -2329,14 +2337,27 @@ out:
return cachep;
}
void __memcg_kmem_put_cache(struct kmem_cache *cachep)
/**
* memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
* @cachep: the cache returned by memcg_kmem_get_cache
*/
void memcg_kmem_put_cache(struct kmem_cache *cachep)
{
if (!is_root_cache(cachep))
css_put(&cachep->memcg_params.memcg->css);
}
int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
struct mem_cgroup *memcg)
/**
* memcg_kmem_charge: charge a kmem page
* @page: page to charge
* @gfp: reclaim mode
* @order: allocation order
* @memcg: memory cgroup to charge
*
* Returns 0 on success, an error code on failure.
*/
int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
struct mem_cgroup *memcg)
{
unsigned int nr_pages = 1 << order;
struct page_counter *counter;
@@ -2357,19 +2378,34 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
return 0;
}
int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
/**
* memcg_kmem_charge: charge a kmem page to the current memory cgroup
* @page: page to charge
* @gfp: reclaim mode
* @order: allocation order
*
* Returns 0 on success, an error code on failure.
*/
int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
{
struct mem_cgroup *memcg;
int ret = 0;
if (memcg_kmem_bypass())
return 0;
memcg = get_mem_cgroup_from_mm(current->mm);
if (!mem_cgroup_is_root(memcg))
ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
css_put(&memcg->css);
return ret;
}
void __memcg_kmem_uncharge(struct page *page, int order)
/**
* memcg_kmem_uncharge: uncharge a kmem page
* @page: page to uncharge
* @order: allocation order
*/
void memcg_kmem_uncharge(struct page *page, int order)
{
struct mem_cgroup *memcg = page->mem_cgroup;
unsigned int nr_pages = 1 << order;
@@ -4409,7 +4445,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
#ifdef CONFIG_SWAP
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
pte_t ptent, swp_entry_t *entry)
{
struct page *page = NULL;
swp_entry_t ent = pte_to_swp_entry(ptent);
@@ -4428,7 +4464,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
}
#else
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
pte_t ptent, swp_entry_t *entry)
{
return NULL;
}
@@ -4471,7 +4507,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
/**
* mem_cgroup_move_account - move account of the page
* @page: the page
* @nr_pages: number of regular pages (>1 for huge pages)
* @compound: charge the page as compound or small page
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
*
@@ -4593,7 +4629,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
if (pte_present(ptent))
page = mc_handle_present_pte(vma, addr, ptent);
else if (is_swap_pte(ptent))
page = mc_handle_swap_pte(vma, addr, ptent, &ent);
page = mc_handle_swap_pte(vma, ptent, &ent);
else if (pte_none(ptent))
page = mc_handle_file_pte(vma, addr, ptent, &ent);
@@ -5333,6 +5369,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
* @mm: mm context of the victim
* @gfp_mask: reclaim mode
* @memcgp: charged memcg return
* @compound: charge the page as compound or small page
*
* Try to charge @page to the memcg that @mm belongs to, reclaiming
* pages according to @gfp_mask if necessary.
@@ -5395,6 +5432,7 @@ out:
* @page: page to charge
* @memcg: memcg to charge the page to
* @lrucare: page might be on LRU already
* @compound: charge the page as compound or small page
*
* Finalize a charge transaction started by mem_cgroup_try_charge(),
* after page->mapping has been set up. This must happen atomically
@@ -5446,6 +5484,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
* mem_cgroup_cancel_charge - cancel a page charge
* @page: page to charge
* @memcg: memcg to charge the page to
* @compound: charge the page as compound or small page
*
* Cancel a charge transaction started by mem_cgroup_try_charge().
*/
@@ -5469,15 +5508,18 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
unsigned long nr_anon, unsigned long nr_file,
unsigned long nr_huge, struct page *dummy_page)
unsigned long nr_huge, unsigned long nr_kmem,
struct page *dummy_page)
{
unsigned long nr_pages = nr_anon + nr_file;
unsigned long nr_pages = nr_anon + nr_file + nr_kmem;
unsigned long flags;
if (!mem_cgroup_is_root(memcg)) {
page_counter_uncharge(&memcg->memory, nr_pages);
if (do_memsw_account())
page_counter_uncharge(&memcg->memsw, nr_pages);
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && nr_kmem)
page_counter_uncharge(&memcg->kmem, nr_kmem);
memcg_oom_recover(memcg);
}
@@ -5500,6 +5542,7 @@ static void uncharge_list(struct list_head *page_list)
unsigned long nr_anon = 0;
unsigned long nr_file = 0;
unsigned long nr_huge = 0;
unsigned long nr_kmem = 0;
unsigned long pgpgout = 0;
struct list_head *next;
struct page *page;
@@ -5510,8 +5553,6 @@ static void uncharge_list(struct list_head *page_list)
*/
next = page_list->next;
do {
unsigned int nr_pages = 1;
page = list_entry(next, struct page, lru);
next = page->lru.next;
@@ -5530,31 +5571,34 @@ static void uncharge_list(struct list_head *page_list)
if (memcg != page->mem_cgroup) {
if (memcg) {
uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
nr_huge, page);
pgpgout = nr_anon = nr_file = nr_huge = 0;
nr_huge, nr_kmem, page);
pgpgout = nr_anon = nr_file =
nr_huge = nr_kmem = 0;
}
memcg = page->mem_cgroup;
}
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
nr_huge += nr_pages;
}
if (!PageKmemcg(page)) {
unsigned int nr_pages = 1;
if (PageAnon(page))
nr_anon += nr_pages;
else
nr_file += nr_pages;
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
nr_huge += nr_pages;
}
if (PageAnon(page))
nr_anon += nr_pages;
else
nr_file += nr_pages;
pgpgout++;
} else
nr_kmem += 1 << compound_order(page);
page->mem_cgroup = NULL;
pgpgout++;
} while (next != page_list);
if (memcg)
uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
nr_huge, page);
nr_huge, nr_kmem, page);
}
/**

Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání

Zobrazit soubor

@@ -449,6 +449,25 @@ out_fail:
return -1;
}
static struct zone * __meminit move_pfn_range(int zone_shift,
unsigned long start_pfn, unsigned long end_pfn)
{
struct zone *zone = page_zone(pfn_to_page(start_pfn));
int ret = 0;
if (zone_shift < 0)
ret = move_pfn_range_left(zone + zone_shift, zone,
start_pfn, end_pfn);
else if (zone_shift)
ret = move_pfn_range_right(zone, zone + zone_shift,
start_pfn, end_pfn);
if (ret)
return NULL;
return zone + zone_shift;
}
static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
unsigned long end_pfn)
{
@@ -1028,6 +1047,37 @@ static void node_states_set_node(int node, struct memory_notify *arg)
node_set_state(node, N_MEMORY);
}
int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
enum zone_type target)
{
struct zone *zone = page_zone(pfn_to_page(pfn));
enum zone_type idx = zone_idx(zone);
int i;
if (idx < target) {
/* pages must be at end of current zone */
if (pfn + nr_pages != zone_end_pfn(zone))
return 0;
/* no zones in use between current zone and target */
for (i = idx + 1; i < target; i++)
if (zone_is_initialized(zone - idx + i))
return 0;
}
if (target < idx) {
/* pages must be at beginning of current zone */
if (pfn != zone->zone_start_pfn)
return 0;
/* no zones in use between current zone and target */
for (i = target + 1; i < idx; i++)
if (zone_is_initialized(zone - idx + i))
return 0;
}
return target - idx;
}
/* Must be protected by mem_hotplug_begin() */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
@@ -1039,6 +1089,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
int nid;
int ret;
struct memory_notify arg;
int zone_shift = 0;
/*
* This doesn't need a lock to do pfn_to_page().
@@ -1052,19 +1103,14 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
!can_online_high_movable(zone))
return -EINVAL;
if (online_type == MMOP_ONLINE_KERNEL &&
zone_idx(zone) == ZONE_MOVABLE) {
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
return -EINVAL;
}
if (online_type == MMOP_ONLINE_MOVABLE &&
zone_idx(zone) == ZONE_MOVABLE - 1) {
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
return -EINVAL;
}
if (online_type == MMOP_ONLINE_KERNEL)
zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
else if (online_type == MMOP_ONLINE_MOVABLE)
zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
/* Previous code may changed the zone of the pfn range */
zone = page_zone(pfn_to_page(pfn));
zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
if (!zone)
return -EINVAL;
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;

Zobrazit soubor

@@ -512,6 +512,8 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
}
}
if (pmd_trans_unstable(pmd))
return 0;
retry:
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -529,7 +531,7 @@ retry:
nid = page_to_nid(page);
if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
continue;
if (PageTransCompound(page) && PageAnon(page)) {
if (PageTransCompound(page)) {
get_page(page);
pte_unmap_unlock(pte, ptl);
lock_page(page);

Zobrazit soubor

@@ -31,6 +31,7 @@
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
@@ -73,6 +74,81 @@ int migrate_prep_local(void)
return 0;
}
bool isolate_movable_page(struct page *page, isolate_mode_t mode)
{
struct address_space *mapping;
/*
* Avoid burning cycles with pages that are yet under __free_pages(),
* or just got freed under us.
*
* In case we 'win' a race for a movable page being freed under us and
* raise its refcount preventing __free_pages() from doing its job
* the put_page() at the end of this block will take care of
* release this page, thus avoiding a nasty leakage.
*/
if (unlikely(!get_page_unless_zero(page)))
goto out;
/*
* Check PageMovable before holding a PG_lock because page's owner
* assumes anybody doesn't touch PG_lock of newly allocated page
* so unconditionally grapping the lock ruins page's owner side.
*/
if (unlikely(!__PageMovable(page)))
goto out_putpage;
/*
* As movable pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
* as well as race against the releasing a page.
*
* In order to avoid having an already isolated movable page
* being (wrongly) re-isolated while it is under migration,
* or to avoid attempting to isolate pages being released,
* lets be sure we have the page lock
* before proceeding with the movable page isolation steps.
*/
if (unlikely(!trylock_page(page)))
goto out_putpage;
if (!PageMovable(page) || PageIsolated(page))
goto out_no_isolated;
mapping = page_mapping(page);
VM_BUG_ON_PAGE(!mapping, page);
if (!mapping->a_ops->isolate_page(page, mode))
goto out_no_isolated;
/* Driver shouldn't use PG_isolated bit of page->flags */
WARN_ON_ONCE(PageIsolated(page));
__SetPageIsolated(page);
unlock_page(page);
return true;
out_no_isolated:
unlock_page(page);
out_putpage:
put_page(page);
out:
return false;
}
/* It should be called on page which is PG_movable */
void putback_movable_page(struct page *page)
{
struct address_space *mapping;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
mapping = page_mapping(page);
mapping->a_ops->putback_page(page);
__ClearPageIsolated(page);
}
/*
* Put previously isolated pages back onto the appropriate lists
* from where they were once taken off for compaction/migration.
@@ -94,10 +170,23 @@ void putback_movable_pages(struct list_head *l)
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
if (unlikely(isolated_balloon_page(page)))
balloon_page_putback(page);
else
/*
* We isolated non-lru movable page so here we can use
* __PageMovable because LRU page's mapping cannot have
* PAGE_MAPPING_MOVABLE.
*/
if (unlikely(__PageMovable(page))) {
VM_BUG_ON_PAGE(!PageIsolated(page), page);
lock_page(page);
if (PageMovable(page))
putback_movable_page(page);
else
__ClearPageIsolated(page);
unlock_page(page);
put_page(page);
} else {
putback_lru_page(page);
}
}
}
@@ -170,7 +259,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
} else if (PageAnon(new))
page_add_anon_rmap(new, vma, addr, false);
else
page_add_file_rmap(new);
page_add_file_rmap(new, false);
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
@@ -594,7 +683,7 @@ EXPORT_SYMBOL(migrate_page_copy);
***********************************************************/
/*
* Common logic to directly migrate a single page suitable for
* Common logic to directly migrate a single LRU page suitable for
* pages that do not use PagePrivate/PagePrivate2.
*
* Pages are locked upon entry and exit.
@@ -757,33 +846,72 @@ static int move_to_new_page(struct page *newpage, struct page *page,
enum migrate_mode mode)
{
struct address_space *mapping;
int rc;
int rc = -EAGAIN;
bool is_lru = !__PageMovable(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
else if (mapping->a_ops->migratepage)
if (likely(is_lru)) {
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
else if (mapping->a_ops->migratepage)
/*
* Most pages have a mapping and most filesystems
* provide a migratepage callback. Anonymous pages
* are part of swap space which also has its own
* migratepage callback. This is the most common path
* for page migration.
*/
rc = mapping->a_ops->migratepage(mapping, newpage,
page, mode);
else
rc = fallback_migrate_page(mapping, newpage,
page, mode);
} else {
/*
* Most pages have a mapping and most filesystems provide a
* migratepage callback. Anonymous pages are part of swap
* space which also has its own migratepage callback. This
* is the most common path for page migration.
* In case of non-lru page, it could be released after
* isolation step. In that case, we shouldn't try migration.
*/
rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
else
rc = fallback_migrate_page(mapping, newpage, page, mode);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
if (!PageMovable(page)) {
rc = MIGRATEPAGE_SUCCESS;
__ClearPageIsolated(page);
goto out;
}
rc = mapping->a_ops->migratepage(mapping, newpage,
page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
!PageIsolated(page));
}
/*
* When successful, old pagecache page->mapping must be cleared before
* page is freed; but stats require that PageAnon be left as PageAnon.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
if (!PageAnon(page))
if (__PageMovable(page)) {
VM_BUG_ON_PAGE(!PageIsolated(page), page);
/*
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
__ClearPageIsolated(page);
}
/*
* Anonymous and movable page->mapping will be cleard by
* free_pages_prepare so don't reset it here for keeping
* the type to work PageAnon, for example.
*/
if (!PageMappingFlags(page))
page->mapping = NULL;
}
out:
return rc;
}
@@ -793,6 +921,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
int rc = -EAGAIN;
int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(page);
if (!trylock_page(page)) {
if (!force || mode == MIGRATE_ASYNC)
@@ -861,15 +990,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
if (unlikely(!trylock_page(newpage)))
goto out_unlock;
if (unlikely(isolated_balloon_page(page))) {
/*
* A ballooned page does not need any special attention from
* physical to virtual reverse mapping procedures.
* Skip any attempt to unmap PTEs or to remap swap cache,
* in order to avoid burning cycles at rmap level, and perform
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
if (unlikely(!is_lru)) {
rc = move_to_new_page(newpage, page, mode);
goto out_unlock_both;
}
@@ -915,6 +1037,19 @@ out_unlock:
put_anon_vma(anon_vma);
unlock_page(page);
out:
/*
* If migration is successful, decrease refcount of the newpage
* which will not free the page because new page owner increased
* refcounter. As well, if it is LRU page, add the page to LRU
* list in here.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
if (unlikely(__PageMovable(newpage)))
put_page(newpage);
else
putback_lru_page(newpage);
}
return rc;
}
@@ -948,6 +1083,18 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
if (page_count(page) == 1) {
/* page was freed from under us. So we are done. */
ClearPageActive(page);
ClearPageUnevictable(page);
if (unlikely(__PageMovable(page))) {
lock_page(page);
if (!PageMovable(page))
__ClearPageIsolated(page);
unlock_page(page);
}
if (put_new_page)
put_new_page(newpage, private);
else
put_page(newpage);
goto out;
}
@@ -960,10 +1107,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
}
rc = __unmap_and_move(page, newpage, force, mode);
if (rc == MIGRATEPAGE_SUCCESS) {
put_new_page = NULL;
if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason);
}
out:
if (rc != -EAGAIN) {
@@ -976,33 +1121,45 @@ out:
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
/* Soft-offlined page shouldn't go through lru cache list */
if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
/*
* With this release, we free successfully migrated
* page and set PG_HWPoison on just freed page
* intentionally. Although it's rather weird, it's how
* HWPoison flag works at the moment.
*/
put_page(page);
if (!test_set_page_hwpoison(page))
num_poisoned_pages_inc();
} else
putback_lru_page(page);
}
/*
* If migration was not successful and there's a freeing callback, use
* it. Otherwise, putback_lru_page() will drop the reference grabbed
* during isolation.
* If migration is successful, releases reference grabbed during
* isolation. Otherwise, restore the page to right list unless
* we want to retry.
*/
if (put_new_page)
put_new_page(newpage, private);
else if (unlikely(__is_movable_balloon_page(newpage))) {
/* drop our reference, page already in the balloon */
put_page(newpage);
} else
putback_lru_page(newpage);
if (rc == MIGRATEPAGE_SUCCESS) {
put_page(page);
if (reason == MR_MEMORY_FAILURE) {
/*
* Set PG_HWPoison on just freed page
* intentionally. Although it's rather weird,
* it's how HWPoison flag works at the moment.
*/
if (!test_set_page_hwpoison(page))
num_poisoned_pages_inc();
}
} else {
if (rc != -EAGAIN) {
if (likely(!__PageMovable(page))) {
putback_lru_page(page);
goto put_new;
}
lock_page(page);
if (PageMovable(page))
putback_movable_page(page);
else
__ClearPageIsolated(page);
unlock_page(page);
put_page(page);
}
put_new:
if (put_new_page)
put_new_page(newpage, private);
else
put_page(newpage);
}
if (result) {
if (rc)
@@ -1829,8 +1986,7 @@ fail_putback:
}
orig_entry = *pmd;
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
/*

Zobrazit soubor

@@ -25,6 +25,7 @@
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
@@ -675,6 +676,8 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
vma_adjust_trans_huge(vma, start, end, adjust_next);
if (file) {
mapping = file->f_mapping;
root = &mapping->i_mmap;
@@ -695,8 +698,6 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
vma_adjust_trans_huge(vma, start, end, adjust_next);
anon_vma = vma->anon_vma;
if (!anon_vma && adjust_next)
anon_vma = next->anon_vma;
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
return -ENOMEM;
get_area = current->mm->get_unmapped_area;
if (file && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
if (file) {
if (file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
} else if (flags & MAP_SHARED) {
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
* do_mmap_pgoff() will clear pgoff, so match alignment.
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
}
addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
@@ -2591,6 +2603,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
/* drop PG_Mlocked flag for over-mapped range */
for (tmp = vma; tmp->vm_start >= start + size;
tmp = tmp->vm_next) {
/*
* Split pmd and munlock page on the border
* of the range.
*/
vma_adjust_trans_huge(tmp, start, start + size, 0);
munlock_vma_pages_range(tmp,
max(tmp->vm_start, start),
min(tmp->vm_end, start + size));

Zobrazit soubor

@@ -163,7 +163,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) {
split_huge_pmd(vma, pmd, addr);
if (pmd_none(*pmd))
if (pmd_trans_unstable(pmd))
continue;
} else {
int nr_ptes = change_huge_pmd(vma, pmd, addr,

Zobrazit soubor

@@ -210,9 +210,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
}
}
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_none(*old_pmd))
if (pmd_trans_unstable(old_pmd))
continue;
VM_BUG_ON(pmd_trans_huge(*old_pmd));
}
if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
break;

Zobrazit soubor

@@ -1809,7 +1809,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
void filemap_map_pages(struct fault_env *fe,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
BUG();
}

Zobrazit soubor

@@ -274,7 +274,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
#endif
enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
struct task_struct *task, unsigned long totalpages)
struct task_struct *task)
{
if (oom_unkillable_task(task, NULL, oc->nodemask))
return OOM_SCAN_CONTINUE;
@@ -311,7 +311,7 @@ static struct task_struct *select_bad_process(struct oom_control *oc,
for_each_process(p) {
unsigned int points;
switch (oom_scan_process_thread(oc, p, totalpages)) {
switch (oom_scan_process_thread(oc, p)) {
case OOM_SCAN_SELECT:
chosen = p;
chosen_points = ULONG_MAX;
@@ -383,8 +383,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
rcu_read_unlock();
}
static void dump_header(struct oom_control *oc, struct task_struct *p,
struct mem_cgroup *memcg)
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
@@ -392,12 +391,12 @@ static void dump_header(struct oom_control *oc, struct task_struct *p,
cpuset_print_current_mems_allowed();
dump_stack();
if (memcg)
mem_cgroup_print_oom_info(memcg, p);
if (oc->memcg)
mem_cgroup_print_oom_info(oc->memcg, p);
else
show_mem(SHOW_MEM_FILTER_NODES);
if (sysctl_oom_dump_tasks)
dump_tasks(memcg, oc->nodemask);
dump_tasks(oc->memcg, oc->nodemask);
}
/*
@@ -453,7 +452,7 @@ static bool __oom_reap_task(struct task_struct *tsk)
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
* __oom_reap_task exit_mm
* atomic_inc_not_zero
* mmget_not_zero
* mmput
* atomic_dec_and_test
* exit_oom_victim
@@ -475,12 +474,22 @@ static bool __oom_reap_task(struct task_struct *tsk)
if (!p)
goto unlock_oom;
mm = p->mm;
atomic_inc(&mm->mm_users);
atomic_inc(&mm->mm_count);
task_unlock(p);
if (!down_read_trylock(&mm->mmap_sem)) {
ret = false;
goto unlock_oom;
goto mm_drop;
}
/*
* increase mm_users only after we know we will reap something so
* that the mmput_async is called only when we have reaped something
* and delayed __mmput doesn't matter that much
*/
if (!mmget_not_zero(mm)) {
up_read(&mm->mmap_sem);
goto mm_drop;
}
tlb_gather_mmu(&tlb, mm, 0, -1);
@@ -522,15 +531,16 @@ static bool __oom_reap_task(struct task_struct *tsk)
* to release its memory.
*/
set_bit(MMF_OOM_REAPED, &mm->flags);
unlock_oom:
mutex_unlock(&oom_lock);
/*
* Drop our reference but make sure the mmput slow path is called from a
* different context because we shouldn't risk we get stuck there and
* put the oom_reaper out of the way.
*/
if (mm)
mmput_async(mm);
mmput_async(mm);
mm_drop:
mmdrop(mm);
unlock_oom:
mutex_unlock(&oom_lock);
return ret;
}
@@ -739,7 +749,7 @@ void oom_killer_enable(void)
*/
void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
struct mem_cgroup *memcg, const char *message)
const char *message)
{
struct task_struct *victim = p;
struct task_struct *child;
@@ -765,7 +775,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
task_unlock(p);
if (__ratelimit(&oom_rs))
dump_header(oc, p, memcg);
dump_header(oc, p);
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
message, task_pid_nr(p), p->comm, points);
@@ -786,8 +796,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
/*
* oom_badness() returns 0 if the thread is unkillable
*/
child_points = oom_badness(child, memcg, oc->nodemask,
totalpages);
child_points = oom_badness(child,
oc->memcg, oc->nodemask, totalpages);
if (child_points > victim_points) {
put_task_struct(victim);
victim = child;
@@ -865,8 +875,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
struct mem_cgroup *memcg)
void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint)
{
if (likely(!sysctl_panic_on_oom))
return;
@@ -882,7 +891,7 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
/* Do not panic for oom kills triggered by sysrq */
if (is_sysrq_oom(oc))
return;
dump_header(oc, NULL, memcg);
dump_header(oc, NULL);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
@@ -957,13 +966,13 @@ bool out_of_memory(struct oom_control *oc)
constraint = constrained_alloc(oc, &totalpages);
if (constraint != CONSTRAINT_MEMORY_POLICY)
oc->nodemask = NULL;
check_panic_on_oom(oc, constraint, NULL);
check_panic_on_oom(oc, constraint);
if (sysctl_oom_kill_allocating_task && current->mm &&
!oom_unkillable_task(current, NULL, oc->nodemask) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
oom_kill_process(oc, current, 0, totalpages, NULL,
oom_kill_process(oc, current, 0, totalpages,
"Out of memory (oom_kill_allocating_task)");
return true;
}
@@ -971,12 +980,11 @@ bool out_of_memory(struct oom_control *oc)
p = select_bad_process(oc, &points, totalpages);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p && !is_sysrq_oom(oc)) {
dump_header(oc, NULL, NULL);
dump_header(oc, NULL);
panic("Out of memory and no killable processes...\n");
}
if (p && p != (void *)-1UL) {
oom_kill_process(oc, p, points, totalpages, NULL,
"Out of memory");
oom_kill_process(oc, p, points, totalpages, "Out of memory");
/*
* Give the killed process a good chance to exit before trying
* to allocate memory again.
@@ -988,14 +996,15 @@ bool out_of_memory(struct oom_control *oc)
/*
* The pagefault handler calls here because it is out of memory, so kill a
* memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
* parallel oom killing is already in progress so do nothing.
* memory-hogging task. If oom_lock is held by somebody else, a parallel oom
* killing is already in progress so do nothing.
*/
void pagefault_out_of_memory(void)
{
struct oom_control oc = {
.zonelist = NULL,
.nodemask = NULL,
.memcg = NULL,
.gfp_mask = 0,
.order = 0,
};

Zobrazit soubor

@@ -2563,6 +2563,7 @@ int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);
page = compound_head(page);
if (likely(mapping)) {
int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
/*
@@ -2747,6 +2748,11 @@ int test_clear_page_writeback(struct page *page)
__wb_writeout_inc(wb);
}
}
if (mapping->host && !mapping_tagged(mapping,
PAGECACHE_TAG_WRITEBACK))
sb_clear_inode_writeback(mapping->host);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestClearPageWriteback(page);
@@ -2774,11 +2780,24 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
spin_lock_irqsave(&mapping->tree_lock, flags);
ret = TestSetPageWriteback(page);
if (!ret) {
bool on_wblist;
on_wblist = mapping_tagged(mapping,
PAGECACHE_TAG_WRITEBACK);
radix_tree_tag_set(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_WRITEBACK);
if (bdi_cap_account_writeback(bdi))
__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
/*
* We can come through here when swapping anonymous
* pages, so we don't necessarily have an inode to track
* for sync.
*/
if (mapping->host && !on_wblist)
sb_mark_inode_writeback(mapping->host);
}
if (!PageDirty(page))
radix_tree_tag_clear(&mapping->page_tree,

Zobrazit soubor

@@ -63,6 +63,7 @@
#include <linux/sched/rt.h>
#include <linux/page_owner.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -1006,6 +1007,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
if (compound)
ClearPageDoubleMap(page);
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
@@ -1016,8 +1019,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
}
}
if (PageAnonHead(page))
if (PageMappingFlags(page))
page->mapping = NULL;
if (memcg_kmem_enabled() && PageKmemcg(page)) {
memcg_kmem_uncharge(page, order);
__ClearPageKmemcg(page);
}
if (check_free)
bad += free_pages_check(page);
if (bad)
@@ -1724,6 +1731,19 @@ static bool check_new_pages(struct page *page, unsigned int order)
return false;
}
inline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
set_page_private(page, 0);
set_page_refcounted(page);
arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
set_page_owner(page, order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
unsigned int alloc_flags)
{
@@ -1736,13 +1756,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
poisoned &= page_is_poisoned(p);
}
set_page_private(page, 0);
set_page_refcounted(page);
arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
post_alloc_hook(page, order, gfp_flags);
if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
for (i = 0; i < (1 << order); i++)
@@ -1751,8 +1765,6 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
set_page_owner(page, order, gfp_flags);
/*
* page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
* allocate the page. The expectation is that the caller is taking
@@ -2461,7 +2473,6 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
void split_page(struct page *page, unsigned int order)
{
int i;
gfp_t gfp_mask;
VM_BUG_ON_PAGE(PageCompound(page), page);
VM_BUG_ON_PAGE(!page_count(page), page);
@@ -2475,12 +2486,9 @@ void split_page(struct page *page, unsigned int order)
split_page(virt_to_page(page[0].shadow), order);
#endif
gfp_mask = get_page_owner_gfp(page);
set_page_owner(page, 0, gfp_mask);
for (i = 1; i < (1 << order); i++) {
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
set_page_owner(page + i, 0, gfp_mask);
}
split_page_owner(page, order);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -2509,8 +2517,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
zone->free_area[order].nr_free--;
rmv_page_order(page);
set_page_owner(page, order, __GFP_MOVABLE);
/* Set the pageblock if the isolated page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
@@ -2526,33 +2532,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
return 1UL << order;
}
/*
* Similar to split_page except the page is already free. As this is only
* being used for migration, the migratetype of the block also changes.
* As this is called with interrupts disabled, the caller is responsible
* for calling arch_alloc_page() and kernel_map_page() after interrupts
* are enabled.
*
* Note: this is probably too low level an operation for use in drivers.
* Please consult with lkml before using this in your driver.
*/
int split_free_page(struct page *page)
{
unsigned int order;
int nr_pages;
order = page_order(page);
nr_pages = __isolate_free_page(page, order);
if (!nr_pages)
return 0;
/* Split into individual pages */
set_page_refcounted(page);
split_page(page, order);
return nr_pages;
}
/*
* Update NUMA hit/miss statistics
*
@@ -3105,6 +3084,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
struct oom_control oc = {
.zonelist = ac->zonelist,
.nodemask = ac->nodemask,
.memcg = NULL,
.gfp_mask = gfp_mask,
.order = order,
};
@@ -3868,6 +3848,14 @@ no_zone:
}
out:
if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
__free_pages(page, order);
page = NULL;
} else
__SetPageKmemcg(page);
}
if (kmemcheck_enabled && page)
kmemcheck_pagealloc_alloc(page, order, gfp_mask);
@@ -4023,56 +4011,6 @@ void __free_page_frag(void *addr)
}
EXPORT_SYMBOL(__free_page_frag);
/*
* alloc_kmem_pages charges newly allocated pages to the kmem resource counter
* of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is
* equivalent to alloc_pages.
*
* It should be used when the caller would like to use kmalloc, but since the
* allocation is large, it has to fall back to the page allocator.
*/
struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
{
struct page *page;
page = alloc_pages(gfp_mask, order);
if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
__free_pages(page, order);
page = NULL;
}
return page;
}
struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
struct page *page;
page = alloc_pages_node(nid, gfp_mask, order);
if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
__free_pages(page, order);
page = NULL;
}
return page;
}
/*
* __free_kmem_pages and free_kmem_pages will free pages allocated with
* alloc_kmem_pages.
*/
void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge(page, order);
__free_pages(page, order);
}
void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
__free_kmem_pages(virt_to_page((void *)addr), order);
}
}
static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{
@@ -4374,6 +4312,9 @@ void show_free_areas(unsigned int filter)
" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
" anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n"
#endif
" free:%lu free_pcp:%lu free_cma:%lu\n",
global_page_state(NR_ACTIVE_ANON),
global_page_state(NR_INACTIVE_ANON),
@@ -4391,6 +4332,11 @@ void show_free_areas(unsigned int filter)
global_page_state(NR_SHMEM),
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR,
global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR,
global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR,
#endif
global_page_state(NR_FREE_PAGES),
free_pcp,
global_page_state(NR_FREE_CMA_PAGES));
@@ -4425,6 +4371,11 @@ void show_free_areas(unsigned int filter)
" writeback:%lukB"
" mapped:%lukB"
" shmem:%lukB"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
" shmem_thp: %lukB"
" shmem_pmdmapped: %lukB"
" anon_thp: %lukB"
#endif
" slab_reclaimable:%lukB"
" slab_unreclaimable:%lukB"
" kernel_stack:%lukB"
@@ -4457,6 +4408,12 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_WRITEBACK)),
K(zone_page_state(zone, NR_FILE_MAPPED)),
K(zone_page_state(zone, NR_SHMEM)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
K(zone_page_state(zone, NR_SHMEM_THPS) * HPAGE_PMD_NR),
K(zone_page_state(zone, NR_SHMEM_PMDMAPPED)
* HPAGE_PMD_NR),
K(zone_page_state(zone, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif
K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
zone_page_state(zone, NR_KERNEL_STACK) *
@@ -6467,15 +6424,18 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
sizeof(arch_zone_lowest_possible_pfn));
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {
start_pfn = find_min_pfn_with_active_regions();
for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
end_pfn = max(max_zone_pfn[i], start_pfn);
arch_zone_lowest_possible_pfn[i] = start_pfn;
arch_zone_highest_possible_pfn[i] = end_pfn;
start_pfn = end_pfn;
}
arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

Zobrazit soubor

@@ -7,6 +7,7 @@
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -80,7 +81,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
struct zone *zone;
unsigned long flags, nr_pages;
struct page *isolated_page = NULL;
bool isolated_page = false;
unsigned int order;
unsigned long page_idx, buddy_idx;
struct page *buddy;
@@ -108,9 +109,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (pfn_valid_within(page_to_pfn(buddy)) &&
!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
kernel_map_pages(page, (1 << order), 1);
set_page_refcounted(page);
isolated_page = page;
isolated_page = true;
}
}
}
@@ -128,8 +127,10 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
zone->nr_isolate_pageblock--;
out:
spin_unlock_irqrestore(&zone->lock, flags);
if (isolated_page)
__free_pages(isolated_page, order);
if (isolated_page) {
post_alloc_hook(page, order, __GFP_MOVABLE);
__free_pages(page, order);
}
}
static inline struct page *

Zobrazit soubor

@@ -7,11 +7,22 @@
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include "internal.h"
/*
* TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
* to use off stack temporal storage
*/
#define PAGE_OWNER_STACK_DEPTH (16)
static bool page_owner_disabled = true;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static void init_early_allocated_pages(void);
static int early_page_owner_param(char *buf)
@@ -34,11 +45,41 @@ static bool need_page_owner(void)
return true;
}
static noinline void register_dummy_stack(void)
{
unsigned long entries[4];
struct stack_trace dummy;
dummy.nr_entries = 0;
dummy.max_entries = ARRAY_SIZE(entries);
dummy.entries = &entries[0];
dummy.skip = 0;
save_stack_trace(&dummy);
dummy_handle = depot_save_stack(&dummy, GFP_KERNEL);
}
static noinline void register_failure_stack(void)
{
unsigned long entries[4];
struct stack_trace failure;
failure.nr_entries = 0;
failure.max_entries = ARRAY_SIZE(entries);
failure.entries = &entries[0];
failure.skip = 0;
save_stack_trace(&failure);
failure_handle = depot_save_stack(&failure, GFP_KERNEL);
}
static void init_page_owner(void)
{
if (page_owner_disabled)
return;
register_dummy_stack();
register_failure_stack();
static_branch_enable(&page_owner_inited);
init_early_allocated_pages();
}
@@ -61,25 +102,66 @@ void __reset_page_owner(struct page *page, unsigned int order)
}
}
void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
static inline bool check_recursive_alloc(struct stack_trace *trace,
unsigned long ip)
{
struct page_ext *page_ext = lookup_page_ext(page);
int i, count;
if (!trace->nr_entries)
return false;
for (i = 0, count = 0; i < trace->nr_entries; i++) {
if (trace->entries[i] == ip && ++count == 2)
return true;
}
return false;
}
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
struct stack_trace trace = {
.nr_entries = 0,
.max_entries = ARRAY_SIZE(page_ext->trace_entries),
.entries = &page_ext->trace_entries[0],
.skip = 3,
.entries = entries,
.max_entries = PAGE_OWNER_STACK_DEPTH,
.skip = 0
};
depot_stack_handle_t handle;
save_stack_trace(&trace);
if (trace.nr_entries != 0 &&
trace.entries[trace.nr_entries-1] == ULONG_MAX)
trace.nr_entries--;
/*
* We need to check recursion here because our request to stackdepot
* could trigger memory allocation to save new entry. New memory
* allocation would reach here and call depot_save_stack() again
* if we don't catch it. There is still not enough memory in stackdepot
* so it would try to allocate memory again and loop forever.
*/
if (check_recursive_alloc(&trace, _RET_IP_))
return dummy_handle;
handle = depot_save_stack(&trace, flags);
if (!handle)
handle = failure_handle;
return handle;
}
noinline void __set_page_owner(struct page *page, unsigned int order,
gfp_t gfp_mask)
{
struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
save_stack_trace(&trace);
page_ext->handle = save_stack(gfp_mask);
page_ext->order = order;
page_ext->gfp_mask = gfp_mask;
page_ext->nr_entries = trace.nr_entries;
page_ext->last_migrate_reason = -1;
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
@@ -94,34 +176,31 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
page_ext->last_migrate_reason = reason;
}
gfp_t __get_page_owner_gfp(struct page *page)
void __split_page_owner(struct page *page, unsigned int order)
{
int i;
struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask;
if (unlikely(!page_ext))
return;
page_ext->order = 0;
for (i = 1; i < (1 << order); i++)
__copy_page_owner(page, page + i);
}
void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
struct page_ext *old_ext = lookup_page_ext(oldpage);
struct page_ext *new_ext = lookup_page_ext(newpage);
int i;
if (unlikely(!old_ext || !new_ext))
return;
new_ext->order = old_ext->order;
new_ext->gfp_mask = old_ext->gfp_mask;
new_ext->nr_entries = old_ext->nr_entries;
for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++)
new_ext->trace_entries[i] = old_ext->trace_entries[i];
new_ext->last_migrate_reason = old_ext->last_migrate_reason;
new_ext->handle = old_ext->handle;
/*
* We don't clear the bit on the oldpage as it's going to be freed
@@ -137,14 +216,18 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
struct page *page, struct page_ext *page_ext)
struct page *page, struct page_ext *page_ext,
depot_stack_handle_t handle)
{
int ret;
int pageblock_mt, page_mt;
char *kbuf;
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
struct stack_trace trace = {
.nr_entries = page_ext->nr_entries,
.entries = &page_ext->trace_entries[0],
.nr_entries = 0,
.entries = entries,
.max_entries = PAGE_OWNER_STACK_DEPTH,
.skip = 0
};
kbuf = kmalloc(count, GFP_KERNEL);
@@ -173,6 +256,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
if (ret >= count)
goto err;
depot_fetch_stack(handle, &trace);
ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
if (ret >= count)
goto err;
@@ -203,10 +287,14 @@ err:
void __dump_page_owner(struct page *page)
{
struct page_ext *page_ext = lookup_page_ext(page);
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
struct stack_trace trace = {
.nr_entries = page_ext->nr_entries,
.entries = &page_ext->trace_entries[0],
.nr_entries = 0,
.entries = entries,
.max_entries = PAGE_OWNER_STACK_DEPTH,
.skip = 0
};
depot_stack_handle_t handle;
gfp_t gfp_mask;
int mt;
@@ -222,6 +310,13 @@ void __dump_page_owner(struct page *page)
return;
}
handle = READ_ONCE(page_ext->handle);
if (!handle) {
pr_alert("page_owner info is not active (free page?)\n");
return;
}
depot_fetch_stack(handle, &trace);
pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
print_stack_trace(&trace, 0);
@@ -237,6 +332,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
unsigned long pfn;
struct page *page;
struct page_ext *page_ext;
depot_stack_handle_t handle;
if (!static_branch_unlikely(&page_owner_inited))
return -EINVAL;
@@ -285,10 +381,19 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
continue;
/*
* Access to page_ext->handle isn't synchronous so we should
* be careful to access it.
*/
handle = READ_ONCE(page_ext->handle);
if (!handle)
continue;
/* Record the next PFN to read in the file offset */
*ppos = (pfn - min_low_pfn) + 1;
return print_page_owner(buf, count, pfn, page, page_ext);
return print_page_owner(buf, count, pfn, page,
page_ext, handle);
}
return 0;

Zobrazit soubor

@@ -89,7 +89,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
page = lru_to_page(pages);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping, page->index,
mapping_gfp_constraint(mapping, GFP_KERNEL))) {
readahead_gfp_mask(mapping))) {
read_cache_pages_invalidate_page(mapping, page);
continue;
}
@@ -108,7 +108,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
EXPORT_SYMBOL(read_cache_pages);
static int read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
{
struct blk_plug plug;
unsigned page_idx;
@@ -126,10 +126,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = lru_to_page(pages);
list_del(&page->lru);
if (!add_to_page_cache_lru(page, mapping, page->index,
mapping_gfp_constraint(mapping, GFP_KERNEL))) {
if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
mapping->a_ops->readpage(filp, page);
}
put_page(page);
}
ret = 0;
@@ -159,6 +157,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
int page_idx;
int ret = 0;
loff_t isize = i_size_read(inode);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
if (isize == 0)
goto out;
@@ -180,7 +179,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (page && !radix_tree_exceptional_entry(page))
continue;
page = page_cache_alloc_readahead(mapping);
page = __page_cache_alloc(gfp_mask);
if (!page)
break;
page->index = page_offset;
@@ -196,7 +195,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
* will then handle the error.
*/
if (ret)
read_pages(mapping, filp, &page_pool, ret);
read_pages(mapping, filp, &page_pool, ret, gfp_mask);
BUG_ON(!list_empty(&page_pool));
out:
return ret;

Zobrazit soubor

@@ -1212,10 +1212,8 @@ void do_page_add_anon_rmap(struct page *page,
* pte lock(a spinlock) is held, which implies preemption
* disabled.
*/
if (compound) {
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
}
if (compound)
__inc_zone_page_state(page, NR_ANON_THPS);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
}
if (unlikely(PageKsm(page)))
@@ -1253,7 +1251,7 @@ void page_add_new_anon_rmap(struct page *page,
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__inc_zone_page_state(page, NR_ANON_THPS);
} else {
/* Anon THP always mapped first with PMD */
VM_BUG_ON_PAGE(PageTransCompound(page), page);
@@ -1270,18 +1268,42 @@ void page_add_new_anon_rmap(struct page *page,
*
* The caller needs to hold the pte lock.
*/
void page_add_file_rmap(struct page *page)
void page_add_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
if (compound && PageTransHuge(page)) {
for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
if (atomic_inc_and_test(&page[i]._mapcount))
nr++;
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
__inc_zone_page_state(page, NR_SHMEM_PMDMAPPED);
} else {
if (PageTransCompound(page)) {
VM_BUG_ON_PAGE(!PageLocked(page), page);
SetPageDoubleMap(compound_head(page));
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
}
if (!atomic_inc_and_test(&page->_mapcount))
goto out;
}
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, nr);
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
out:
unlock_page_memcg(page);
}
static void page_remove_file_rmap(struct page *page)
static void page_remove_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
@@ -1292,15 +1314,26 @@ static void page_remove_file_rmap(struct page *page)
}
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
if (compound && PageTransHuge(page)) {
for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
if (atomic_add_negative(-1, &page[i]._mapcount))
nr++;
}
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
goto out;
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
__dec_zone_page_state(page, NR_SHMEM_PMDMAPPED);
} else {
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
}
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__dec_zone_page_state(page, NR_FILE_MAPPED);
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, -nr);
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
if (unlikely(PageMlocked(page)))
@@ -1323,7 +1356,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return;
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__dec_zone_page_state(page, NR_ANON_THPS);
if (TestClearPageDoubleMap(page)) {
/*
@@ -1356,11 +1389,8 @@ static void page_remove_anon_compound_rmap(struct page *page)
*/
void page_remove_rmap(struct page *page, bool compound)
{
if (!PageAnon(page)) {
VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
page_remove_file_rmap(page);
return;
}
if (!PageAnon(page))
return page_remove_file_rmap(page, compound);
if (compound)
return page_remove_anon_compound_rmap(page);
@@ -1436,8 +1466,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
*/
if (!(flags & TTU_IGNORE_MLOCK)) {
if (vma->vm_flags & VM_LOCKED) {
/* Holding pte lock, we do *not* need mmap_sem here */
mlock_vma_page(page);
/* PTE-mapped THP are never mlocked */
if (!PageTransCompound(page)) {
/*
* Holding pte lock, we do *not* need
* mmap_sem here
*/
mlock_vma_page(page);
}
ret = SWAP_MLOCK;
goto out_unmap;
}

Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání

Zobrazit soubor

@@ -1236,61 +1236,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
}
}
#ifdef CONFIG_SLAB_FREELIST_RANDOM
static void freelist_randomize(struct rnd_state *state, freelist_idx_t *list,
size_t count)
{
size_t i;
unsigned int rand;
for (i = 0; i < count; i++)
list[i] = i;
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(state);
rand %= (i + 1);
swap(list[i], list[rand]);
}
}
/* Create a random sequence per cache */
static int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
{
unsigned int seed, count = cachep->num;
struct rnd_state state;
if (count < 2)
return 0;
/* If it fails, we will just use the global lists */
cachep->random_seq = kcalloc(count, sizeof(freelist_idx_t), gfp);
if (!cachep->random_seq)
return -ENOMEM;
/* Get best entropy at this stage */
get_random_bytes_arch(&seed, sizeof(seed));
prandom_seed_state(&state, seed);
freelist_randomize(&state, cachep->random_seq, count);
return 0;
}
/* Destroy the per-cache random freelist sequence */
static void cache_random_seq_destroy(struct kmem_cache *cachep)
{
kfree(cachep->random_seq);
cachep->random_seq = NULL;
}
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
{
return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
/*
* Initialisation. Called after the page allocator have been initialised and
* before smp_init().
@@ -2535,7 +2480,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
union freelist_init_state {
struct {
unsigned int pos;
freelist_idx_t *list;
unsigned int *list;
unsigned int count;
unsigned int rand;
};
@@ -2554,7 +2499,7 @@ static bool freelist_state_initialize(union freelist_init_state *state,
unsigned int rand;
/* Use best entropy available to define a random shift */
get_random_bytes_arch(&rand, sizeof(rand));
rand = get_random_int();
/* Use a random state if the pre-computed list is not available */
if (!cachep->random_seq) {
@@ -2576,13 +2521,20 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state)
return (state->list[state->pos++] + state->rand) % state->count;
}
/* Swap two freelist entries */
static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
{
swap(((freelist_idx_t *)page->freelist)[a],
((freelist_idx_t *)page->freelist)[b]);
}
/*
* Shuffle the freelist initialization state based on pre-computed lists.
* return true if the list was successfully shuffled, false otherwise.
*/
static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
{
unsigned int objfreelist = 0, i, count = cachep->num;
unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
bool precomputed;
@@ -2607,7 +2559,15 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
* Later use a pre-computed list for speed.
*/
if (!precomputed) {
freelist_randomize(&state.rnd_state, page->freelist, count);
for (i = 0; i < count; i++)
set_free_obj(page, i, i);
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(&state.rnd_state);
rand %= (i + 1);
swap_free_obj(page, i, rand);
}
} else {
for (i = 0; i < count; i++)
set_free_obj(page, i, next_random_slot(&state));
@@ -2726,8 +2686,11 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* critical path in kmem_cache_alloc().
*/
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
BUG();
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
flags &= ~GFP_SLAB_BUG_MASK;
pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
invalid_mask, &invalid_mask, flags, &flags);
dump_stack();
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
@@ -3489,8 +3452,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
n->free_objects -= cachep->num;
page = list_last_entry(&n->slabs_free, struct page, lru);
list_del(&page->lru);
list_add(&page->lru, list);
list_move(&page->lru, list);
}
}
@@ -3979,7 +3941,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
int shared = 0;
int batchcount = 0;
err = cache_random_seq_create(cachep, gfp);
err = cache_random_seq_create(cachep, cachep->num, gfp);
if (err)
goto end;

Zobrazit soubor

@@ -42,6 +42,7 @@ struct kmem_cache {
#include <linux/kmemcheck.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
/*
* State of the slab allocator.
@@ -253,8 +254,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
if (is_root_cache(s))
return 0;
ret = __memcg_kmem_charge_memcg(page, gfp, order,
s->memcg_params.memcg);
ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
if (ret)
return ret;
@@ -268,6 +268,9 @@ static __always_inline int memcg_charge_slab(struct page *page,
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
if (!memcg_kmem_enabled())
return;
memcg_kmem_update_page_stat(page,
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE,
@@ -390,7 +393,11 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (should_failslab(s, flags))
return NULL;
return memcg_kmem_get_cache(s, flags);
if (memcg_kmem_enabled() &&
((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
return memcg_kmem_get_cache(s);
return s;
}
static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
@@ -407,7 +414,9 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
s->flags, flags);
kasan_slab_alloc(s, object, flags);
}
memcg_kmem_put_cache(s);
if (memcg_kmem_enabled())
memcg_kmem_put_cache(s);
}
#ifndef CONFIG_SLOB
@@ -464,4 +473,17 @@ int memcg_slab_show(struct seq_file *m, void *p);
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
unsigned int count, gfp_t gfp)
{
return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
#endif /* MM_SLAB_H */

Zobrazit soubor

@@ -1012,7 +1012,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
struct page *page;
flags |= __GFP_COMP;
page = alloc_kmem_pages(flags, order);
page = alloc_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
kasan_kmalloc_large(ret, size, flags);
@@ -1030,6 +1030,53 @@ void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
EXPORT_SYMBOL(kmalloc_order_trace);
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
size_t count)
{
size_t i;
unsigned int rand;
for (i = 0; i < count; i++)
list[i] = i;
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(state);
rand %= (i + 1);
swap(list[i], list[rand]);
}
}
/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp)
{
struct rnd_state state;
if (count < 2 || cachep->random_seq)
return 0;
cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
if (!cachep->random_seq)
return -ENOMEM;
/* Get best entropy at this stage of boot */
prandom_seed_state(&state, get_random_long());
freelist_randomize(&state, cachep->random_seq, count);
return 0;
}
/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
kfree(cachep->random_seq);
cachep->random_seq = NULL;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
#ifdef CONFIG_SLABINFO
#ifdef CONFIG_SLAB

145
mm/slub.c
Zobrazit soubor

@@ -1405,6 +1405,109 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
return page;
}
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Pre-initialize the random sequence cache */
static int init_cache_random_seq(struct kmem_cache *s)
{
int err;
unsigned long i, count = oo_objects(s->oo);
err = cache_random_seq_create(s, count, GFP_KERNEL);
if (err) {
pr_err("SLUB: Unable to initialize free list for %s\n",
s->name);
return err;
}
/* Transform to an offset on the set of pages */
if (s->random_seq) {
for (i = 0; i < count; i++)
s->random_seq[i] *= s->size;
}
return 0;
}
/* Initialize each random sequence freelist per cache */
static void __init init_freelist_randomization(void)
{
struct kmem_cache *s;
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list)
init_cache_random_seq(s);
mutex_unlock(&slab_mutex);
}
/* Get the next entry on the pre-computed freelist randomized */
static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
{
unsigned int idx;
/*
* If the target page allocation failed, the number of objects on the
* page might be smaller than the usual size defined by the cache.
*/
do {
idx = s->random_seq[*pos];
*pos += 1;
if (*pos >= freelist_count)
*pos = 0;
} while (unlikely(idx >= page_limit));
return (char *)start + idx;
}
/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
{
void *start;
void *cur;
void *next;
unsigned long idx, pos, page_limit, freelist_count;
if (page->objects < 2 || !s->random_seq)
return false;
freelist_count = oo_objects(s->oo);
pos = get_random_int() % freelist_count;
page_limit = page->objects * s->size;
start = fixup_red_left(s, page_address(page));
/* First entry is used as the base of the freelist */
cur = next_freelist_entry(s, page, &pos, start, page_limit,
freelist_count);
page->freelist = cur;
for (idx = 1; idx < page->objects; idx++) {
setup_object(s, page, cur);
next = next_freelist_entry(s, page, &pos, start, page_limit,
freelist_count);
set_freepointer(s, cur, next);
cur = next;
}
setup_object(s, page, cur);
set_freepointer(s, cur, NULL);
return true;
}
#else
static inline int init_cache_random_seq(struct kmem_cache *s)
{
return 0;
}
static inline void init_freelist_randomization(void) { }
static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
@@ -1412,6 +1515,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
gfp_t alloc_gfp;
void *start, *p;
int idx, order;
bool shuffle;
flags &= gfp_allowed_mask;
@@ -1473,15 +1577,19 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
kasan_poison_slab(page);
for_each_object_idx(p, idx, s, start, page->objects) {
setup_object(s, page, p);
if (likely(idx < page->objects))
set_freepointer(s, p, p + s->size);
else
set_freepointer(s, p, NULL);
shuffle = shuffle_freelist(s, page);
if (!shuffle) {
for_each_object_idx(p, idx, s, start, page->objects) {
setup_object(s, page, p);
if (likely(idx < page->objects))
set_freepointer(s, p, p + s->size);
else
set_freepointer(s, p, NULL);
}
page->freelist = fixup_red_left(s, start);
}
page->freelist = fixup_red_left(s, start);
page->inuse = page->objects;
page->frozen = 1;
@@ -1504,8 +1612,10 @@ out:
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
BUG();
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
flags &= ~GFP_SLAB_BUG_MASK;
pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
invalid_mask, &invalid_mask, flags, &flags);
}
return allocate_slab(s,
@@ -2867,7 +2977,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(object);
__free_kmem_pages(page, compound_order(page));
__free_pages(page, compound_order(page));
p[size] = NULL; /* mark object processed */
return size;
}
@@ -3207,6 +3317,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
free_percpu(s->cpu_slab);
free_kmem_cache_nodes(s);
}
@@ -3431,6 +3542,13 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif
/* Initialize the pre-computed randomized freelist if slab is up */
if (slab_state >= UP) {
if (init_cache_random_seq(s))
goto error;
}
if (!init_kmem_cache_nodes(s))
goto error;
@@ -3575,7 +3693,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
void *ptr = NULL;
flags |= __GFP_COMP | __GFP_NOTRACK;
page = alloc_kmem_pages_node(node, flags, get_order(size));
page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3656,7 +3774,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
__free_kmem_pages(page, compound_order(page));
__free_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
@@ -3947,6 +4065,9 @@ void __init kmem_cache_init(void)
setup_kmalloc_cache_index_table();
create_kmalloc_caches(0);
/* Setup random freelists for each cache */
init_freelist_randomization();
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
#endif

Zobrazit soubor

@@ -292,6 +292,7 @@ static bool need_activate_page_drain(int cpu)
void activate_page(struct page *page)
{
page = compound_head(page);
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
@@ -316,6 +317,7 @@ void activate_page(struct page *page)
{
struct zone *zone = page_zone(page);
page = compound_head(page);
spin_lock_irq(&zone->lru_lock);
__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
spin_unlock_irq(&zone->lru_lock);

Zobrazit soubor

@@ -2493,7 +2493,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap;
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (frontswap_enabled)
if (IS_ENABLED(CONFIG_FRONTSWAP))
frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {

Zobrazit soubor

@@ -155,10 +155,14 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
int truncate_inode_page(struct address_space *mapping, struct page *page)
{
loff_t holelen;
VM_BUG_ON_PAGE(PageTail(page), page);
holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE;
if (page_mapped(page)) {
unmap_mapping_range(mapping,
(loff_t)page->index << PAGE_SHIFT,
PAGE_SIZE, 0);
holelen, 0);
}
return truncate_complete_page(mapping, page);
}
@@ -279,7 +283,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
if (!trylock_page(page))
continue;
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
if (PageWriteback(page)) {
unlock_page(page);
continue;
@@ -367,7 +371,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
lock_page(page);
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
wait_on_page_writeback(page);
truncate_inode_page(mapping, page);
unlock_page(page);
@@ -487,7 +491,21 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
if (!trylock_page(page))
continue;
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
/* Middle of THP: skip */
if (PageTransTail(page)) {
unlock_page(page);
continue;
} else if (PageTransHuge(page)) {
index += HPAGE_PMD_NR - 1;
i += HPAGE_PMD_NR - 1;
/* 'end' is in the middle of THP */
if (index == round_down(end, HPAGE_PMD_NR))
continue;
}
ret = invalidate_inode_page(page);
unlock_page(page);
/*
@@ -594,7 +612,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}
lock_page(page);
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
if (page->mapping != mapping) {
unlock_page(page);
continue;

Zobrazit soubor

@@ -399,10 +399,12 @@ struct address_space *page_mapping(struct page *page)
}
mapping = page->mapping;
if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
return NULL;
return mapping;
return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);
/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)
@@ -410,6 +412,12 @@ int __page_mapcount(struct page *page)
int ret;
ret = atomic_read(&page->_mapcount) + 1;
/*
* For file THP page->_mapcount contains total number of mapping
* of the page: no need to look into compound_mapcount.
*/
if (!PageAnon(page) && !PageHuge(page))
return ret;
page = compound_head(page);
ret += atomic_read(compound_mapcount_ptr(page)) + 1;
if (PageDoubleMap(page))

Zobrazit soubor

@@ -1501,7 +1501,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
struct page *page = area->pages[i];
BUG_ON(!page);
__free_kmem_pages(page, 0);
__free_pages(page, 0);
}
kvfree(area->pages);
@@ -1629,9 +1629,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;
if (node == NUMA_NO_NODE)
page = alloc_kmem_pages(alloc_mask, order);
page = alloc_pages(alloc_mask, order);
else
page = alloc_kmem_pages_node(node, alloc_mask, order);
page = alloc_pages_node(node, alloc_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */

Zobrazit soubor

@@ -1055,8 +1055,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
/* Adding to swap updated mapping */
mapping = page_mapping(page);
} else if (unlikely(PageTransHuge(page))) {
/* Split file THP */
if (split_huge_page_to_list(page, page_list))
goto keep_locked;
}
VM_BUG_ON_PAGE(PageTransHuge(page), page);
/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.
@@ -1254,7 +1260,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page) &&
!isolated_balloon_page(page)) {
!__PageMovable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}

Zobrazit soubor

@@ -718,7 +718,9 @@ const char * const vmstat_text[] = {
"nr_dirtied",
"nr_written",
"nr_pages_scanned",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
#endif
#ifdef CONFIG_NUMA
"numa_hit",
"numa_miss",
@@ -731,6 +733,8 @@ const char * const vmstat_text[] = {
"workingset_activate",
"workingset_nodereclaim",
"nr_anon_transparent_hugepages",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
"nr_free_cma",
/* enum writeback_stat_item counters */
@@ -815,6 +819,8 @@ const char * const vmstat_text[] = {
"thp_fault_fallback",
"thp_collapse_alloc",
"thp_collapse_alloc_failed",
"thp_file_alloc",
"thp_file_mapped",
"thp_split_page",
"thp_split_page_failed",
"thp_deferred_split_page",

Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání