Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
 - a few misc bits
 - ocfs2
 - most(?) of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (125 commits)
  thp: fix comments of __pmd_trans_huge_lock()
  cgroup: remove unnecessary 0 check from css_from_id()
  cgroup: fix idr leak for the first cgroup root
  mm: memcontrol: fix documentation for compound parameter
  mm: memcontrol: remove BUG_ON in uncharge_list
  mm: fix build warnings in <linux/compaction.h>
  mm, thp: convert from optimistic swapin collapsing to conservative
  mm, thp: fix comment inconsistency for swapin readahead functions
  thp: update Documentation/{vm/transhuge,filesystems/proc}.txt
  shmem: split huge pages beyond i_size under memory pressure
  thp: introduce CONFIG_TRANSPARENT_HUGE_PAGECACHE
  khugepaged: add support of collapse for tmpfs/shmem pages
  shmem: make shmem_inode_info::lock irq-safe
  khugepaged: move up_read(mmap_sem) out of khugepaged_alloc_page()
  thp: extract khugepaged from mm/huge_memory.c
  shmem, thp: respect MADV_{NO,}HUGEPAGE for file mappings
  shmem: add huge pages support
  shmem: get_unmapped_area align huge page
  shmem: prepare huge= mount option and sysfs knob
  mm, rmap: account shmem thp pages
  ...
This commit is contained in:
@@ -439,6 +439,14 @@ choice
	  benefit.
endchoice

#
# We don't deposit page tables on file THP mapping,
# but Power makes use of them to address MMU quirk.
#
config TRANSPARENT_HUGE_PAGECACHE
	def_bool y
	depends on TRANSPARENT_HUGEPAGE && !PPC

#
# UP and nommu archs use km based percpu allocator
#

@@ -74,7 +74,7 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
@@ -70,7 +70,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
		 */
		if (trylock_page(page)) {
#ifdef CONFIG_BALLOON_COMPACTION
			if (!PagePrivate(page)) {
			if (PageIsolated(page)) {
				/* raced with isolation */
				unlock_page(page);
				continue;
@@ -106,110 +106,50 @@ EXPORT_SYMBOL_GPL(balloon_page_dequeue);

#ifdef CONFIG_BALLOON_COMPACTION

static inline void __isolate_balloon_page(struct page *page)
bool balloon_page_isolate(struct page *page, isolate_mode_t mode)

{
	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
	unsigned long flags;

	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	ClearPagePrivate(page);
	list_del(&page->lru);
	b_dev_info->isolated_pages++;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);

	return true;
}

static inline void __putback_balloon_page(struct page *page)
void balloon_page_putback(struct page *page)
{
	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
	unsigned long flags;

	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	SetPagePrivate(page);
	list_add(&page->lru, &b_dev_info->pages);
	b_dev_info->isolated_pages--;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
}

/* __isolate_lru_page() counterpart for a ballooned page */
bool balloon_page_isolate(struct page *page)
{
	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a balloon page being freed under us and
	 * raise its refcount preventing __free_pages() from doing its job
	 * the put_page() at the end of this block will take care of
	 * release this page, thus avoiding a nasty leakage.
	 */
	if (likely(get_page_unless_zero(page))) {
		/*
		 * As balloon pages are not isolated from LRU lists, concurrent
		 * compaction threads can race against page migration functions
		 * as well as race against the balloon driver releasing a page.
		 *
		 * In order to avoid having an already isolated balloon page
		 * being (wrongly) re-isolated while it is under migration,
		 * or to avoid attempting to isolate pages being released by
		 * the balloon driver, lets be sure we have the page lock
		 * before proceeding with the balloon page isolation steps.
		 */
		if (likely(trylock_page(page))) {
			/*
			 * A ballooned page, by default, has PagePrivate set.
			 * Prevent concurrent compaction threads from isolating
			 * an already isolated balloon page by clearing it.
			 */
			if (balloon_page_movable(page)) {
				__isolate_balloon_page(page);
				unlock_page(page);
				return true;
			}
			unlock_page(page);
		}
		put_page(page);
	}
	return false;
}

/* putback_lru_page() counterpart for a ballooned page */
void balloon_page_putback(struct page *page)
{
	/*
	 * 'lock_page()' stabilizes the page and prevents races against
	 * concurrent isolation threads attempting to re-isolate it.
	 */
	lock_page(page);

	if (__is_movable_balloon_page(page)) {
		__putback_balloon_page(page);
		/* drop the extra ref count taken for page isolation */
		put_page(page);
	} else {
		WARN_ON(1);
		dump_page(page, "not movable balloon page");
	}
	unlock_page(page);
}

/* move_to_new_page() counterpart for a ballooned page */
int balloon_page_migrate(struct page *newpage,
			struct page *page, enum migrate_mode mode)
int balloon_page_migrate(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	struct balloon_dev_info *balloon = balloon_page_device(page);
	int rc = -EAGAIN;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	if (WARN_ON(!__is_movable_balloon_page(page))) {
		dump_page(page, "not movable balloon page");
		return rc;
	}

	if (balloon && balloon->migratepage)
		rc = balloon->migratepage(balloon, newpage, page, mode);

	return rc;
	return balloon->migratepage(balloon, newpage, page, mode);
}

const struct address_space_operations balloon_aops = {
	.migratepage = balloon_page_migrate,
	.isolate_page = balloon_page_isolate,
	.putback_page = balloon_page_putback,
};
EXPORT_SYMBOL_GPL(balloon_aops);

#endif /* CONFIG_BALLOON_COMPACTION */

mm/compaction.c
@@ -15,11 +15,11 @@
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/balloon_compaction.h>
|
||||
#include <linux/page-isolation.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include "internal.h"
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
@@ -65,13 +65,27 @@ static unsigned long release_freepages(struct list_head *freelist)
|
||||
|
||||
static void map_pages(struct list_head *list)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned int i, order, nr_pages;
|
||||
struct page *page, *next;
|
||||
LIST_HEAD(tmp_list);
|
||||
|
||||
list_for_each_entry(page, list, lru) {
|
||||
arch_alloc_page(page, 0);
|
||||
kernel_map_pages(page, 1, 1);
|
||||
kasan_alloc_pages(page, 0);
|
||||
list_for_each_entry_safe(page, next, list, lru) {
|
||||
list_del(&page->lru);
|
||||
|
||||
order = page_private(page);
|
||||
nr_pages = 1 << order;
|
||||
|
||||
post_alloc_hook(page, order, __GFP_MOVABLE);
|
||||
if (order)
|
||||
split_page(page, order);
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
list_add(&page->lru, &tmp_list);
|
||||
page++;
|
||||
}
|
||||
}
|
||||
|
||||
list_splice(&tmp_list, list);
|
||||
}
|
||||
|
||||
static inline bool migrate_async_suitable(int migratetype)
|
||||
@@ -81,6 +95,44 @@ static inline bool migrate_async_suitable(int migratetype)
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
|
||||
int PageMovable(struct page *page)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
if (!__PageMovable(page))
|
||||
return 0;
|
||||
|
||||
mapping = page_mapping(page);
|
||||
if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(PageMovable);
|
||||
|
||||
void __SetPageMovable(struct page *page, struct address_space *mapping)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
|
||||
page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
|
||||
}
|
||||
EXPORT_SYMBOL(__SetPageMovable);
|
||||
|
||||
void __ClearPageMovable(struct page *page)
|
||||
{
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(!PageMovable(page), page);
|
||||
/*
|
||||
* Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
|
||||
* flag so that VM can catch up released page by driver after isolation.
|
||||
* With it, VM migration doesn't try to put it back.
|
||||
*/
|
||||
page->mapping = (void *)((unsigned long)page->mapping &
|
||||
PAGE_MAPPING_MOVABLE);
|
||||
}
|
||||
EXPORT_SYMBOL(__ClearPageMovable);
|
||||
|
||||
/* Do not skip compaction more than 64 times */
|
||||
#define COMPACT_MAX_DEFER_SHIFT 6
|
||||
|
||||
@@ -368,12 +420,13 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
|
||||
unsigned long flags = 0;
|
||||
bool locked = false;
|
||||
unsigned long blockpfn = *start_pfn;
|
||||
unsigned int order;
|
||||
|
||||
cursor = pfn_to_page(blockpfn);
|
||||
|
||||
/* Isolate free pages. */
|
||||
for (; blockpfn < end_pfn; blockpfn++, cursor++) {
|
||||
int isolated, i;
|
||||
int isolated;
|
||||
struct page *page = cursor;
|
||||
|
||||
/*
|
||||
@@ -439,17 +492,17 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
|
||||
goto isolate_fail;
|
||||
}
|
||||
|
||||
/* Found a free page, break it into order-0 pages */
|
||||
isolated = split_free_page(page);
|
||||
/* Found a free page, will break it into order-0 pages */
|
||||
order = page_order(page);
|
||||
isolated = __isolate_free_page(page, order);
|
||||
if (!isolated)
|
||||
break;
|
||||
set_page_private(page, order);
|
||||
|
||||
total_isolated += isolated;
|
||||
cc->nr_freepages += isolated;
|
||||
for (i = 0; i < isolated; i++) {
|
||||
list_add(&page->lru, freelist);
|
||||
page++;
|
||||
}
|
||||
list_add_tail(&page->lru, freelist);
|
||||
|
||||
if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
|
||||
blockpfn += isolated;
|
||||
break;
|
||||
@@ -568,7 +621,7 @@ isolate_freepages_range(struct compact_control *cc,
|
||||
*/
|
||||
}
|
||||
|
||||
/* split_free_page does not map the pages */
|
||||
/* __isolate_free_page() does not map the pages */
|
||||
map_pages(&freelist);
|
||||
|
||||
if (pfn < end_pfn) {
|
||||
@@ -670,7 +723,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
|
||||
/* Time to isolate some pages for migration */
|
||||
for (; low_pfn < end_pfn; low_pfn++) {
|
||||
bool is_lru;
|
||||
|
||||
if (skip_on_failure && low_pfn >= next_skip_pfn) {
|
||||
/*
|
||||
@@ -732,21 +784,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check may be lockless but that's ok as we recheck later.
|
||||
* It's possible to migrate LRU pages and balloon pages
|
||||
* Skip any other type of page
|
||||
*/
|
||||
is_lru = PageLRU(page);
|
||||
if (!is_lru) {
|
||||
if (unlikely(balloon_page_movable(page))) {
|
||||
if (balloon_page_isolate(page)) {
|
||||
/* Successfully isolated */
|
||||
goto isolate_success;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Regardless of being on LRU, compound pages such as THP and
|
||||
* hugetlbfs are not to be compacted. We can potentially save
|
||||
@@ -763,8 +800,30 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
goto isolate_fail;
|
||||
}
|
||||
|
||||
if (!is_lru)
|
||||
/*
|
||||
* Check may be lockless but that's ok as we recheck later.
|
||||
* It's possible to migrate LRU and non-lru movable pages.
|
||||
* Skip any other type of page
|
||||
*/
|
||||
if (!PageLRU(page)) {
|
||||
/*
|
||||
* __PageMovable can return false positive so we need
|
||||
* to verify it under page_lock.
|
||||
*/
|
||||
if (unlikely(__PageMovable(page)) &&
|
||||
!PageIsolated(page)) {
|
||||
if (locked) {
|
||||
spin_unlock_irqrestore(&zone->lru_lock,
|
||||
flags);
|
||||
locked = false;
|
||||
}
|
||||
|
||||
if (isolate_movable_page(page, isolate_mode))
|
||||
goto isolate_success;
|
||||
}
|
||||
|
||||
goto isolate_fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Migration will fail if an anonymous page is pinned in memory,
|
||||
@@ -1059,7 +1118,7 @@ static void isolate_freepages(struct compact_control *cc)
|
||||
}
|
||||
}
|
||||
|
||||
/* split_free_page does not map the pages */
|
||||
/* __isolate_free_page() does not map the pages */
|
||||
map_pages(freelist);
|
||||
|
||||
/*
|
||||
|
mm/filemap.c
@@ -114,14 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping,
|
||||
struct page *page, void *shadow)
|
||||
{
|
||||
struct radix_tree_node *node;
|
||||
int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
|
||||
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
|
||||
node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index,
|
||||
shadow);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
VM_BUG_ON_PAGE(nr != 1 && shadow, page);
|
||||
|
||||
if (shadow) {
|
||||
mapping->nrexceptional++;
|
||||
mapping->nrexceptional += nr;
|
||||
/*
|
||||
* Make sure the nrexceptional update is committed before
|
||||
* the nrpages update so that final truncate racing
|
||||
@@ -130,31 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping,
|
||||
*/
|
||||
smp_wmb();
|
||||
}
|
||||
mapping->nrpages--;
|
||||
mapping->nrpages -= nr;
|
||||
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
workingset_node_pages_dec(node);
|
||||
if (shadow)
|
||||
workingset_node_shadows_inc(node);
|
||||
else
|
||||
if (__radix_tree_delete_node(&mapping->page_tree, node))
|
||||
for (i = 0; i < nr; i++) {
|
||||
node = radix_tree_replace_clear_tags(&mapping->page_tree,
|
||||
page->index + i, shadow);
|
||||
if (!node) {
|
||||
VM_BUG_ON_PAGE(nr != 1, page);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Track node that only contains shadow entries. DAX mappings contain
|
||||
* no shadow entries and may contain other exceptional entries so skip
|
||||
* those.
|
||||
*
|
||||
* Avoid acquiring the list_lru lock if already tracked. The
|
||||
* list_empty() test is safe as node->private_list is
|
||||
* protected by mapping->tree_lock.
|
||||
*/
|
||||
if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
|
||||
list_empty(&node->private_list)) {
|
||||
node->private_data = mapping;
|
||||
list_lru_add(&workingset_shadow_nodes, &node->private_list);
|
||||
workingset_node_pages_dec(node);
|
||||
if (shadow)
|
||||
workingset_node_shadows_inc(node);
|
||||
else
|
||||
if (__radix_tree_delete_node(&mapping->page_tree, node))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Track node that only contains shadow entries. DAX mappings
|
||||
* contain no shadow entries and may contain other exceptional
|
||||
* entries so skip those.
|
||||
*
|
||||
* Avoid acquiring the list_lru lock if already tracked.
|
||||
* The list_empty() test is safe as node->private_list is
|
||||
* protected by mapping->tree_lock.
|
||||
*/
|
||||
if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
|
||||
list_empty(&node->private_list)) {
|
||||
node->private_data = mapping;
|
||||
list_lru_add(&workingset_shadow_nodes,
|
||||
&node->private_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
|
||||
void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
int nr = hpage_nr_pages(page);
|
||||
|
||||
trace_mm_filemap_delete_from_page_cache(page);
|
||||
/*
|
||||
@@ -178,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
else
|
||||
cleancache_invalidate_page(mapping, page);
|
||||
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
VM_BUG_ON_PAGE(page_mapped(page), page);
|
||||
if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
|
||||
int mapcount;
|
||||
@@ -209,9 +218,14 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
|
||||
/* hugetlb pages do not participate in page cache accounting. */
|
||||
if (!PageHuge(page))
|
||||
__dec_zone_page_state(page, NR_FILE_PAGES);
|
||||
if (PageSwapBacked(page))
|
||||
__dec_zone_page_state(page, NR_SHMEM);
|
||||
__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
|
||||
if (PageSwapBacked(page)) {
|
||||
__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
|
||||
if (PageTransHuge(page))
|
||||
__dec_zone_page_state(page, NR_SHMEM_THPS);
|
||||
} else {
|
||||
VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point page must be either written or cleaned by truncate.
|
||||
@@ -235,9 +249,8 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
*/
|
||||
void delete_from_page_cache(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
struct address_space *mapping = page_mapping(page);
|
||||
unsigned long flags;
|
||||
|
||||
void (*freepage)(struct page *);
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
@@ -250,7 +263,13 @@ void delete_from_page_cache(struct page *page)
|
||||
|
||||
if (freepage)
|
||||
freepage(page);
|
||||
put_page(page);
|
||||
|
||||
if (PageTransHuge(page) && !PageHuge(page)) {
|
||||
page_ref_sub(page, HPAGE_PMD_NR);
|
||||
VM_BUG_ON_PAGE(page_count(page) <= 0, page);
|
||||
} else {
|
||||
put_page(page);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(delete_from_page_cache);
|
||||
|
||||
@@ -1053,7 +1072,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
|
||||
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
|
||||
{
|
||||
void **pagep;
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
|
||||
rcu_read_lock();
|
||||
repeat:
|
||||
@@ -1073,16 +1092,24 @@ repeat:
|
||||
*/
|
||||
goto out;
|
||||
}
|
||||
if (!page_cache_get_speculative(page))
|
||||
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/*
|
||||
* Has the page moved?
|
||||
* This is part of the lockless pagecache protocol. See
|
||||
* include/linux/pagemap.h for details.
|
||||
*/
|
||||
if (unlikely(page != *pagep)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
@@ -1118,12 +1145,12 @@ repeat:
|
||||
if (page && !radix_tree_exception(page)) {
|
||||
lock_page(page);
|
||||
/* Has the page been truncated? */
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
if (unlikely(page_mapping(page) != mapping)) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto repeat;
|
||||
}
|
||||
VM_BUG_ON_PAGE(page->index != offset, page);
|
||||
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
|
||||
}
|
||||
return page;
|
||||
}
|
||||
@@ -1255,7 +1282,7 @@ unsigned find_get_entries(struct address_space *mapping,
|
||||
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
if (unlikely(!page))
|
||||
@@ -1272,12 +1299,20 @@ repeat:
|
||||
*/
|
||||
goto export;
|
||||
}
|
||||
if (!page_cache_get_speculative(page))
|
||||
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
export:
|
||||
@@ -1318,7 +1353,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
|
||||
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
if (unlikely(!page))
|
||||
@@ -1337,12 +1372,19 @@ repeat:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
@@ -1379,7 +1421,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
|
||||
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
/* The hole, there no reason to continue */
|
||||
@@ -1399,12 +1441,19 @@ repeat:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
@@ -1413,7 +1462,7 @@ repeat:
|
||||
* otherwise we can get both false positives and false
|
||||
* negatives, which is just confusing to the caller.
|
||||
*/
|
||||
if (page->mapping == NULL || page->index != iter.index) {
|
||||
if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
@@ -1451,7 +1500,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_tagged(slot, &mapping->page_tree,
|
||||
&iter, *index, tag) {
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
if (unlikely(!page))
|
||||
@@ -1476,12 +1525,19 @@ repeat:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
@@ -1525,7 +1581,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_tagged(slot, &mapping->page_tree,
|
||||
&iter, start, tag) {
|
||||
struct page *page;
|
||||
struct page *head, *page;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
if (unlikely(!page))
|
||||
@@ -1543,12 +1599,20 @@ repeat:
|
||||
*/
|
||||
goto export;
|
||||
}
|
||||
if (!page_cache_get_speculative(page))
|
||||
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
export:
|
||||
@@ -2128,21 +2192,21 @@ page_not_uptodate:
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fault);
|
||||
|
||||
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
void filemap_map_pages(struct fault_env *fe,
|
||||
pgoff_t start_pgoff, pgoff_t end_pgoff)
|
||||
{
|
||||
struct radix_tree_iter iter;
|
||||
void **slot;
|
||||
struct file *file = vma->vm_file;
|
||||
struct file *file = fe->vma->vm_file;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
pgoff_t last_pgoff = start_pgoff;
|
||||
loff_t size;
|
||||
struct page *page;
|
||||
unsigned long address = (unsigned long) vmf->virtual_address;
|
||||
unsigned long addr;
|
||||
pte_t *pte;
|
||||
struct page *head, *page;
|
||||
|
||||
rcu_read_lock();
|
||||
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
|
||||
if (iter.index > vmf->max_pgoff)
|
||||
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
|
||||
start_pgoff) {
|
||||
if (iter.index > end_pgoff)
|
||||
break;
|
||||
repeat:
|
||||
page = radix_tree_deref_slot(slot);
|
||||
@@ -2156,12 +2220,19 @@ repeat:
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (!page_cache_get_speculative(page))
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
goto repeat;
|
||||
|
||||
/* The page was split under us? */
|
||||
if (compound_head(page) != head) {
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* Has the page moved? */
|
||||
if (unlikely(page != *slot)) {
|
||||
put_page(page);
|
||||
put_page(head);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
@@ -2179,14 +2250,15 @@ repeat:
|
||||
if (page->index >= size >> PAGE_SHIFT)
|
||||
goto unlock;
|
||||
|
||||
pte = vmf->pte + page->index - vmf->pgoff;
|
||||
if (!pte_none(*pte))
|
||||
goto unlock;
|
||||
|
||||
if (file->f_ra.mmap_miss > 0)
|
||||
file->f_ra.mmap_miss--;
|
||||
addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
|
||||
do_set_pte(vma, addr, page, pte, false, false);
|
||||
|
||||
fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
|
||||
if (fe->pte)
|
||||
fe->pte += iter.index - last_pgoff;
|
||||
last_pgoff = iter.index;
|
||||
if (alloc_set_pte(fe, NULL, page))
|
||||
goto unlock;
|
||||
unlock_page(page);
|
||||
goto next;
|
||||
unlock:
|
||||
@@ -2194,7 +2266,10 @@ unlock:
|
||||
skip:
|
||||
put_page(page);
|
||||
next:
|
||||
if (iter.index == vmf->max_pgoff)
|
||||
/* Huge page is mapped? No need to proceed. */
|
||||
if (pmd_trans_huge(*fe->pmd))
|
||||
break;
|
||||
if (iter.index == end_pgoff)
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
@@ -20,6 +20,8 @@
|
||||
#include <linux/frontswap.h>
|
||||
#include <linux/swapfile.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key);
|
||||
|
||||
/*
|
||||
* frontswap_ops are added by frontswap_register_ops, and provide the
|
||||
* frontswap "backend" implementation functions. Multiple implementations
|
||||
@@ -139,6 +141,8 @@ void frontswap_register_ops(struct frontswap_ops *ops)
|
||||
ops->next = frontswap_ops;
|
||||
} while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
|
||||
|
||||
static_branch_inc(&frontswap_enabled_key);
|
||||
|
||||
spin_lock(&swap_lock);
|
||||
plist_for_each_entry(si, &swap_active_head, list) {
|
||||
if (si->frontswap_map)
|
||||
@@ -189,7 +193,7 @@ void __frontswap_init(unsigned type, unsigned long *map)
|
||||
struct swap_info_struct *sis = swap_info[type];
|
||||
struct frontswap_ops *ops;
|
||||
|
||||
BUG_ON(sis == NULL);
|
||||
VM_BUG_ON(sis == NULL);
|
||||
|
||||
/*
|
||||
* p->frontswap is a bitmap that we MUST have to figure out which page
|
||||
@@ -248,15 +252,9 @@ int __frontswap_store(struct page *page)
|
||||
pgoff_t offset = swp_offset(entry);
|
||||
struct frontswap_ops *ops;
|
||||
|
||||
/*
|
||||
* Return if no backend registed.
|
||||
* Don't need to inc frontswap_failed_stores here.
|
||||
*/
|
||||
if (!frontswap_ops)
|
||||
return -1;
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
BUG_ON(sis == NULL);
|
||||
VM_BUG_ON(!frontswap_ops);
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
VM_BUG_ON(sis == NULL);
|
||||
|
||||
/*
|
||||
* If a dup, we must remove the old page first; we can't leave the
|
||||
@@ -303,11 +301,10 @@ int __frontswap_load(struct page *page)
|
||||
pgoff_t offset = swp_offset(entry);
|
||||
struct frontswap_ops *ops;
|
||||
|
||||
if (!frontswap_ops)
|
||||
return -1;
|
||||
VM_BUG_ON(!frontswap_ops);
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
VM_BUG_ON(sis == NULL);
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
BUG_ON(sis == NULL);
|
||||
if (!__frontswap_test(sis, offset))
|
||||
return -1;
|
||||
|
||||
@@ -337,10 +334,9 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
|
||||
struct swap_info_struct *sis = swap_info[type];
|
||||
struct frontswap_ops *ops;
|
||||
|
||||
if (!frontswap_ops)
|
||||
return;
|
||||
VM_BUG_ON(!frontswap_ops);
|
||||
VM_BUG_ON(sis == NULL);
|
||||
|
||||
BUG_ON(sis == NULL);
|
||||
if (!__frontswap_test(sis, offset))
|
||||
return;
|
||||
|
||||
@@ -360,10 +356,9 @@ void __frontswap_invalidate_area(unsigned type)
|
||||
struct swap_info_struct *sis = swap_info[type];
|
||||
struct frontswap_ops *ops;
|
||||
|
||||
if (!frontswap_ops)
|
||||
return;
|
||||
VM_BUG_ON(!frontswap_ops);
|
||||
VM_BUG_ON(sis == NULL);
|
||||
|
||||
BUG_ON(sis == NULL);
|
||||
if (sis->frontswap_map == NULL)
|
||||
return;
|
||||
|
||||
|
mm/gup.c
@@ -279,6 +279,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else {
			get_page(page);
			spin_unlock(ptl);
@@ -286,6 +288,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		}

		return ret ? ERR_PTR(ret) :
@@ -350,7 +354,6 @@ unmap:
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;

@@ -375,7 +378,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	ret = handle_mm_fault(vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
@@ -690,7 +693,7 @@ retry:
	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	ret = handle_mm_fault(vma, address, fault_flags);
	major |= ret & VM_FAULT_MAJOR;
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
mm/huge_memory.c
Diff content was not shown because it is too large
Load diff
mm/hugetlb.c
@@ -3179,7 +3179,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
struct page *ref_page)
|
||||
{
|
||||
int force_flush = 0;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned long address;
|
||||
pte_t *ptep;
|
||||
@@ -3198,19 +3197,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
tlb_start_vma(tlb, vma);
|
||||
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
|
||||
address = start;
|
||||
again:
|
||||
for (; address < end; address += sz) {
|
||||
ptep = huge_pte_offset(mm, address);
|
||||
if (!ptep)
|
||||
continue;
|
||||
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
if (huge_pmd_unshare(mm, &address, ptep))
|
||||
goto unlock;
|
||||
if (huge_pmd_unshare(mm, &address, ptep)) {
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
}
|
||||
|
||||
pte = huge_ptep_get(ptep);
|
||||
if (huge_pte_none(pte))
|
||||
goto unlock;
|
||||
if (huge_pte_none(pte)) {
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Migrating hugepage or HWPoisoned hugepage is already
|
||||
@@ -3218,7 +3220,8 @@ again:
|
||||
*/
|
||||
if (unlikely(!pte_present(pte))) {
|
||||
huge_pte_clear(mm, address, ptep);
|
||||
goto unlock;
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
}
|
||||
|
||||
page = pte_page(pte);
|
||||
@@ -3228,9 +3231,10 @@ again:
|
||||
* are about to unmap is the actual page of interest.
|
||||
*/
|
||||
if (ref_page) {
|
||||
if (page != ref_page)
|
||||
goto unlock;
|
||||
|
||||
if (page != ref_page) {
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Mark the VMA as having unmapped its page so that
|
||||
* future faults in this VMA will fail rather than
|
||||
@@ -3246,30 +3250,14 @@ again:
|
||||
|
||||
hugetlb_count_sub(pages_per_huge_page(h), mm);
|
||||
page_remove_rmap(page, true);
|
||||
force_flush = !__tlb_remove_page(tlb, page);
|
||||
if (force_flush) {
|
||||
address += sz;
|
||||
spin_unlock(ptl);
|
||||
break;
|
||||
}
|
||||
/* Bail out after unmapping reference page if supplied */
|
||||
if (ref_page) {
|
||||
spin_unlock(ptl);
|
||||
break;
|
||||
}
|
||||
unlock:
|
||||
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
/*
|
||||
* mmu_gather ran out of room to batch pages, we break out of
|
||||
* the PTE lock to avoid doing the potential expensive TLB invalidate
|
||||
* and page-free while holding it.
|
||||
*/
|
||||
if (force_flush) {
|
||||
force_flush = 0;
|
||||
tlb_flush_mmu(tlb);
|
||||
if (address < end && !ref_page)
|
||||
goto again;
|
||||
tlb_remove_page_size(tlb, page, huge_page_size(h));
|
||||
/*
|
||||
* Bail out after unmapping reference page if supplied
|
||||
*/
|
||||
if (ref_page)
|
||||
break;
|
||||
}
|
||||
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
|
||||
tlb_end_vma(tlb, vma);
|
||||
|
@@ -36,6 +36,8 @@
|
||||
/* Do not use these with a slab allocator */
|
||||
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
|
||||
|
||||
int do_swap_page(struct fault_env *fe, pte_t orig_pte);
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||
unsigned long floor, unsigned long ceiling);
|
||||
|
||||
@@ -150,6 +152,8 @@ extern int __isolate_free_page(struct page *page, unsigned int order);
|
||||
extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
|
||||
unsigned int order);
|
||||
extern void prep_compound_page(struct page *page, unsigned int order);
|
||||
extern void post_alloc_hook(struct page *page, unsigned int order,
|
||||
gfp_t gfp_flags);
|
||||
extern int user_min_free_kbytes;
|
||||
|
||||
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
|
||||
|
mm/khugepaged.c (normal file)
Diff content was not shown because it is too large
Load diff
mm/ksm.c
@@ -376,9 +376,8 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
|
||||
if (IS_ERR_OR_NULL(page))
|
||||
break;
|
||||
if (PageKsm(page))
|
||||
ret = handle_mm_fault(vma->vm_mm, vma, addr,
|
||||
FAULT_FLAG_WRITE |
|
||||
FAULT_FLAG_REMOTE);
|
||||
ret = handle_mm_fault(vma, addr,
|
||||
FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
|
||||
else
|
||||
ret = VM_FAULT_WRITE;
|
||||
put_page(page);
|
||||
@@ -532,8 +531,8 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
|
||||
void *expected_mapping;
|
||||
unsigned long kpfn;
|
||||
|
||||
expected_mapping = (void *)stable_node +
|
||||
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
|
||||
expected_mapping = (void *)((unsigned long)stable_node |
|
||||
PAGE_MAPPING_KSM);
|
||||
again:
|
||||
kpfn = READ_ONCE(stable_node->kpfn);
|
||||
page = pfn_to_page(kpfn);
|
||||
|
@@ -584,6 +584,9 @@ repeat:
|
||||
nid, flags);
|
||||
}
|
||||
|
||||
if (!nr_new)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If this was the first round, resize array and repeat for actual
|
||||
* insertions; otherwise, merge and return.
|
||||
|
mm/memcontrol.c
@@ -1259,6 +1259,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
struct oom_control oc = {
|
||||
.zonelist = NULL,
|
||||
.nodemask = NULL,
|
||||
.memcg = memcg,
|
||||
.gfp_mask = gfp_mask,
|
||||
.order = order,
|
||||
};
|
||||
@@ -1281,7 +1282,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
|
||||
check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
|
||||
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
|
||||
for_each_mem_cgroup_tree(iter, memcg) {
|
||||
struct css_task_iter it;
|
||||
@@ -1289,7 +1290,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
|
||||
css_task_iter_start(&iter->css, &it);
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
switch (oom_scan_process_thread(&oc, task, totalpages)) {
|
||||
switch (oom_scan_process_thread(&oc, task)) {
|
||||
case OOM_SCAN_SELECT:
|
||||
if (chosen)
|
||||
put_task_struct(chosen);
|
||||
@@ -1329,7 +1330,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
|
||||
if (chosen) {
|
||||
points = chosen_points * 1000 / totalpages;
|
||||
oom_kill_process(&oc, chosen, points, totalpages, memcg,
|
||||
oom_kill_process(&oc, chosen, points, totalpages,
|
||||
"Memory cgroup out of memory");
|
||||
}
|
||||
unlock:
|
||||
@@ -2272,20 +2273,30 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
|
||||
current->memcg_kmem_skip_account = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
static inline bool memcg_kmem_bypass(void)
|
||||
{
|
||||
if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* memcg_kmem_get_cache: select the correct per-memcg cache for allocation
|
||||
* @cachep: the original global kmem cache
|
||||
*
|
||||
* Return the kmem_cache we're supposed to use for a slab allocation.
|
||||
* We try to use the current memcg's version of the cache.
|
||||
*
|
||||
* If the cache does not exist yet, if we are the first user of it,
|
||||
* we either create it immediately, if possible, or create it asynchronously
|
||||
* in a workqueue.
|
||||
* In the latter case, we will let the current allocation go through with
|
||||
* the original cache.
|
||||
* If the cache does not exist yet, if we are the first user of it, we
|
||||
* create it asynchronously in a workqueue and let the current allocation
|
||||
* go through with the original cache.
|
||||
*
|
||||
* Can't be called in interrupt context or from kernel threads.
|
||||
* This function needs to be called with rcu_read_lock() held.
|
||||
* This function takes a reference to the cache it returns to assure it
|
||||
* won't get destroyed while we are working with it. Once the caller is
|
||||
* done with it, memcg_kmem_put_cache() must be called to release the
|
||||
* reference.
|
||||
*/
|
||||
struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
|
||||
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
struct kmem_cache *memcg_cachep;
|
||||
@@ -2293,10 +2304,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
|
||||
|
||||
VM_BUG_ON(!is_root_cache(cachep));
|
||||
|
||||
if (cachep->flags & SLAB_ACCOUNT)
|
||||
gfp |= __GFP_ACCOUNT;
|
||||
|
||||
if (!(gfp & __GFP_ACCOUNT))
|
||||
if (memcg_kmem_bypass())
|
||||
return cachep;
|
||||
|
||||
if (current->memcg_kmem_skip_account)
|
||||
@@ -2329,14 +2337,27 @@ out:
|
||||
return cachep;
|
||||
}
|
||||
|
||||
void __memcg_kmem_put_cache(struct kmem_cache *cachep)
|
||||
/**
|
||||
* memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
|
||||
* @cachep: the cache returned by memcg_kmem_get_cache
|
||||
*/
|
||||
void memcg_kmem_put_cache(struct kmem_cache *cachep)
|
||||
{
|
||||
if (!is_root_cache(cachep))
|
||||
css_put(&cachep->memcg_params.memcg->css);
|
||||
}
|
||||
|
||||
int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
|
||||
struct mem_cgroup *memcg)
|
||||
/**
|
||||
* memcg_kmem_charge: charge a kmem page
|
||||
* @page: page to charge
|
||||
* @gfp: reclaim mode
|
||||
* @order: allocation order
|
||||
* @memcg: memory cgroup to charge
|
||||
*
|
||||
* Returns 0 on success, an error code on failure.
|
||||
*/
|
||||
int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
unsigned int nr_pages = 1 << order;
|
||||
struct page_counter *counter;
|
||||
@@ -2357,19 +2378,34 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
|
||||
/**
|
||||
* memcg_kmem_charge: charge a kmem page to the current memory cgroup
|
||||
* @page: page to charge
|
||||
* @gfp: reclaim mode
|
||||
* @order: allocation order
|
||||
*
|
||||
* Returns 0 on success, an error code on failure.
|
||||
*/
|
||||
int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
int ret = 0;
|
||||
|
||||
if (memcg_kmem_bypass())
|
||||
return 0;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(current->mm);
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
|
||||
ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
|
||||
css_put(&memcg->css);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __memcg_kmem_uncharge(struct page *page, int order)
|
||||
/**
|
||||
* memcg_kmem_uncharge: uncharge a kmem page
|
||||
* @page: page to uncharge
|
||||
* @order: allocation order
|
||||
*/
|
||||
void memcg_kmem_uncharge(struct page *page, int order)
|
||||
{
|
||||
struct mem_cgroup *memcg = page->mem_cgroup;
|
||||
unsigned int nr_pages = 1 << order;
|
||||
@@ -4409,7 +4445,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t ptent, swp_entry_t *entry)
|
||||
pte_t ptent, swp_entry_t *entry)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
swp_entry_t ent = pte_to_swp_entry(ptent);
|
||||
@@ -4428,7 +4464,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
|
||||
}
|
||||
#else
|
||||
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t ptent, swp_entry_t *entry)
|
||||
pte_t ptent, swp_entry_t *entry)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@@ -4471,7 +4507,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
|
||||
/**
|
||||
* mem_cgroup_move_account - move account of the page
|
||||
* @page: the page
|
||||
* @nr_pages: number of regular pages (>1 for huge pages)
|
||||
* @compound: charge the page as compound or small page
|
||||
* @from: mem_cgroup which the page is moved from.
|
||||
* @to: mem_cgroup which the page is moved to. @from != @to.
|
||||
*
|
||||
@@ -4593,7 +4629,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
|
||||
if (pte_present(ptent))
|
||||
page = mc_handle_present_pte(vma, addr, ptent);
|
||||
else if (is_swap_pte(ptent))
|
||||
page = mc_handle_swap_pte(vma, addr, ptent, &ent);
|
||||
page = mc_handle_swap_pte(vma, ptent, &ent);
|
||||
else if (pte_none(ptent))
|
||||
page = mc_handle_file_pte(vma, addr, ptent, &ent);
|
||||
|
||||
@@ -5333,6 +5369,7 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
|
||||
* @mm: mm context of the victim
|
||||
* @gfp_mask: reclaim mode
|
||||
* @memcgp: charged memcg return
|
||||
* @compound: charge the page as compound or small page
|
||||
*
|
||||
* Try to charge @page to the memcg that @mm belongs to, reclaiming
|
||||
* pages according to @gfp_mask if necessary.
|
||||
@@ -5395,6 +5432,7 @@ out:
|
||||
* @page: page to charge
|
||||
* @memcg: memcg to charge the page to
|
||||
* @lrucare: page might be on LRU already
|
||||
* @compound: charge the page as compound or small page
|
||||
*
|
||||
* Finalize a charge transaction started by mem_cgroup_try_charge(),
|
||||
* after page->mapping has been set up. This must happen atomically
|
||||
@@ -5446,6 +5484,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
* mem_cgroup_cancel_charge - cancel a page charge
|
||||
* @page: page to charge
|
||||
* @memcg: memcg to charge the page to
|
||||
* @compound: charge the page as compound or small page
|
||||
*
|
||||
* Cancel a charge transaction started by mem_cgroup_try_charge().
|
||||
*/
|
||||
@@ -5469,15 +5508,18 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
|
||||
static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
|
||||
unsigned long nr_anon, unsigned long nr_file,
|
||||
unsigned long nr_huge, struct page *dummy_page)
|
||||
unsigned long nr_huge, unsigned long nr_kmem,
|
||||
struct page *dummy_page)
|
||||
{
|
||||
unsigned long nr_pages = nr_anon + nr_file;
|
||||
unsigned long nr_pages = nr_anon + nr_file + nr_kmem;
|
||||
unsigned long flags;
|
||||
|
||||
if (!mem_cgroup_is_root(memcg)) {
|
||||
page_counter_uncharge(&memcg->memory, nr_pages);
|
||||
if (do_memsw_account())
|
||||
page_counter_uncharge(&memcg->memsw, nr_pages);
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && nr_kmem)
|
||||
page_counter_uncharge(&memcg->kmem, nr_kmem);
|
||||
memcg_oom_recover(memcg);
|
||||
}
|
||||
|
||||
@@ -5500,6 +5542,7 @@ static void uncharge_list(struct list_head *page_list)
|
||||
unsigned long nr_anon = 0;
|
||||
unsigned long nr_file = 0;
|
||||
unsigned long nr_huge = 0;
|
||||
unsigned long nr_kmem = 0;
|
||||
unsigned long pgpgout = 0;
|
||||
struct list_head *next;
|
||||
struct page *page;
|
||||
@@ -5510,8 +5553,6 @@ static void uncharge_list(struct list_head *page_list)
|
||||
*/
|
||||
next = page_list->next;
|
||||
do {
|
||||
unsigned int nr_pages = 1;
|
||||
|
||||
page = list_entry(next, struct page, lru);
|
||||
next = page->lru.next;
|
||||
|
||||
@@ -5530,31 +5571,34 @@ static void uncharge_list(struct list_head *page_list)
|
||||
if (memcg != page->mem_cgroup) {
|
||||
if (memcg) {
|
||||
uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
|
||||
nr_huge, page);
|
||||
pgpgout = nr_anon = nr_file = nr_huge = 0;
|
||||
nr_huge, nr_kmem, page);
|
||||
pgpgout = nr_anon = nr_file =
|
||||
nr_huge = nr_kmem = 0;
|
||||
}
|
||||
memcg = page->mem_cgroup;
|
||||
}
|
||||
|
||||
if (PageTransHuge(page)) {
|
||||
nr_pages <<= compound_order(page);
|
||||
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
|
||||
nr_huge += nr_pages;
|
||||
}
|
||||
if (!PageKmemcg(page)) {
|
||||
unsigned int nr_pages = 1;
|
||||
|
||||
if (PageAnon(page))
|
||||
nr_anon += nr_pages;
|
||||
else
|
||||
nr_file += nr_pages;
|
||||
if (PageTransHuge(page)) {
|
||||
nr_pages <<= compound_order(page);
|
||||
nr_huge += nr_pages;
|
||||
}
|
||||
if (PageAnon(page))
|
||||
nr_anon += nr_pages;
|
||||
else
|
||||
nr_file += nr_pages;
|
||||
pgpgout++;
|
||||
} else
|
||||
nr_kmem += 1 << compound_order(page);
|
||||
|
||||
page->mem_cgroup = NULL;
|
||||
|
||||
pgpgout++;
|
||||
} while (next != page_list);
|
||||
|
||||
if (memcg)
|
||||
uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
|
||||
nr_huge, page);
|
||||
nr_huge, nr_kmem, page);
|
||||
}
|
||||
|
||||
/**
|
||||
|
mm/memory.c
Diff content was not shown because it is too large
Load diff
@@ -449,6 +449,25 @@ out_fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static struct zone * __meminit move_pfn_range(int zone_shift,
|
||||
unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
struct zone *zone = page_zone(pfn_to_page(start_pfn));
|
||||
int ret = 0;
|
||||
|
||||
if (zone_shift < 0)
|
||||
ret = move_pfn_range_left(zone + zone_shift, zone,
|
||||
start_pfn, end_pfn);
|
||||
else if (zone_shift)
|
||||
ret = move_pfn_range_right(zone, zone + zone_shift,
|
||||
start_pfn, end_pfn);
|
||||
|
||||
if (ret)
|
||||
return NULL;
|
||||
|
||||
return zone + zone_shift;
|
||||
}
|
||||
|
||||
static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
@@ -1028,6 +1047,37 @@ static void node_states_set_node(int node, struct memory_notify *arg)
|
||||
node_set_state(node, N_MEMORY);
|
||||
}
|
||||
|
||||
int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
|
||||
enum zone_type target)
|
||||
{
|
||||
struct zone *zone = page_zone(pfn_to_page(pfn));
|
||||
enum zone_type idx = zone_idx(zone);
|
||||
int i;
|
||||
|
||||
if (idx < target) {
|
||||
/* pages must be at end of current zone */
|
||||
if (pfn + nr_pages != zone_end_pfn(zone))
|
||||
return 0;
|
||||
|
||||
/* no zones in use between current zone and target */
|
||||
for (i = idx + 1; i < target; i++)
|
||||
if (zone_is_initialized(zone - idx + i))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (target < idx) {
|
||||
/* pages must be at beginning of current zone */
|
||||
if (pfn != zone->zone_start_pfn)
|
||||
return 0;
|
||||
|
||||
/* no zones in use between current zone and target */
|
||||
for (i = target + 1; i < idx; i++)
|
||||
if (zone_is_initialized(zone - idx + i))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return target - idx;
|
||||
}
|
||||
|
||||
/* Must be protected by mem_hotplug_begin() */
|
||||
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
|
||||
@@ -1039,6 +1089,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
||||
int nid;
|
||||
int ret;
|
||||
struct memory_notify arg;
|
||||
int zone_shift = 0;
|
||||
|
||||
/*
|
||||
* This doesn't need a lock to do pfn_to_page().
|
||||
@@ -1052,19 +1103,14 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
||||
!can_online_high_movable(zone))
|
||||
return -EINVAL;
|
||||
|
||||
if (online_type == MMOP_ONLINE_KERNEL &&
|
||||
zone_idx(zone) == ZONE_MOVABLE) {
|
||||
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
|
||||
return -EINVAL;
|
||||
}
|
||||
if (online_type == MMOP_ONLINE_MOVABLE &&
|
||||
zone_idx(zone) == ZONE_MOVABLE - 1) {
|
||||
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
|
||||
return -EINVAL;
|
||||
}
|
||||
if (online_type == MMOP_ONLINE_KERNEL)
|
||||
zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
|
||||
else if (online_type == MMOP_ONLINE_MOVABLE)
|
||||
zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
|
||||
|
||||
/* Previous code may changed the zone of the pfn range */
|
||||
zone = page_zone(pfn_to_page(pfn));
|
||||
zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
|
||||
if (!zone)
|
||||
return -EINVAL;
|
||||
|
||||
arg.start_pfn = pfn;
|
||||
arg.nr_pages = nr_pages;
|
||||
|
@@ -512,6 +512,8 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
|
||||
}
|
||||
}
|
||||
|
||||
if (pmd_trans_unstable(pmd))
|
||||
return 0;
|
||||
retry:
|
||||
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
|
||||
for (; addr != end; pte++, addr += PAGE_SIZE) {
|
||||
@@ -529,7 +531,7 @@ retry:
|
||||
nid = page_to_nid(page);
|
||||
if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
|
||||
continue;
|
||||
if (PageTransCompound(page) && PageAnon(page)) {
|
||||
if (PageTransCompound(page)) {
|
||||
get_page(page);
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
lock_page(page);
|
||||
|
mm/migrate.c
@@ -31,6 +31,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/compaction.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/hugetlb_cgroup.h>
|
||||
@@ -73,6 +74,81 @@ int migrate_prep_local(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool isolate_movable_page(struct page *page, isolate_mode_t mode)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
|
||||
/*
|
||||
* Avoid burning cycles with pages that are yet under __free_pages(),
|
||||
* or just got freed under us.
|
||||
*
|
||||
* In case we 'win' a race for a movable page being freed under us and
|
||||
* raise its refcount preventing __free_pages() from doing its job
|
||||
* the put_page() at the end of this block will take care of
|
||||
* release this page, thus avoiding a nasty leakage.
|
||||
*/
|
||||
if (unlikely(!get_page_unless_zero(page)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Check PageMovable before holding a PG_lock because page's owner
|
||||
* assumes anybody doesn't touch PG_lock of newly allocated page
|
||||
* so unconditionally grapping the lock ruins page's owner side.
|
||||
*/
|
||||
if (unlikely(!__PageMovable(page)))
|
||||
goto out_putpage;
|
||||
/*
|
||||
* As movable pages are not isolated from LRU lists, concurrent
|
||||
* compaction threads can race against page migration functions
|
||||
* as well as race against the releasing a page.
|
||||
*
|
||||
* In order to avoid having an already isolated movable page
|
||||
* being (wrongly) re-isolated while it is under migration,
|
||||
* or to avoid attempting to isolate pages being released,
|
||||
* lets be sure we have the page lock
|
||||
* before proceeding with the movable page isolation steps.
|
||||
*/
|
||||
if (unlikely(!trylock_page(page)))
|
||||
goto out_putpage;
|
||||
|
||||
if (!PageMovable(page) || PageIsolated(page))
|
||||
goto out_no_isolated;
|
||||
|
||||
mapping = page_mapping(page);
|
||||
VM_BUG_ON_PAGE(!mapping, page);
|
||||
|
||||
if (!mapping->a_ops->isolate_page(page, mode))
|
||||
goto out_no_isolated;
|
||||
|
||||
/* Driver shouldn't use PG_isolated bit of page->flags */
|
||||
WARN_ON_ONCE(PageIsolated(page));
|
||||
__SetPageIsolated(page);
|
||||
unlock_page(page);
|
||||
|
||||
return true;
|
||||
|
||||
out_no_isolated:
|
||||
unlock_page(page);
|
||||
out_putpage:
|
||||
put_page(page);
|
||||
out:
|
||||
return false;
|
||||
}
|
||||
|
||||
/* It should be called on page which is PG_movable */
|
||||
void putback_movable_page(struct page *page)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(!PageMovable(page), page);
|
||||
VM_BUG_ON_PAGE(!PageIsolated(page), page);
|
||||
|
||||
mapping = page_mapping(page);
|
||||
mapping->a_ops->putback_page(page);
|
||||
__ClearPageIsolated(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* Put previously isolated pages back onto the appropriate lists
|
||||
* from where they were once taken off for compaction/migration.
|
||||
@@ -94,10 +170,23 @@ void putback_movable_pages(struct list_head *l)
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
if (unlikely(isolated_balloon_page(page)))
|
||||
balloon_page_putback(page);
|
||||
else
|
||||
/*
|
||||
* We isolated non-lru movable page so here we can use
|
||||
* __PageMovable because LRU page's mapping cannot have
|
||||
* PAGE_MAPPING_MOVABLE.
|
||||
*/
|
||||
if (unlikely(__PageMovable(page))) {
|
||||
VM_BUG_ON_PAGE(!PageIsolated(page), page);
|
||||
lock_page(page);
|
||||
if (PageMovable(page))
|
||||
putback_movable_page(page);
|
||||
else
|
||||
__ClearPageIsolated(page);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
} else {
|
||||
putback_lru_page(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
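The isolate_page/migratepage/putback_page callbacks above define the whole non-LRU movable page protocol, so a toy model is enough to see the call ordering. A minimal userspace sketch, not kernel code: the toy_* names and struct are invented for illustration, and only the ordering mirrors the hunks above.

/* Userspace model of the isolate -> migrate -> putback-on-failure ordering. */
#include <stdbool.h>
#include <stdio.h>

struct toy_page { bool isolated; int data; };

static bool toy_isolate(struct toy_page *p)
{
	if (p->isolated)		/* already taken by another "compactor" */
		return false;
	p->isolated = true;		/* stands in for __SetPageIsolated() */
	return true;
}

static bool toy_migrate(struct toy_page *dst, struct toy_page *src)
{
	dst->data = src->data;		/* owner copies contents and bookkeeping */
	return true;
}

static void toy_putback(struct toy_page *p)
{
	p->isolated = false;		/* stands in for __ClearPageIsolated() */
}

int main(void)
{
	struct toy_page oldp = { .data = 42 }, newp = { 0 };

	if (toy_isolate(&oldp)) {
		if (!toy_migrate(&newp, &oldp))
			toy_putback(&oldp);	/* failure: return page to its owner */
	}
	printf("migrated data: %d\n", newp.data);
	return 0;
}

The point the model keeps is that a failed migration must hand the page back to its owner, which is what putback_movable_page() does through a_ops->putback_page().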
||||
@@ -170,7 +259,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
|
||||
} else if (PageAnon(new))
|
||||
page_add_anon_rmap(new, vma, addr, false);
|
||||
else
|
||||
page_add_file_rmap(new);
|
||||
page_add_file_rmap(new, false);
|
||||
|
||||
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
|
||||
mlock_vma_page(new);
|
||||
@@ -594,7 +683,7 @@ EXPORT_SYMBOL(migrate_page_copy);
|
||||
***********************************************************/
|
||||
|
||||
/*
|
||||
* Common logic to directly migrate a single page suitable for
|
||||
* Common logic to directly migrate a single LRU page suitable for
|
||||
* pages that do not use PagePrivate/PagePrivate2.
|
||||
*
|
||||
* Pages are locked upon entry and exit.
|
||||
@@ -757,33 +846,72 @@ static int move_to_new_page(struct page *newpage, struct page *page,
|
||||
enum migrate_mode mode)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
int rc;
|
||||
int rc = -EAGAIN;
|
||||
bool is_lru = !__PageMovable(page);
|
||||
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
|
||||
|
||||
mapping = page_mapping(page);
|
||||
if (!mapping)
|
||||
rc = migrate_page(mapping, newpage, page, mode);
|
||||
else if (mapping->a_ops->migratepage)
|
||||
|
||||
if (likely(is_lru)) {
|
||||
if (!mapping)
|
||||
rc = migrate_page(mapping, newpage, page, mode);
|
||||
else if (mapping->a_ops->migratepage)
|
||||
/*
|
||||
* Most pages have a mapping and most filesystems
|
||||
* provide a migratepage callback. Anonymous pages
|
||||
* are part of swap space which also has its own
|
||||
* migratepage callback. This is the most common path
|
||||
* for page migration.
|
||||
*/
|
||||
rc = mapping->a_ops->migratepage(mapping, newpage,
|
||||
page, mode);
|
||||
else
|
||||
rc = fallback_migrate_page(mapping, newpage,
|
||||
page, mode);
|
||||
} else {
|
||||
/*
|
||||
* Most pages have a mapping and most filesystems provide a
|
||||
* migratepage callback. Anonymous pages are part of swap
|
||||
* space which also has its own migratepage callback. This
|
||||
* is the most common path for page migration.
|
||||
* In case of non-lru page, it could be released after
|
||||
* isolation step. In that case, we shouldn't try migration.
|
||||
*/
|
||||
rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
|
||||
else
|
||||
rc = fallback_migrate_page(mapping, newpage, page, mode);
|
||||
VM_BUG_ON_PAGE(!PageIsolated(page), page);
|
||||
if (!PageMovable(page)) {
|
||||
rc = MIGRATEPAGE_SUCCESS;
|
||||
__ClearPageIsolated(page);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rc = mapping->a_ops->migratepage(mapping, newpage,
|
||||
page, mode);
|
||||
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
|
||||
!PageIsolated(page));
|
||||
}
|
||||
|
||||
/*
|
||||
* When successful, old pagecache page->mapping must be cleared before
|
||||
* page is freed; but stats require that PageAnon be left as PageAnon.
|
||||
*/
|
||||
if (rc == MIGRATEPAGE_SUCCESS) {
|
||||
if (!PageAnon(page))
|
||||
if (__PageMovable(page)) {
|
||||
VM_BUG_ON_PAGE(!PageIsolated(page), page);
|
||||
|
||||
/*
|
||||
* We clear PG_movable under page_lock so any compactor
|
||||
* cannot try to migrate this page.
|
||||
*/
|
||||
__ClearPageIsolated(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* Anonymous and movable page->mapping will be cleard by
|
||||
* free_pages_prepare so don't reset it here for keeping
|
||||
* the type to work PageAnon, for example.
|
||||
*/
|
||||
if (!PageMappingFlags(page))
|
||||
page->mapping = NULL;
|
||||
}
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -793,6 +921,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
|
||||
int rc = -EAGAIN;
|
||||
int page_was_mapped = 0;
|
||||
struct anon_vma *anon_vma = NULL;
|
||||
bool is_lru = !__PageMovable(page);
|
||||
|
||||
if (!trylock_page(page)) {
|
||||
if (!force || mode == MIGRATE_ASYNC)
|
||||
@@ -861,15 +990,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
|
||||
if (unlikely(!trylock_page(newpage)))
|
||||
goto out_unlock;
|
||||
|
||||
if (unlikely(isolated_balloon_page(page))) {
|
||||
/*
|
||||
* A ballooned page does not need any special attention from
|
||||
* physical to virtual reverse mapping procedures.
|
||||
* Skip any attempt to unmap PTEs or to remap swap cache,
|
||||
* in order to avoid burning cycles at rmap level, and perform
|
||||
* the page migration right away (proteced by page lock).
|
||||
*/
|
||||
rc = balloon_page_migrate(newpage, page, mode);
|
||||
if (unlikely(!is_lru)) {
|
||||
rc = move_to_new_page(newpage, page, mode);
|
||||
goto out_unlock_both;
|
||||
}
|
||||
|
||||
@@ -915,6 +1037,19 @@ out_unlock:
|
||||
put_anon_vma(anon_vma);
|
||||
unlock_page(page);
|
||||
out:
|
||||
/*
|
||||
* If migration is successful, decrease refcount of the newpage
|
||||
* which will not free the page because new page owner increased
|
||||
* refcounter. As well, if it is LRU page, add the page to LRU
|
||||
* list in here.
|
||||
*/
|
||||
if (rc == MIGRATEPAGE_SUCCESS) {
|
||||
if (unlikely(__PageMovable(newpage)))
|
||||
put_page(newpage);
|
||||
else
|
||||
putback_lru_page(newpage);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -948,6 +1083,18 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
|
||||
|
||||
if (page_count(page) == 1) {
|
||||
/* page was freed from under us. So we are done. */
|
||||
ClearPageActive(page);
|
||||
ClearPageUnevictable(page);
|
||||
if (unlikely(__PageMovable(page))) {
|
||||
lock_page(page);
|
||||
if (!PageMovable(page))
|
||||
__ClearPageIsolated(page);
|
||||
unlock_page(page);
|
||||
}
|
||||
if (put_new_page)
|
||||
put_new_page(newpage, private);
|
||||
else
|
||||
put_page(newpage);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -960,10 +1107,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
|
||||
}
|
||||
|
||||
rc = __unmap_and_move(page, newpage, force, mode);
|
||||
if (rc == MIGRATEPAGE_SUCCESS) {
|
||||
put_new_page = NULL;
|
||||
if (rc == MIGRATEPAGE_SUCCESS)
|
||||
set_page_owner_migrate_reason(newpage, reason);
|
||||
}
|
||||
|
||||
out:
|
||||
if (rc != -EAGAIN) {
|
||||
@@ -976,33 +1121,45 @@ out:
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
/* Soft-offlined page shouldn't go through lru cache list */
|
||||
if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
|
||||
/*
|
||||
* With this release, we free successfully migrated
|
||||
* page and set PG_HWPoison on just freed page
|
||||
* intentionally. Although it's rather weird, it's how
|
||||
* HWPoison flag works at the moment.
|
||||
*/
|
||||
put_page(page);
|
||||
if (!test_set_page_hwpoison(page))
|
||||
num_poisoned_pages_inc();
|
||||
} else
|
||||
putback_lru_page(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* If migration was not successful and there's a freeing callback, use
|
||||
* it. Otherwise, putback_lru_page() will drop the reference grabbed
|
||||
* during isolation.
|
||||
* If migration is successful, releases reference grabbed during
|
||||
* isolation. Otherwise, restore the page to right list unless
|
||||
* we want to retry.
|
||||
*/
|
||||
if (put_new_page)
|
||||
put_new_page(newpage, private);
|
||||
else if (unlikely(__is_movable_balloon_page(newpage))) {
|
||||
/* drop our reference, page already in the balloon */
|
||||
put_page(newpage);
|
||||
} else
|
||||
putback_lru_page(newpage);
|
||||
if (rc == MIGRATEPAGE_SUCCESS) {
|
||||
put_page(page);
|
||||
if (reason == MR_MEMORY_FAILURE) {
|
||||
/*
|
||||
* Set PG_HWPoison on just freed page
|
||||
* intentionally. Although it's rather weird,
|
||||
* it's how HWPoison flag works at the moment.
|
||||
*/
|
||||
if (!test_set_page_hwpoison(page))
|
||||
num_poisoned_pages_inc();
|
||||
}
|
||||
} else {
|
||||
if (rc != -EAGAIN) {
|
||||
if (likely(!__PageMovable(page))) {
|
||||
putback_lru_page(page);
|
||||
goto put_new;
|
||||
}
|
||||
|
||||
lock_page(page);
|
||||
if (PageMovable(page))
|
||||
putback_movable_page(page);
|
||||
else
|
||||
__ClearPageIsolated(page);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
put_new:
|
||||
if (put_new_page)
|
||||
put_new_page(newpage, private);
|
||||
else
|
||||
put_page(newpage);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
if (rc)
|
||||
@@ -1829,8 +1986,7 @@ fail_putback:
|
||||
}
|
||||
|
||||
orig_entry = *pmd;
|
||||
entry = mk_pmd(new_page, vma->vm_page_prot);
|
||||
entry = pmd_mkhuge(entry);
|
||||
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
|
||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||
|
||||
/*
|
||||
|
mm/mmap.c
@@ -25,6 +25,7 @@
|
||||
#include <linux/personality.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/mount.h>
|
||||
@@ -675,6 +676,8 @@ again: remove_next = 1 + (end > next->vm_end);
|
||||
}
|
||||
}
|
||||
|
||||
vma_adjust_trans_huge(vma, start, end, adjust_next);
|
||||
|
||||
if (file) {
|
||||
mapping = file->f_mapping;
|
||||
root = &mapping->i_mmap;
|
||||
@@ -695,8 +698,6 @@ again: remove_next = 1 + (end > next->vm_end);
|
||||
}
|
||||
}
|
||||
|
||||
vma_adjust_trans_huge(vma, start, end, adjust_next);
|
||||
|
||||
anon_vma = vma->anon_vma;
|
||||
if (!anon_vma && adjust_next)
|
||||
anon_vma = next->anon_vma;
|
||||
@@ -1897,8 +1898,19 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
return -ENOMEM;

get_area = current->mm->get_unmapped_area;
if (file && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
if (file) {
if (file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
} else if (flags & MAP_SHARED) {
/*
* mmap_region() will call shmem_zero_setup() to create a file,
* so use shmem's get_unmapped_area in case it can be huge.
* do_mmap_pgoff() will clear pgoff, so match alignment.
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
}

addr = get_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
|
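The effect of routing MAP_SHARED anonymous mappings through shmem_get_unmapped_area() is visible from userspace: such a mapping may come back aligned to the huge page size when shmem THP is enabled. A small sketch using only standard mmap(); the 2 MiB huge page size is an assumption (x86-64 with 4 KiB base pages), and the alignment is opportunistic, not guaranteed.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4UL << 20;	/* 4 MiB shared anonymous mapping */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Shared anonymous memory is shmem-backed, so placement goes through
	 * shmem's get_unmapped_area after the change above. */
	printf("mapping at %p (2MiB-aligned: %s)\n", p,
	       ((unsigned long)p & ((2UL << 20) - 1)) ? "no" : "yes");
	munmap(p, len);
	return 0;
}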
||||
@@ -2591,6 +2603,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
|
||||
/* drop PG_Mlocked flag for over-mapped range */
|
||||
for (tmp = vma; tmp->vm_start >= start + size;
|
||||
tmp = tmp->vm_next) {
|
||||
/*
|
||||
* Split pmd and munlock page on the border
|
||||
* of the range.
|
||||
*/
|
||||
vma_adjust_trans_huge(tmp, start, start + size, 0);
|
||||
|
||||
munlock_vma_pages_range(tmp,
|
||||
max(tmp->vm_start, start),
|
||||
min(tmp->vm_end, start + size));
|
||||
|
@@ -163,7 +163,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
|
||||
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
|
||||
if (next - addr != HPAGE_PMD_SIZE) {
|
||||
split_huge_pmd(vma, pmd, addr);
|
||||
if (pmd_none(*pmd))
|
||||
if (pmd_trans_unstable(pmd))
|
||||
continue;
|
||||
} else {
|
||||
int nr_ptes = change_huge_pmd(vma, pmd, addr,
|
||||
|
@@ -210,9 +210,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
|
||||
}
|
||||
}
|
||||
split_huge_pmd(vma, old_pmd, old_addr);
|
||||
if (pmd_none(*old_pmd))
|
||||
if (pmd_trans_unstable(old_pmd))
|
||||
continue;
|
||||
VM_BUG_ON(pmd_trans_huge(*old_pmd));
|
||||
}
|
||||
if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
|
||||
break;
|
||||
|
@@ -1809,7 +1809,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_fault);
|
||||
|
||||
void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
void filemap_map_pages(struct fault_env *fe,
|
||||
pgoff_t start_pgoff, pgoff_t end_pgoff)
|
||||
{
|
||||
BUG();
|
||||
}
|
||||
|
@@ -274,7 +274,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
|
||||
#endif
|
||||
|
||||
enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
|
||||
struct task_struct *task, unsigned long totalpages)
|
||||
struct task_struct *task)
|
||||
{
|
||||
if (oom_unkillable_task(task, NULL, oc->nodemask))
|
||||
return OOM_SCAN_CONTINUE;
|
||||
@@ -311,7 +311,7 @@ static struct task_struct *select_bad_process(struct oom_control *oc,
|
||||
for_each_process(p) {
|
||||
unsigned int points;
|
||||
|
||||
switch (oom_scan_process_thread(oc, p, totalpages)) {
|
||||
switch (oom_scan_process_thread(oc, p)) {
|
||||
case OOM_SCAN_SELECT:
|
||||
chosen = p;
|
||||
chosen_points = ULONG_MAX;
|
||||
@@ -383,8 +383,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void dump_header(struct oom_control *oc, struct task_struct *p,
|
||||
struct mem_cgroup *memcg)
|
||||
static void dump_header(struct oom_control *oc, struct task_struct *p)
|
||||
{
|
||||
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
|
||||
current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
|
||||
@@ -392,12 +391,12 @@ static void dump_header(struct oom_control *oc, struct task_struct *p,
|
||||
|
||||
cpuset_print_current_mems_allowed();
|
||||
dump_stack();
|
||||
if (memcg)
|
||||
mem_cgroup_print_oom_info(memcg, p);
|
||||
if (oc->memcg)
|
||||
mem_cgroup_print_oom_info(oc->memcg, p);
|
||||
else
|
||||
show_mem(SHOW_MEM_FILTER_NODES);
|
||||
if (sysctl_oom_dump_tasks)
|
||||
dump_tasks(memcg, oc->nodemask);
|
||||
dump_tasks(oc->memcg, oc->nodemask);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -453,7 +452,7 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||
* We have to make sure to not race with the victim exit path
|
||||
* and cause premature new oom victim selection:
|
||||
* __oom_reap_task exit_mm
|
||||
* atomic_inc_not_zero
|
||||
* mmget_not_zero
|
||||
* mmput
|
||||
* atomic_dec_and_test
|
||||
* exit_oom_victim
|
||||
@@ -475,12 +474,22 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||
if (!p)
|
||||
goto unlock_oom;
|
||||
mm = p->mm;
|
||||
atomic_inc(&mm->mm_users);
|
||||
atomic_inc(&mm->mm_count);
|
||||
task_unlock(p);
|
||||
|
||||
if (!down_read_trylock(&mm->mmap_sem)) {
|
||||
ret = false;
|
||||
goto unlock_oom;
|
||||
goto mm_drop;
|
||||
}
|
||||
|
||||
/*
|
||||
* increase mm_users only after we know we will reap something so
|
||||
* that the mmput_async is called only when we have reaped something
|
||||
* and delayed __mmput doesn't matter that much
|
||||
*/
|
||||
if (!mmget_not_zero(mm)) {
|
||||
up_read(&mm->mmap_sem);
|
||||
goto mm_drop;
|
||||
}
|
||||
|
||||
tlb_gather_mmu(&tlb, mm, 0, -1);
|
||||
@@ -522,15 +531,16 @@ static bool __oom_reap_task(struct task_struct *tsk)
|
||||
* to release its memory.
|
||||
*/
|
||||
set_bit(MMF_OOM_REAPED, &mm->flags);
|
||||
unlock_oom:
|
||||
mutex_unlock(&oom_lock);
|
||||
/*
|
||||
* Drop our reference but make sure the mmput slow path is called from a
|
||||
* different context because we shouldn't risk we get stuck there and
|
||||
* put the oom_reaper out of the way.
|
||||
*/
|
||||
if (mm)
|
||||
mmput_async(mm);
|
||||
mmput_async(mm);
|
||||
mm_drop:
|
||||
mmdrop(mm);
|
||||
unlock_oom:
|
||||
mutex_unlock(&oom_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -739,7 +749,7 @@ void oom_killer_enable(void)
|
||||
*/
|
||||
void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
unsigned int points, unsigned long totalpages,
|
||||
struct mem_cgroup *memcg, const char *message)
|
||||
const char *message)
|
||||
{
|
||||
struct task_struct *victim = p;
|
||||
struct task_struct *child;
|
||||
@@ -765,7 +775,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
task_unlock(p);
|
||||
|
||||
if (__ratelimit(&oom_rs))
|
||||
dump_header(oc, p, memcg);
|
||||
dump_header(oc, p);
|
||||
|
||||
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
|
||||
message, task_pid_nr(p), p->comm, points);
|
||||
@@ -786,8 +796,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
/*
|
||||
* oom_badness() returns 0 if the thread is unkillable
|
||||
*/
|
||||
child_points = oom_badness(child, memcg, oc->nodemask,
|
||||
totalpages);
|
||||
child_points = oom_badness(child,
|
||||
oc->memcg, oc->nodemask, totalpages);
|
||||
if (child_points > victim_points) {
|
||||
put_task_struct(victim);
|
||||
victim = child;
|
||||
@@ -865,8 +875,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
/*
|
||||
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
|
||||
*/
|
||||
void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
|
||||
struct mem_cgroup *memcg)
|
||||
void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint)
|
||||
{
|
||||
if (likely(!sysctl_panic_on_oom))
|
||||
return;
|
||||
@@ -882,7 +891,7 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint,
|
||||
/* Do not panic for oom kills triggered by sysrq */
|
||||
if (is_sysrq_oom(oc))
|
||||
return;
|
||||
dump_header(oc, NULL, memcg);
|
||||
dump_header(oc, NULL);
|
||||
panic("Out of memory: %s panic_on_oom is enabled\n",
|
||||
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
|
||||
}
|
||||
@@ -957,13 +966,13 @@ bool out_of_memory(struct oom_control *oc)
|
||||
constraint = constrained_alloc(oc, &totalpages);
|
||||
if (constraint != CONSTRAINT_MEMORY_POLICY)
|
||||
oc->nodemask = NULL;
|
||||
check_panic_on_oom(oc, constraint, NULL);
|
||||
check_panic_on_oom(oc, constraint);
|
||||
|
||||
if (sysctl_oom_kill_allocating_task && current->mm &&
|
||||
!oom_unkillable_task(current, NULL, oc->nodemask) &&
|
||||
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
|
||||
get_task_struct(current);
|
||||
oom_kill_process(oc, current, 0, totalpages, NULL,
|
||||
oom_kill_process(oc, current, 0, totalpages,
|
||||
"Out of memory (oom_kill_allocating_task)");
|
||||
return true;
|
||||
}
|
||||
@@ -971,12 +980,11 @@ bool out_of_memory(struct oom_control *oc)
|
||||
p = select_bad_process(oc, &points, totalpages);
|
||||
/* Found nothing?!?! Either we hang forever, or we panic. */
|
||||
if (!p && !is_sysrq_oom(oc)) {
|
||||
dump_header(oc, NULL, NULL);
|
||||
dump_header(oc, NULL);
|
||||
panic("Out of memory and no killable processes...\n");
|
||||
}
|
||||
if (p && p != (void *)-1UL) {
|
||||
oom_kill_process(oc, p, points, totalpages, NULL,
|
||||
"Out of memory");
|
||||
oom_kill_process(oc, p, points, totalpages, "Out of memory");
|
||||
/*
|
||||
* Give the killed process a good chance to exit before trying
|
||||
* to allocate memory again.
|
||||
@@ -988,14 +996,15 @@ bool out_of_memory(struct oom_control *oc)
|
||||
|
||||
/*
|
||||
* The pagefault handler calls here because it is out of memory, so kill a
|
||||
* memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
|
||||
* parallel oom killing is already in progress so do nothing.
|
||||
* memory-hogging task. If oom_lock is held by somebody else, a parallel oom
|
||||
* killing is already in progress so do nothing.
|
||||
*/
|
||||
void pagefault_out_of_memory(void)
|
||||
{
|
||||
struct oom_control oc = {
|
||||
.zonelist = NULL,
|
||||
.nodemask = NULL,
|
||||
.memcg = NULL,
|
||||
.gfp_mask = 0,
|
||||
.order = 0,
|
||||
};
|
||||
|
@@ -2563,6 +2563,7 @@ int set_page_dirty(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page_mapping(page);
|
||||
|
||||
page = compound_head(page);
|
||||
if (likely(mapping)) {
|
||||
int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
|
||||
/*
|
||||
@@ -2747,6 +2748,11 @@ int test_clear_page_writeback(struct page *page)
|
||||
__wb_writeout_inc(wb);
|
||||
}
|
||||
}
|
||||
|
||||
if (mapping->host && !mapping_tagged(mapping,
|
||||
PAGECACHE_TAG_WRITEBACK))
|
||||
sb_clear_inode_writeback(mapping->host);
|
||||
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
} else {
|
||||
ret = TestClearPageWriteback(page);
|
||||
@@ -2774,11 +2780,24 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
|
||||
spin_lock_irqsave(&mapping->tree_lock, flags);
|
||||
ret = TestSetPageWriteback(page);
|
||||
if (!ret) {
|
||||
bool on_wblist;
|
||||
|
||||
on_wblist = mapping_tagged(mapping,
|
||||
PAGECACHE_TAG_WRITEBACK);
|
||||
|
||||
radix_tree_tag_set(&mapping->page_tree,
|
||||
page_index(page),
|
||||
PAGECACHE_TAG_WRITEBACK);
|
||||
if (bdi_cap_account_writeback(bdi))
|
||||
__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
|
||||
|
||||
/*
|
||||
* We can come through here when swapping anonymous
|
||||
* pages, so we don't necessarily have an inode to track
|
||||
* for sync.
|
||||
*/
|
||||
if (mapping->host && !on_wblist)
|
||||
sb_mark_inode_writeback(mapping->host);
|
||||
}
|
||||
if (!PageDirty(page))
|
||||
radix_tree_tag_clear(&mapping->page_tree,
|
||||
|
mm/page_alloc.c
@@ -63,6 +63,7 @@
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/memcontrol.h>
|
||||
|
||||
#include <asm/sections.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@@ -1006,6 +1007,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
|
||||
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
|
||||
|
||||
if (compound)
|
||||
ClearPageDoubleMap(page);
|
||||
for (i = 1; i < (1 << order); i++) {
|
||||
if (compound)
|
||||
bad += free_tail_pages_check(page, page + i);
|
||||
@@ -1016,8 +1019,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
|
||||
}
|
||||
}
|
||||
if (PageAnonHead(page))
|
||||
if (PageMappingFlags(page))
|
||||
page->mapping = NULL;
|
||||
if (memcg_kmem_enabled() && PageKmemcg(page)) {
|
||||
memcg_kmem_uncharge(page, order);
|
||||
__ClearPageKmemcg(page);
|
||||
}
|
||||
if (check_free)
|
||||
bad += free_pages_check(page);
|
||||
if (bad)
|
||||
@@ -1724,6 +1731,19 @@ static bool check_new_pages(struct page *page, unsigned int order)
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void post_alloc_hook(struct page *page, unsigned int order,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
set_page_private(page, 0);
|
||||
set_page_refcounted(page);
|
||||
|
||||
arch_alloc_page(page, order);
|
||||
kernel_map_pages(page, 1 << order, 1);
|
||||
kernel_poison_pages(page, 1 << order, 1);
|
||||
kasan_alloc_pages(page, order);
|
||||
set_page_owner(page, order, gfp_flags);
|
||||
}
|
||||
|
||||
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
|
||||
unsigned int alloc_flags)
|
||||
{
|
||||
@@ -1736,13 +1756,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
|
||||
poisoned &= page_is_poisoned(p);
|
||||
}
|
||||
|
||||
set_page_private(page, 0);
|
||||
set_page_refcounted(page);
|
||||
|
||||
arch_alloc_page(page, order);
|
||||
kernel_map_pages(page, 1 << order, 1);
|
||||
kernel_poison_pages(page, 1 << order, 1);
|
||||
kasan_alloc_pages(page, order);
|
||||
post_alloc_hook(page, order, gfp_flags);
|
||||
|
||||
if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
|
||||
for (i = 0; i < (1 << order); i++)
|
||||
@@ -1751,8 +1765,6 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
|
||||
if (order && (gfp_flags & __GFP_COMP))
|
||||
prep_compound_page(page, order);
|
||||
|
||||
set_page_owner(page, order, gfp_flags);
|
||||
|
||||
/*
|
||||
* page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
|
||||
* allocate the page. The expectation is that the caller is taking
|
||||
@@ -2461,7 +2473,6 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
|
||||
void split_page(struct page *page, unsigned int order)
|
||||
{
|
||||
int i;
|
||||
gfp_t gfp_mask;
|
||||
|
||||
VM_BUG_ON_PAGE(PageCompound(page), page);
|
||||
VM_BUG_ON_PAGE(!page_count(page), page);
|
||||
@@ -2475,12 +2486,9 @@ void split_page(struct page *page, unsigned int order)
|
||||
split_page(virt_to_page(page[0].shadow), order);
|
||||
#endif
|
||||
|
||||
gfp_mask = get_page_owner_gfp(page);
|
||||
set_page_owner(page, 0, gfp_mask);
|
||||
for (i = 1; i < (1 << order); i++) {
|
||||
for (i = 1; i < (1 << order); i++)
|
||||
set_page_refcounted(page + i);
|
||||
set_page_owner(page + i, 0, gfp_mask);
|
||||
}
|
||||
split_page_owner(page, order);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(split_page);
|
||||
|
||||
@@ -2509,8 +2517,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
|
||||
zone->free_area[order].nr_free--;
|
||||
rmv_page_order(page);
|
||||
|
||||
set_page_owner(page, order, __GFP_MOVABLE);
|
||||
|
||||
/* Set the pageblock if the isolated page is at least a pageblock */
|
||||
if (order >= pageblock_order - 1) {
|
||||
struct page *endpage = page + (1 << order) - 1;
|
||||
@@ -2526,33 +2532,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
|
||||
return 1UL << order;
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to split_page except the page is already free. As this is only
|
||||
* being used for migration, the migratetype of the block also changes.
|
||||
* As this is called with interrupts disabled, the caller is responsible
|
||||
* for calling arch_alloc_page() and kernel_map_page() after interrupts
|
||||
* are enabled.
|
||||
*
|
||||
* Note: this is probably too low level an operation for use in drivers.
|
||||
* Please consult with lkml before using this in your driver.
|
||||
*/
|
||||
int split_free_page(struct page *page)
|
||||
{
|
||||
unsigned int order;
|
||||
int nr_pages;
|
||||
|
||||
order = page_order(page);
|
||||
|
||||
nr_pages = __isolate_free_page(page, order);
|
||||
if (!nr_pages)
|
||||
return 0;
|
||||
|
||||
/* Split into individual pages */
|
||||
set_page_refcounted(page);
|
||||
split_page(page, order);
|
||||
return nr_pages;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update NUMA hit/miss statistics
|
||||
*
|
||||
@@ -3105,6 +3084,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
|
||||
struct oom_control oc = {
|
||||
.zonelist = ac->zonelist,
|
||||
.nodemask = ac->nodemask,
|
||||
.memcg = NULL,
|
||||
.gfp_mask = gfp_mask,
|
||||
.order = order,
|
||||
};
|
||||
@@ -3868,6 +3848,14 @@ no_zone:
|
||||
}
|
||||
|
||||
out:
|
||||
if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
|
||||
if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
|
||||
__free_pages(page, order);
|
||||
page = NULL;
|
||||
} else
|
||||
__SetPageKmemcg(page);
|
||||
}
|
||||
|
||||
if (kmemcheck_enabled && page)
|
||||
kmemcheck_pagealloc_alloc(page, order, gfp_mask);
|
||||
|
||||
@@ -4023,56 +4011,6 @@ void __free_page_frag(void *addr)
|
||||
}
|
||||
EXPORT_SYMBOL(__free_page_frag);
|
||||
|
||||
/*
|
||||
* alloc_kmem_pages charges newly allocated pages to the kmem resource counter
|
||||
* of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is
|
||||
* equivalent to alloc_pages.
|
||||
*
|
||||
* It should be used when the caller would like to use kmalloc, but since the
|
||||
* allocation is large, it has to fall back to the page allocator.
|
||||
*/
|
||||
struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = alloc_pages(gfp_mask, order);
|
||||
if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
|
||||
__free_pages(page, order);
|
||||
page = NULL;
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = alloc_pages_node(nid, gfp_mask, order);
|
||||
if (page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
|
||||
__free_pages(page, order);
|
||||
page = NULL;
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
/*
|
||||
* __free_kmem_pages and free_kmem_pages will free pages allocated with
|
||||
* alloc_kmem_pages.
|
||||
*/
|
||||
void __free_kmem_pages(struct page *page, unsigned int order)
|
||||
{
|
||||
memcg_kmem_uncharge(page, order);
|
||||
__free_pages(page, order);
|
||||
}
|
||||
|
||||
void free_kmem_pages(unsigned long addr, unsigned int order)
|
||||
{
|
||||
if (addr != 0) {
|
||||
VM_BUG_ON(!virt_addr_valid((void *)addr));
|
||||
__free_kmem_pages(virt_to_page((void *)addr), order);
|
||||
}
|
||||
}
|
||||
|
||||
static void *make_alloc_exact(unsigned long addr, unsigned int order,
|
||||
size_t size)
|
||||
{
|
||||
@@ -4374,6 +4312,9 @@ void show_free_areas(unsigned int filter)
|
||||
" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
|
||||
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
|
||||
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
" anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n"
|
||||
#endif
|
||||
" free:%lu free_pcp:%lu free_cma:%lu\n",
|
||||
global_page_state(NR_ACTIVE_ANON),
|
||||
global_page_state(NR_INACTIVE_ANON),
|
||||
@@ -4391,6 +4332,11 @@ void show_free_areas(unsigned int filter)
|
||||
global_page_state(NR_SHMEM),
|
||||
global_page_state(NR_PAGETABLE),
|
||||
global_page_state(NR_BOUNCE),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR,
|
||||
global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR,
|
||||
global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR,
|
||||
#endif
|
||||
global_page_state(NR_FREE_PAGES),
|
||||
free_pcp,
|
||||
global_page_state(NR_FREE_CMA_PAGES));
|
||||
@@ -4425,6 +4371,11 @@ void show_free_areas(unsigned int filter)
|
||||
" writeback:%lukB"
|
||||
" mapped:%lukB"
|
||||
" shmem:%lukB"
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
" shmem_thp: %lukB"
|
||||
" shmem_pmdmapped: %lukB"
|
||||
" anon_thp: %lukB"
|
||||
#endif
|
||||
" slab_reclaimable:%lukB"
|
||||
" slab_unreclaimable:%lukB"
|
||||
" kernel_stack:%lukB"
|
||||
@@ -4457,6 +4408,12 @@ void show_free_areas(unsigned int filter)
|
||||
K(zone_page_state(zone, NR_WRITEBACK)),
|
||||
K(zone_page_state(zone, NR_FILE_MAPPED)),
|
||||
K(zone_page_state(zone, NR_SHMEM)),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
K(zone_page_state(zone, NR_SHMEM_THPS) * HPAGE_PMD_NR),
|
||||
K(zone_page_state(zone, NR_SHMEM_PMDMAPPED)
|
||||
* HPAGE_PMD_NR),
|
||||
K(zone_page_state(zone, NR_ANON_THPS) * HPAGE_PMD_NR),
|
||||
#endif
|
||||
K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
|
||||
K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
|
||||
zone_page_state(zone, NR_KERNEL_STACK) *
|
||||
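For readers of the counters added above: NR_ANON_THPS, NR_SHMEM_THPS and NR_SHMEM_PMDMAPPED count huge pages, so the printout scales them by HPAGE_PMD_NR. Assuming x86-64 with 4 KiB base pages and 2 MiB THP (so HPAGE_PMD_NR is 512), 3 shmem THPs show up in the per-zone output as K(3 * 512) = 1536 pages * 4 KiB = 6144 kB.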
@@ -6467,15 +6424,18 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
sizeof(arch_zone_lowest_possible_pfn));
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {

start_pfn = find_min_pfn_with_active_regions();

for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);

end_pfn = max(max_zone_pfn[i], start_pfn);
arch_zone_lowest_possible_pfn[i] = start_pfn;
arch_zone_highest_possible_pfn[i] = end_pfn;

start_pfn = end_pfn;
}
arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
|
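The rewritten loop above drops the zone-0 special case: every zone except ZONE_MOVABLE is laid out from a running start_pfn, and an empty zone simply ends where it starts. A standalone sketch of the same arithmetic; the zone names and PFN values here are illustrative, not the kernel's real configuration.

#include <stdio.h>

enum { ZONE_DMA, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

int main(void)
{
	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 4096, 262144, 0 };
	unsigned long lo[MAX_NR_ZONES] = { 0 }, hi[MAX_NR_ZONES] = { 0 };
	unsigned long start_pfn = 1;	/* stands in for find_min_pfn_with_active_regions() */
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		unsigned long end_pfn;

		if (i == ZONE_MOVABLE)	/* ZONE_MOVABLE is sized separately */
			continue;
		end_pfn = max_ul(max_zone_pfn[i], start_pfn);
		lo[i] = start_pfn;	/* zone never ends before it starts */
		hi[i] = end_pfn;
		start_pfn = end_pfn;
	}
	for (i = 0; i < MAX_NR_ZONES; i++)
		printf("zone %d: [%lu, %lu)\n", i, lo[i], hi[i]);
	return 0;
}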
||||
|
@@ -7,6 +7,7 @@
|
||||
#include <linux/pageblock-flags.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/page_owner.h>
|
||||
#include "internal.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
@@ -80,7 +81,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
||||
{
|
||||
struct zone *zone;
|
||||
unsigned long flags, nr_pages;
|
||||
struct page *isolated_page = NULL;
|
||||
bool isolated_page = false;
|
||||
unsigned int order;
|
||||
unsigned long page_idx, buddy_idx;
|
||||
struct page *buddy;
|
||||
@@ -108,9 +109,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
||||
if (pfn_valid_within(page_to_pfn(buddy)) &&
|
||||
!is_migrate_isolate_page(buddy)) {
|
||||
__isolate_free_page(page, order);
|
||||
kernel_map_pages(page, (1 << order), 1);
|
||||
set_page_refcounted(page);
|
||||
isolated_page = page;
|
||||
isolated_page = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -128,8 +127,10 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
||||
zone->nr_isolate_pageblock--;
|
||||
out:
|
||||
spin_unlock_irqrestore(&zone->lock, flags);
|
||||
if (isolated_page)
|
||||
__free_pages(isolated_page, order);
|
||||
if (isolated_page) {
|
||||
post_alloc_hook(page, order, __GFP_MOVABLE);
|
||||
__free_pages(page, order);
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct page *
|
||||
|
mm/page_owner.c
@@ -7,11 +7,22 @@
|
||||
#include <linux/page_owner.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/stackdepot.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
|
||||
* to use off stack temporal storage
|
||||
*/
|
||||
#define PAGE_OWNER_STACK_DEPTH (16)
|
||||
|
||||
static bool page_owner_disabled = true;
|
||||
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
|
||||
|
||||
static depot_stack_handle_t dummy_handle;
|
||||
static depot_stack_handle_t failure_handle;
|
||||
|
||||
static void init_early_allocated_pages(void);
|
||||
|
||||
static int early_page_owner_param(char *buf)
|
||||
@@ -34,11 +45,41 @@ static bool need_page_owner(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
static noinline void register_dummy_stack(void)
|
||||
{
|
||||
unsigned long entries[4];
|
||||
struct stack_trace dummy;
|
||||
|
||||
dummy.nr_entries = 0;
|
||||
dummy.max_entries = ARRAY_SIZE(entries);
|
||||
dummy.entries = &entries[0];
|
||||
dummy.skip = 0;
|
||||
|
||||
save_stack_trace(&dummy);
|
||||
dummy_handle = depot_save_stack(&dummy, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static noinline void register_failure_stack(void)
|
||||
{
|
||||
unsigned long entries[4];
|
||||
struct stack_trace failure;
|
||||
|
||||
failure.nr_entries = 0;
|
||||
failure.max_entries = ARRAY_SIZE(entries);
|
||||
failure.entries = &entries[0];
|
||||
failure.skip = 0;
|
||||
|
||||
save_stack_trace(&failure);
|
||||
failure_handle = depot_save_stack(&failure, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void init_page_owner(void)
|
||||
{
|
||||
if (page_owner_disabled)
|
||||
return;
|
||||
|
||||
register_dummy_stack();
|
||||
register_failure_stack();
|
||||
static_branch_enable(&page_owner_inited);
|
||||
init_early_allocated_pages();
|
||||
}
|
||||
@@ -61,25 +102,66 @@ void __reset_page_owner(struct page *page, unsigned int order)
|
||||
}
|
||||
}
|
||||
|
||||
void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
|
||||
static inline bool check_recursive_alloc(struct stack_trace *trace,
|
||||
unsigned long ip)
|
||||
{
|
||||
struct page_ext *page_ext = lookup_page_ext(page);
|
||||
int i, count;
|
||||
|
||||
if (!trace->nr_entries)
|
||||
return false;
|
||||
|
||||
for (i = 0, count = 0; i < trace->nr_entries; i++) {
|
||||
if (trace->entries[i] == ip && ++count == 2)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static noinline depot_stack_handle_t save_stack(gfp_t flags)
|
||||
{
|
||||
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
|
||||
struct stack_trace trace = {
|
||||
.nr_entries = 0,
|
||||
.max_entries = ARRAY_SIZE(page_ext->trace_entries),
|
||||
.entries = &page_ext->trace_entries[0],
|
||||
.skip = 3,
|
||||
.entries = entries,
|
||||
.max_entries = PAGE_OWNER_STACK_DEPTH,
|
||||
.skip = 0
|
||||
};
|
||||
depot_stack_handle_t handle;
|
||||
|
||||
save_stack_trace(&trace);
|
||||
if (trace.nr_entries != 0 &&
|
||||
trace.entries[trace.nr_entries-1] == ULONG_MAX)
|
||||
trace.nr_entries--;
|
||||
|
||||
/*
|
||||
* We need to check recursion here because our request to stackdepot
|
||||
* could trigger memory allocation to save new entry. New memory
|
||||
* allocation would reach here and call depot_save_stack() again
|
||||
* if we don't catch it. There is still not enough memory in stackdepot
|
||||
* so it would try to allocate memory again and loop forever.
|
||||
*/
|
||||
if (check_recursive_alloc(&trace, _RET_IP_))
|
||||
return dummy_handle;
|
||||
|
||||
handle = depot_save_stack(&trace, flags);
|
||||
if (!handle)
|
||||
handle = failure_handle;
|
||||
|
||||
return handle;
|
||||
}
|
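The recursion guard in save_stack() only needs to notice that the current return address already appears twice in the captured trace, which means stackdepot's own allocation has re-entered the path. A standalone sketch of just that check, userspace code with made-up addresses.

#include <stdbool.h>
#include <stdio.h>

/* Same idea as check_recursive_alloc(): true once ip is seen a second time. */
static bool check_recursive_alloc(const unsigned long *entries, int nr,
				  unsigned long ip)
{
	int i, count = 0;

	for (i = 0; i < nr; i++) {
		if (entries[i] == ip && ++count == 2)
			return true;
	}
	return false;
}

int main(void)
{
	unsigned long trace[] = { 0x1000, 0x2000, 0x1000, 0x3000 };

	printf("%d\n", check_recursive_alloc(trace, 4, 0x1000));	/* 1: recursion */
	printf("%d\n", check_recursive_alloc(trace, 4, 0x2000));	/* 0: seen once */
	return 0;
}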
||||
|
||||
noinline void __set_page_owner(struct page *page, unsigned int order,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct page_ext *page_ext = lookup_page_ext(page);
|
||||
|
||||
if (unlikely(!page_ext))
|
||||
return;
|
||||
|
||||
save_stack_trace(&trace);
|
||||
|
||||
page_ext->handle = save_stack(gfp_mask);
|
||||
page_ext->order = order;
|
||||
page_ext->gfp_mask = gfp_mask;
|
||||
page_ext->nr_entries = trace.nr_entries;
|
||||
page_ext->last_migrate_reason = -1;
|
||||
|
||||
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
|
||||
@@ -94,34 +176,31 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
|
||||
page_ext->last_migrate_reason = reason;
|
||||
}
|
||||
|
||||
gfp_t __get_page_owner_gfp(struct page *page)
|
||||
void __split_page_owner(struct page *page, unsigned int order)
|
||||
{
|
||||
int i;
|
||||
struct page_ext *page_ext = lookup_page_ext(page);
|
||||
if (unlikely(!page_ext))
|
||||
/*
|
||||
* The caller just returns 0 if no valid gfp
|
||||
* So return 0 here too.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
return page_ext->gfp_mask;
|
||||
if (unlikely(!page_ext))
|
||||
return;
|
||||
|
||||
page_ext->order = 0;
|
||||
for (i = 1; i < (1 << order); i++)
|
||||
__copy_page_owner(page, page + i);
|
||||
}
|
||||
|
||||
void __copy_page_owner(struct page *oldpage, struct page *newpage)
|
||||
{
|
||||
struct page_ext *old_ext = lookup_page_ext(oldpage);
|
||||
struct page_ext *new_ext = lookup_page_ext(newpage);
|
||||
int i;
|
||||
|
||||
if (unlikely(!old_ext || !new_ext))
|
||||
return;
|
||||
|
||||
new_ext->order = old_ext->order;
|
||||
new_ext->gfp_mask = old_ext->gfp_mask;
|
||||
new_ext->nr_entries = old_ext->nr_entries;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++)
|
||||
new_ext->trace_entries[i] = old_ext->trace_entries[i];
|
||||
new_ext->last_migrate_reason = old_ext->last_migrate_reason;
|
||||
new_ext->handle = old_ext->handle;
|
||||
|
||||
/*
|
||||
* We don't clear the bit on the oldpage as it's going to be freed
|
||||
@@ -137,14 +216,18 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
|
||||
|
||||
static ssize_t
|
||||
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
|
||||
struct page *page, struct page_ext *page_ext)
|
||||
struct page *page, struct page_ext *page_ext,
|
||||
depot_stack_handle_t handle)
|
||||
{
|
||||
int ret;
|
||||
int pageblock_mt, page_mt;
|
||||
char *kbuf;
|
||||
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
|
||||
struct stack_trace trace = {
|
||||
.nr_entries = page_ext->nr_entries,
|
||||
.entries = &page_ext->trace_entries[0],
|
||||
.nr_entries = 0,
|
||||
.entries = entries,
|
||||
.max_entries = PAGE_OWNER_STACK_DEPTH,
|
||||
.skip = 0
|
||||
};
|
||||
|
||||
kbuf = kmalloc(count, GFP_KERNEL);
|
||||
@@ -173,6 +256,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
|
||||
depot_fetch_stack(handle, &trace);
|
||||
ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
@@ -203,10 +287,14 @@ err:
|
||||
void __dump_page_owner(struct page *page)
|
||||
{
|
||||
struct page_ext *page_ext = lookup_page_ext(page);
|
||||
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
|
||||
struct stack_trace trace = {
|
||||
.nr_entries = page_ext->nr_entries,
|
||||
.entries = &page_ext->trace_entries[0],
|
||||
.nr_entries = 0,
|
||||
.entries = entries,
|
||||
.max_entries = PAGE_OWNER_STACK_DEPTH,
|
||||
.skip = 0
|
||||
};
|
||||
depot_stack_handle_t handle;
|
||||
gfp_t gfp_mask;
|
||||
int mt;
|
||||
|
||||
@@ -222,6 +310,13 @@ void __dump_page_owner(struct page *page)
|
||||
return;
|
||||
}
|
||||
|
||||
handle = READ_ONCE(page_ext->handle);
|
||||
if (!handle) {
|
||||
pr_alert("page_owner info is not active (free page?)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
depot_fetch_stack(handle, &trace);
|
||||
pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
|
||||
page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
|
||||
print_stack_trace(&trace, 0);
|
||||
@@ -237,6 +332,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
unsigned long pfn;
|
||||
struct page *page;
|
||||
struct page_ext *page_ext;
|
||||
depot_stack_handle_t handle;
|
||||
|
||||
if (!static_branch_unlikely(&page_owner_inited))
|
||||
return -EINVAL;
|
||||
@@ -285,10 +381,19 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Access to page_ext->handle isn't synchronous so we should
|
||||
* be careful to access it.
|
||||
*/
|
||||
handle = READ_ONCE(page_ext->handle);
|
||||
if (!handle)
|
||||
continue;
|
||||
|
||||
/* Record the next PFN to read in the file offset */
|
||||
*ppos = (pfn - min_low_pfn) + 1;
|
||||
|
||||
return print_page_owner(buf, count, pfn, page, page_ext);
|
||||
return print_page_owner(buf, count, pfn, page,
|
||||
page_ext, handle);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@@ -89,7 +89,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
|
||||
page = lru_to_page(pages);
|
||||
list_del(&page->lru);
|
||||
if (add_to_page_cache_lru(page, mapping, page->index,
|
||||
mapping_gfp_constraint(mapping, GFP_KERNEL))) {
|
||||
readahead_gfp_mask(mapping))) {
|
||||
read_cache_pages_invalidate_page(mapping, page);
|
||||
continue;
|
||||
}
|
||||
@@ -108,7 +108,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
|
||||
EXPORT_SYMBOL(read_cache_pages);
|
||||
|
||||
static int read_pages(struct address_space *mapping, struct file *filp,
|
||||
struct list_head *pages, unsigned nr_pages)
|
||||
struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
unsigned page_idx;
|
||||
@@ -126,10 +126,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
|
||||
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
|
||||
struct page *page = lru_to_page(pages);
|
||||
list_del(&page->lru);
|
||||
if (!add_to_page_cache_lru(page, mapping, page->index,
|
||||
mapping_gfp_constraint(mapping, GFP_KERNEL))) {
|
||||
if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
|
||||
mapping->a_ops->readpage(filp, page);
|
||||
}
|
||||
put_page(page);
|
||||
}
|
||||
ret = 0;
|
||||
@@ -159,6 +157,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
|
||||
int page_idx;
|
||||
int ret = 0;
|
||||
loff_t isize = i_size_read(inode);
|
||||
gfp_t gfp_mask = readahead_gfp_mask(mapping);
|
||||
|
||||
if (isize == 0)
|
||||
goto out;
|
||||
@@ -180,7 +179,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
|
||||
if (page && !radix_tree_exceptional_entry(page))
|
||||
continue;
|
||||
|
||||
page = page_cache_alloc_readahead(mapping);
|
||||
page = __page_cache_alloc(gfp_mask);
|
||||
if (!page)
|
||||
break;
|
||||
page->index = page_offset;
|
||||
@@ -196,7 +195,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
|
||||
* will then handle the error.
|
||||
*/
|
||||
if (ret)
|
||||
read_pages(mapping, filp, &page_pool, ret);
|
||||
read_pages(mapping, filp, &page_pool, ret, gfp_mask);
|
||||
BUG_ON(!list_empty(&page_pool));
|
||||
out:
|
||||
return ret;
|
||||
|
mm/rmap.c
@@ -1212,10 +1212,8 @@ void do_page_add_anon_rmap(struct page *page,
|
||||
* pte lock(a spinlock) is held, which implies preemption
|
||||
* disabled.
|
||||
*/
|
||||
if (compound) {
|
||||
__inc_zone_page_state(page,
|
||||
NR_ANON_TRANSPARENT_HUGEPAGES);
|
||||
}
|
||||
if (compound)
|
||||
__inc_zone_page_state(page, NR_ANON_THPS);
|
||||
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
|
||||
}
|
||||
if (unlikely(PageKsm(page)))
|
||||
@@ -1253,7 +1251,7 @@ void page_add_new_anon_rmap(struct page *page,
|
||||
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
|
||||
/* increment count (starts at -1) */
|
||||
atomic_set(compound_mapcount_ptr(page), 0);
|
||||
__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
|
||||
__inc_zone_page_state(page, NR_ANON_THPS);
|
||||
} else {
|
||||
/* Anon THP always mapped first with PMD */
|
||||
VM_BUG_ON_PAGE(PageTransCompound(page), page);
|
||||
@@ -1270,18 +1268,42 @@ void page_add_new_anon_rmap(struct page *page,
*
* The caller needs to hold the pte lock.
*/
void page_add_file_rmap(struct page *page)
void page_add_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;

VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
if (compound && PageTransHuge(page)) {
for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
if (atomic_inc_and_test(&page[i]._mapcount))
nr++;
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
__inc_zone_page_state(page, NR_SHMEM_PMDMAPPED);
} else {
if (PageTransCompound(page)) {
VM_BUG_ON_PAGE(!PageLocked(page), page);
SetPageDoubleMap(compound_head(page));
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
}
if (!atomic_inc_and_test(&page->_mapcount))
goto out;
}
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, nr);
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
out:
unlock_page_memcg(page);
}
|
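The compound-case accounting above can be modelled in isolation: each subpage _mapcount starts at -1, and NR_FILE_MAPPED is adjusted only by the number of subpages whose count first reaches zero. A userspace model of that bookkeeping; HPAGE_PMD_NR is taken as 512 here, and the kernel of course uses atomic ops and the real zone counters.

#include <stdio.h>

#define HPAGE_PMD_NR 512

int main(void)
{
	static int mapcount[HPAGE_PMD_NR];
	int i, nr = 0;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		mapcount[i] = -1;		/* freshly allocated subpages */

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		if (++mapcount[i] == 0)		/* atomic_inc_and_test() analogue */
			nr++;
	}
	printf("NR_FILE_MAPPED += %d\n", nr);	/* 512 on the first PMD mapping */
	return 0;
}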
||||
|
||||
static void page_remove_file_rmap(struct page *page)
|
||||
static void page_remove_file_rmap(struct page *page, bool compound)
|
||||
{
|
||||
int i, nr = 1;
|
||||
|
||||
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
|
||||
lock_page_memcg(page);
|
||||
|
||||
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
|
||||
@@ -1292,15 +1314,26 @@ static void page_remove_file_rmap(struct page *page)
|
||||
}
|
||||
|
||||
/* page still mapped by someone else? */
|
||||
if (!atomic_add_negative(-1, &page->_mapcount))
|
||||
goto out;
|
||||
if (compound && PageTransHuge(page)) {
|
||||
for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
|
||||
if (atomic_add_negative(-1, &page[i]._mapcount))
|
||||
nr++;
|
||||
}
|
||||
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
|
||||
goto out;
|
||||
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
|
||||
__dec_zone_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
} else {
|
||||
if (!atomic_add_negative(-1, &page->_mapcount))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
|
||||
* these counters are not modified in interrupt context, and
|
||||
* pte lock(a spinlock) is held, which implies preemption disabled.
|
||||
*/
|
||||
__dec_zone_page_state(page, NR_FILE_MAPPED);
|
||||
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, -nr);
|
||||
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
|
||||
|
||||
if (unlikely(PageMlocked(page)))
|
||||
@@ -1323,7 +1356,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
|
||||
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
|
||||
return;
|
||||
|
||||
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
|
||||
__dec_zone_page_state(page, NR_ANON_THPS);
|
||||
|
||||
if (TestClearPageDoubleMap(page)) {
|
||||
/*
|
||||
@@ -1356,11 +1389,8 @@ static void page_remove_anon_compound_rmap(struct page *page)
|
||||
*/
|
||||
void page_remove_rmap(struct page *page, bool compound)
|
||||
{
|
||||
if (!PageAnon(page)) {
|
||||
VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
|
||||
page_remove_file_rmap(page);
|
||||
return;
|
||||
}
|
||||
if (!PageAnon(page))
|
||||
return page_remove_file_rmap(page, compound);
|
||||
|
||||
if (compound)
|
||||
return page_remove_anon_compound_rmap(page);
|
||||
@@ -1436,8 +1466,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
|
||||
*/
|
||||
if (!(flags & TTU_IGNORE_MLOCK)) {
|
||||
if (vma->vm_flags & VM_LOCKED) {
|
||||
/* Holding pte lock, we do *not* need mmap_sem here */
|
||||
mlock_vma_page(page);
|
||||
/* PTE-mapped THP are never mlocked */
|
||||
if (!PageTransCompound(page)) {
|
||||
/*
|
||||
* Holding pte lock, we do *not* need
|
||||
* mmap_sem here
|
||||
*/
|
||||
mlock_vma_page(page);
|
||||
}
|
||||
ret = SWAP_MLOCK;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
mm/shmem.c
(Diff for this file not shown because it is too large.)
mm/slab.c
@@ -1236,61 +1236,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
static void freelist_randomize(struct rnd_state *state, freelist_idx_t *list,
|
||||
size_t count)
|
||||
{
|
||||
size_t i;
|
||||
unsigned int rand;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
list[i] = i;
|
||||
|
||||
/* Fisher-Yates shuffle */
|
||||
for (i = count - 1; i > 0; i--) {
|
||||
rand = prandom_u32_state(state);
|
||||
rand %= (i + 1);
|
||||
swap(list[i], list[rand]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Create a random sequence per cache */
|
||||
static int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
|
||||
{
|
||||
unsigned int seed, count = cachep->num;
|
||||
struct rnd_state state;
|
||||
|
||||
if (count < 2)
|
||||
return 0;
|
||||
|
||||
/* If it fails, we will just use the global lists */
|
||||
cachep->random_seq = kcalloc(count, sizeof(freelist_idx_t), gfp);
|
||||
if (!cachep->random_seq)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Get best entropy at this stage */
|
||||
get_random_bytes_arch(&seed, sizeof(seed));
|
||||
prandom_seed_state(&state, seed);
|
||||
|
||||
freelist_randomize(&state, cachep->random_seq, count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Destroy the per-cache random freelist sequence */
|
||||
static void cache_random_seq_destroy(struct kmem_cache *cachep)
|
||||
{
|
||||
kfree(cachep->random_seq);
|
||||
cachep->random_seq = NULL;
|
||||
}
|
||||
#else
|
||||
static inline int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
|
||||
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
|
||||
|
||||
|
||||
/*
|
||||
* Initialisation. Called after the page allocator have been initialised and
|
||||
* before smp_init().
|
||||
@@ -2535,7 +2480,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
|
||||
union freelist_init_state {
|
||||
struct {
|
||||
unsigned int pos;
|
||||
freelist_idx_t *list;
|
||||
unsigned int *list;
|
||||
unsigned int count;
|
||||
unsigned int rand;
|
||||
};
|
||||
@@ -2554,7 +2499,7 @@ static bool freelist_state_initialize(union freelist_init_state *state,
|
||||
unsigned int rand;
|
||||
|
||||
/* Use best entropy available to define a random shift */
|
||||
get_random_bytes_arch(&rand, sizeof(rand));
|
||||
rand = get_random_int();
|
||||
|
||||
/* Use a random state if the pre-computed list is not available */
|
||||
if (!cachep->random_seq) {
|
||||
@@ -2576,13 +2521,20 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state)
|
||||
return (state->list[state->pos++] + state->rand) % state->count;
|
||||
}
|
||||
|
||||
/* Swap two freelist entries */
|
||||
static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
|
||||
{
|
||||
swap(((freelist_idx_t *)page->freelist)[a],
|
||||
((freelist_idx_t *)page->freelist)[b]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Shuffle the freelist initialization state based on pre-computed lists.
|
||||
* return true if the list was successfully shuffled, false otherwise.
|
||||
*/
|
||||
static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
|
||||
{
|
||||
unsigned int objfreelist = 0, i, count = cachep->num;
|
||||
unsigned int objfreelist = 0, i, rand, count = cachep->num;
|
||||
union freelist_init_state state;
|
||||
bool precomputed;
|
||||
|
||||
@@ -2607,7 +2559,15 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
|
||||
* Later use a pre-computed list for speed.
|
||||
*/
|
||||
if (!precomputed) {
|
||||
freelist_randomize(&state.rnd_state, page->freelist, count);
|
||||
for (i = 0; i < count; i++)
|
||||
set_free_obj(page, i, i);
|
||||
|
||||
/* Fisher-Yates shuffle */
|
||||
for (i = count - 1; i > 0; i--) {
|
||||
rand = prandom_u32_state(&state.rnd_state);
|
||||
rand %= (i + 1);
|
||||
swap_free_obj(page, i, rand);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < count; i++)
|
||||
set_free_obj(page, i, next_random_slot(&state));
|
||||
@@ -2726,8 +2686,11 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
|
||||
* critical path in kmem_cache_alloc().
|
||||
*/
|
||||
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
|
||||
pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
|
||||
BUG();
|
||||
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
|
||||
flags &= ~GFP_SLAB_BUG_MASK;
|
||||
pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
|
||||
invalid_mask, &invalid_mask, flags, &flags);
|
||||
dump_stack();
|
||||
}
|
||||
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
|
||||
|
||||
@@ -3489,8 +3452,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
|
||||
n->free_objects -= cachep->num;
|
||||
|
||||
page = list_last_entry(&n->slabs_free, struct page, lru);
|
||||
list_del(&page->lru);
|
||||
list_add(&page->lru, list);
|
||||
list_move(&page->lru, list);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3979,7 +3941,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
|
||||
int shared = 0;
|
||||
int batchcount = 0;
|
||||
|
||||
err = cache_random_seq_create(cachep, gfp);
|
||||
err = cache_random_seq_create(cachep, cachep->num, gfp);
|
||||
if (err)
|
||||
goto end;
|
||||
|
||||
|
mm/slab.h
@@ -42,6 +42,7 @@ struct kmem_cache {
#include <linux/kmemcheck.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>

/*
* State of the slab allocator.

@@ -253,8 +254,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
if (is_root_cache(s))
return 0;

ret = __memcg_kmem_charge_memcg(page, gfp, order,
s->memcg_params.memcg);
ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
if (ret)
return ret;

@@ -268,6 +268,9 @@ static __always_inline int memcg_charge_slab(struct page *page,
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
if (!memcg_kmem_enabled())
return;

memcg_kmem_update_page_stat(page,
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE,

@@ -390,7 +393,11 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (should_failslab(s, flags))
return NULL;

return memcg_kmem_get_cache(s, flags);
if (memcg_kmem_enabled() &&
((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
return memcg_kmem_get_cache(s);

return s;
}

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,

@@ -407,7 +414,9 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
s->flags, flags);
kasan_slab_alloc(s, object, flags);
}
memcg_kmem_put_cache(s);

if (memcg_kmem_enabled())
memcg_kmem_put_cache(s);
}

#ifndef CONFIG_SLOB

@@ -464,4 +473,17 @@ int memcg_slab_show(struct seq_file *m, void *p);

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
unsigned int count, gfp_t gfp)
{
return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#endif /* MM_SLAB_H */
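The header pairs the real cache_random_seq_create()/cache_random_seq_destroy() prototypes with no-op inline stubs for the !CONFIG_SLAB_FREELIST_RANDOM case, so call sites need no #ifdefs of their own. A compressed sketch of that pattern around a hypothetical FEATURE_X option (the stubs are used here because FEATURE_X is left undefined; defining it would require linking a real implementation):

#include <stdio.h>

#ifdef FEATURE_X
int feature_x_setup(unsigned int count);
void feature_x_teardown(void);
#else
/* Zero-cost stubs: callers compile and run unchanged. */
static inline int feature_x_setup(unsigned int count)
{
	(void)count;
	return 0;
}
static inline void feature_x_teardown(void) { }
#endif

int main(void)
{
	if (feature_x_setup(16))
		return 1;
	printf("setup done (or skipped)\n");
	feature_x_teardown();
	return 0;
}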
mm/slab_common.c
@@ -1012,7 +1012,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
struct page *page;

flags |= __GFP_COMP;
page = alloc_kmem_pages(flags, order);
page = alloc_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
kasan_kmalloc_large(ret, size, flags);

@@ -1030,6 +1030,53 @@ void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
size_t count)
{
size_t i;
unsigned int rand;

for (i = 0; i < count; i++)
list[i] = i;

/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(state);
rand %= (i + 1);
swap(list[i], list[rand]);
}
}

/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
gfp_t gfp)
{
struct rnd_state state;

if (count < 2 || cachep->random_seq)
return 0;

cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
if (!cachep->random_seq)
return -ENOMEM;

/* Get best entropy at this stage of boot */
prandom_seed_state(&state, get_random_long());

freelist_randomize(&state, cachep->random_seq, count);
return 0;
}

/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
kfree(cachep->random_seq);
cachep->random_seq = NULL;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
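cache_random_seq_create() builds one shuffled permutation per cache, lazily and idempotently (the early return when random_seq is already set), and cache_random_seq_destroy() releases it. A userspace sketch of that lifecycle, with calloc()/free() standing in for kcalloc()/kfree() and rand() for the kernel PRNG; struct fake_cache and the _sketch helpers are invented names:

#include <stdio.h>
#include <stdlib.h>

struct fake_cache {
	unsigned int *random_seq;	/* precomputed permutation, or NULL */
	unsigned int num;		/* objects per slab */
};

static int cache_random_seq_create_sketch(struct fake_cache *c)
{
	unsigned int i, j, tmp;

	if (c->num < 2 || c->random_seq)
		return 0;		/* nothing to randomize, or done already */

	c->random_seq = calloc(c->num, sizeof(unsigned int));
	if (!c->random_seq)
		return -1;

	for (i = 0; i < c->num; i++)
		c->random_seq[i] = i;
	for (i = c->num - 1; i > 0; i--) {	/* Fisher-Yates, as above */
		j = (unsigned int)rand() % (i + 1);
		tmp = c->random_seq[i];
		c->random_seq[i] = c->random_seq[j];
		c->random_seq[j] = tmp;
	}
	return 0;
}

static void cache_random_seq_destroy_sketch(struct fake_cache *c)
{
	free(c->random_seq);
	c->random_seq = NULL;
}

int main(void)
{
	struct fake_cache c = { NULL, 8 };
	unsigned int i;

	if (cache_random_seq_create_sketch(&c))
		return 1;
	for (i = 0; i < c.num; i++)
		printf("%u ", c.random_seq[i]);
	printf("\n");
	cache_random_seq_destroy_sketch(&c);
	return 0;
}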
mm/slub.c
@@ -1405,6 +1405,109 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
return page;
}

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Pre-initialize the random sequence cache */
static int init_cache_random_seq(struct kmem_cache *s)
{
int err;
unsigned long i, count = oo_objects(s->oo);

err = cache_random_seq_create(s, count, GFP_KERNEL);
if (err) {
pr_err("SLUB: Unable to initialize free list for %s\n",
s->name);
return err;
}

/* Transform to an offset on the set of pages */
if (s->random_seq) {
for (i = 0; i < count; i++)
s->random_seq[i] *= s->size;
}
return 0;
}

/* Initialize each random sequence freelist per cache */
static void __init init_freelist_randomization(void)
{
struct kmem_cache *s;

mutex_lock(&slab_mutex);

list_for_each_entry(s, &slab_caches, list)
init_cache_random_seq(s);

mutex_unlock(&slab_mutex);
}

/* Get the next entry on the pre-computed freelist randomized */
static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
{
unsigned int idx;

/*
* If the target page allocation failed, the number of objects on the
* page might be smaller than the usual size defined by the cache.
*/
do {
idx = s->random_seq[*pos];
*pos += 1;
if (*pos >= freelist_count)
*pos = 0;
} while (unlikely(idx >= page_limit));

return (char *)start + idx;
}

/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
{
void *start;
void *cur;
void *next;
unsigned long idx, pos, page_limit, freelist_count;

if (page->objects < 2 || !s->random_seq)
return false;

freelist_count = oo_objects(s->oo);
pos = get_random_int() % freelist_count;

page_limit = page->objects * s->size;
start = fixup_red_left(s, page_address(page));

/* First entry is used as the base of the freelist */
cur = next_freelist_entry(s, page, &pos, start, page_limit,
freelist_count);
page->freelist = cur;

for (idx = 1; idx < page->objects; idx++) {
setup_object(s, page, cur);
next = next_freelist_entry(s, page, &pos, start, page_limit,
freelist_count);
set_freepointer(s, cur, next);
cur = next;
}
setup_object(s, page, cur);
set_freepointer(s, cur, NULL);

return true;
}
#else
static inline int init_cache_random_seq(struct kmem_cache *s)
{
return 0;
}
static inline void init_freelist_randomization(void) { }
static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
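next_freelist_entry() consumes the precomputed sequence of byte offsets, wrapping pos at the end of the sequence and skipping offsets that fall beyond the objects actually present on a short page. A standalone sketch of that walk over plain integers; the sequence, object size, and page_limit below are made up:

#include <stdio.h>

/* Precomputed offsets for 8 objects of 64 bytes, already pre-multiplied
 * by the object size as init_cache_random_seq() does. */
static const unsigned long random_seq[8] = {
	320, 64, 448, 0, 256, 192, 384, 128
};

/* Next usable offset: wrap the cursor, skip entries past page_limit. */
static unsigned long next_offset(unsigned long *pos, unsigned long page_limit,
				 unsigned long seq_len)
{
	unsigned long idx;

	do {
		idx = random_seq[*pos];
		*pos = (*pos + 1) % seq_len;
	} while (idx >= page_limit);

	return idx;
}

int main(void)
{
	unsigned long pos = 3;			/* random starting cursor */
	unsigned long page_limit = 5 * 64;	/* a short page: only 5 objects */
	int i;

	for (i = 0; i < 5; i++)
		printf("object %d at offset %lu\n", i,
		       next_offset(&pos, page_limit, 8));
	return 0;
}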
@@ -1412,6 +1515,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
gfp_t alloc_gfp;
void *start, *p;
int idx, order;
bool shuffle;

flags &= gfp_allowed_mask;

@@ -1473,15 +1577,19 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)

kasan_poison_slab(page);

for_each_object_idx(p, idx, s, start, page->objects) {
setup_object(s, page, p);
if (likely(idx < page->objects))
set_freepointer(s, p, p + s->size);
else
set_freepointer(s, p, NULL);
shuffle = shuffle_freelist(s, page);

if (!shuffle) {
for_each_object_idx(p, idx, s, start, page->objects) {
setup_object(s, page, p);
if (likely(idx < page->objects))
set_freepointer(s, p, p + s->size);
else
set_freepointer(s, p, NULL);
}
page->freelist = fixup_red_left(s, start);
}

page->freelist = fixup_red_left(s, start);
page->inuse = page->objects;
page->frozen = 1;
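Whether built sequentially or shuffled, the result is the same structure: a singly linked freelist threaded through the free objects themselves, each free object storing the address of the next. A userspace sketch of that representation, keeping the pointer at offset 0 of every free object (real SLUB keeps it at s->offset and may poison or redzone around it):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define OBJ_SIZE	64
#define NR_OBJS		8

/* Store/load the "next free" pointer inside a free object. */
static void set_freepointer_sketch(void *obj, void *next)
{
	memcpy(obj, &next, sizeof(next));
}

static void *get_freepointer_sketch(void *obj)
{
	void *next;

	memcpy(&next, obj, sizeof(next));
	return next;
}

int main(void)
{
	char *slab = malloc((size_t)OBJ_SIZE * NR_OBJS);
	void *cur;
	char *p;
	int i;

	if (!slab)
		return 1;

	/* Sequential construction, as in the !shuffle fallback path. */
	for (i = 0; i < NR_OBJS; i++) {
		p = slab + (size_t)i * OBJ_SIZE;
		set_freepointer_sketch(p, i + 1 < NR_OBJS ? p + OBJ_SIZE : NULL);
	}

	/* Walk the embedded list, as allocation would pop it. */
	for (cur = slab; cur; cur = get_freepointer_sketch(cur))
		printf("free object at offset %td\n", (char *)cur - slab);

	free(slab);
	return 0;
}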
@@ -1504,8 +1612,10 @@ out:
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
BUG();
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
flags &= ~GFP_SLAB_BUG_MASK;
pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
invalid_mask, &invalid_mask, flags, &flags);
}

return allocate_slab(s,
@@ -2867,7 +2977,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(object);
__free_kmem_pages(page, compound_order(page));
__free_pages(page, compound_order(page));
p[size] = NULL; /* mark object processed */
return size;
}

@@ -3207,6 +3317,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)

void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
free_percpu(s->cpu_slab);
free_kmem_cache_nodes(s);
}

@@ -3431,6 +3542,13 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif

/* Initialize the pre-computed randomized freelist if slab is up */
if (slab_state >= UP) {
if (init_cache_random_seq(s))
goto error;
}

if (!init_kmem_cache_nodes(s))
goto error;

@@ -3575,7 +3693,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
void *ptr = NULL;

flags |= __GFP_COMP | __GFP_NOTRACK;
page = alloc_kmem_pages_node(node, flags, get_order(size));
page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);

@@ -3656,7 +3774,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
__free_kmem_pages(page, compound_order(page));
__free_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);

@@ -3947,6 +4065,9 @@ void __init kmem_cache_init(void)
setup_kmalloc_cache_index_table();
create_kmalloc_caches(0);

/* Setup random freelists for each cache */
init_freelist_randomization();

#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
#endif
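kmem_cache_init() runs init_freelist_randomization() once the boot caches exist, walking the global cache list under slab_mutex so caches created too early still get a sequence. A userspace sketch of that register-early, initialize-in-one-late-pass pattern; the cache list, names, and _sketch helpers are invented, and a pthread mutex stands in for slab_mutex:

#include <pthread.h>
#include <stdio.h>

#define NR_CACHES 4

struct sketch_cache {
	const char *name;
	int seq_ready;		/* stands in for s->random_seq != NULL */
};

static struct sketch_cache caches[NR_CACHES] = {
	{ "kmalloc-64", 0 }, { "kmalloc-128", 0 },
	{ "dentry", 0 }, { "inode", 0 },
};
static pthread_mutex_t cache_list_lock = PTHREAD_MUTEX_INITIALIZER;

static void init_cache_random_seq_sketch(struct sketch_cache *c)
{
	c->seq_ready = 1;	/* the real code builds the permutation here */
}

/* One late pass over everything registered so far. */
static void init_freelist_randomization_sketch(void)
{
	int i;

	pthread_mutex_lock(&cache_list_lock);
	for (i = 0; i < NR_CACHES; i++)
		init_cache_random_seq_sketch(&caches[i]);
	pthread_mutex_unlock(&cache_list_lock);
}

int main(void)
{
	int i;

	init_freelist_randomization_sketch();
	for (i = 0; i < NR_CACHES; i++)
		printf("%s: %s\n", caches[i].name,
		       caches[i].seq_ready ? "randomized" : "pending");
	return 0;
}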
mm/swap.c
@@ -292,6 +292,7 @@ static bool need_activate_page_drain(int cpu)

void activate_page(struct page *page)
{
page = compound_head(page);
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

@@ -316,6 +317,7 @@ void activate_page(struct page *page)
{
struct zone *zone = page_zone(page);

page = compound_head(page);
spin_lock_irq(&zone->lru_lock);
__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
spin_unlock_irq(&zone->lru_lock);
mm/swapfile.c
@@ -2493,7 +2493,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap;
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (frontswap_enabled)
if (IS_ENABLED(CONFIG_FRONTSWAP))
frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));

if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
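IS_ENABLED(CONFIG_FRONTSWAP) is a compile-time 0 or 1, so the allocation disappears entirely when frontswap is not built in, while the branch body is still type-checked. A simplified userspace analogue; the real IS_ENABLED() macro is more involved (it also handles =m tristate options), so this only mimics the 0/1-constant idea:

#include <stdio.h>
#include <stdlib.h>

/* Build with -DFEATURE_FRONTSWAP_SKETCH to flip the constant. */
#ifdef FEATURE_FRONTSWAP_SKETCH
#define FRONTSWAP_BUILT_IN 1
#else
#define FRONTSWAP_BUILT_IN 0
#endif

int main(void)
{
	unsigned long maxpages = 1024;
	unsigned long *map = NULL;

	/* The condition is a constant: the dead branch is eliminated by the
	 * compiler, yet it must still compile, unlike an #ifdef'd-out block. */
	if (FRONTSWAP_BUILT_IN)
		map = calloc((maxpages + 63) / 64, sizeof(*map));

	printf("frontswap map %sallocated\n", map ? "" : "not ");
	free(map);
	return 0;
}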
mm/truncate.c
@@ -155,10 +155,14 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)

int truncate_inode_page(struct address_space *mapping, struct page *page)
{
loff_t holelen;
VM_BUG_ON_PAGE(PageTail(page), page);

holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE;
if (page_mapped(page)) {
unmap_mapping_range(mapping,
(loff_t)page->index << PAGE_SHIFT,
PAGE_SIZE, 0);
holelen, 0);
}
return truncate_complete_page(mapping, page);
}
@@ -279,7 +283,7 @@ void truncate_inode_pages_range(struct address_space *mapping,

if (!trylock_page(page))
continue;
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
if (PageWriteback(page)) {
unlock_page(page);
continue;

@@ -367,7 +371,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
}

lock_page(page);
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
wait_on_page_writeback(page);
truncate_inode_page(mapping, page);
unlock_page(page);
@@ -487,7 +491,21 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,

if (!trylock_page(page))
continue;
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);

/* Middle of THP: skip */
if (PageTransTail(page)) {
unlock_page(page);
continue;
} else if (PageTransHuge(page)) {
index += HPAGE_PMD_NR - 1;
i += HPAGE_PMD_NR - 1;
/* 'end' is in the middle of THP */
if (index == round_down(end, HPAGE_PMD_NR))
continue;
}

ret = invalidate_inode_page(page);
unlock_page(page);
/*
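On meeting the head of a huge page the scan advances both the file index and the pagevec slot by HPAGE_PMD_NR - 1, so the tail pages are not revisited; tails that are reached directly are just unlocked and skipped. A small sketch of that index arithmetic over a synthetic page array; HPAGE_NR and the layout are illustrative:

#include <stdio.h>

#define HPAGE_NR 4	/* pretend one huge page spans 4 base pages */

enum kind { SMALL, HUGE_HEAD, HUGE_TAIL };

int main(void)
{
	/* A file with: small page, huge page (head + 3 tails), small page. */
	enum kind pages[] = {
		SMALL, HUGE_HEAD, HUGE_TAIL, HUGE_TAIL, HUGE_TAIL, SMALL
	};
	int n = (int)(sizeof(pages) / sizeof(pages[0]));
	unsigned long index;
	int i;

	for (i = 0, index = 0; i < n; i++, index++) {
		if (pages[i] == HUGE_TAIL)
			continue;	/* middle of a THP: skip */
		if (pages[i] == HUGE_HEAD) {
			printf("invalidate huge page at index %lu\n", index);
			/* Jump over the tail pages in one step. */
			index += HPAGE_NR - 1;
			i += HPAGE_NR - 1;
			continue;
		}
		printf("invalidate small page at index %lu\n", index);
	}
	return 0;
}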
@@ -594,7 +612,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}

lock_page(page);
WARN_ON(page->index != index);
WARN_ON(page_to_pgoff(page) != index);
if (page->mapping != mapping) {
unlock_page(page);
continue;
mm/util.c
@@ -399,10 +399,12 @@ struct address_space *page_mapping(struct page *page)
}

mapping = page->mapping;
if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
return NULL;
return mapping;

return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);

/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)

@@ -410,6 +412,12 @@ int __page_mapcount(struct page *page)
int ret;

ret = atomic_read(&page->_mapcount) + 1;
/*
* For file THP page->_mapcount contains total number of mapping
* of the page: no need to look into compound_mapcount.
*/
if (!PageAnon(page) && !PageHuge(page))
return ret;
page = compound_head(page);
ret += atomic_read(compound_mapcount_ptr(page)) + 1;
if (PageDoubleMap(page))
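page->mapping doubles as a tagged pointer: the low bits mark anonymous (and, with this series, movable) pages, and page_mapping() must test and mask them before returning the address_space pointer. A userspace sketch of that tagged-pointer scheme; the tag values and struct names are made up, only the masking idea mirrors PAGE_MAPPING_ANON/PAGE_MAPPING_FLAGS:

#include <stdint.h>
#include <stdio.h>

/* Low pointer bits used as tags; values are illustrative. */
#define TAG_ANON	0x1ul
#define TAG_MOVABLE	0x2ul
#define TAG_MASK	(TAG_ANON | TAG_MOVABLE)

struct mapping { const char *name; };
struct fake_page { uintptr_t mapping; };	/* pointer plus tag bits */

static struct mapping *page_mapping_sketch(const struct fake_page *page)
{
	uintptr_t m = page->mapping;

	if (m & TAG_ANON)	/* anonymous: no file mapping to return */
		return NULL;
	return (struct mapping *)(m & ~TAG_MASK);
}

int main(void)
{
	/* The target must be aligned so the low bits are free for tags. */
	static _Alignas(8) struct mapping file_map = { "file" };
	struct fake_page file_page = { (uintptr_t)&file_map };
	struct fake_page anon_page = { (uintptr_t)&file_map | TAG_ANON };

	printf("file page -> %s\n", page_mapping_sketch(&file_page)->name);
	printf("anon page -> %s\n",
	       page_mapping_sketch(&anon_page) ? "mapping" : "NULL");
	return 0;
}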
mm/vmalloc.c
@@ -1501,7 +1501,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
struct page *page = area->pages[i];

BUG_ON(!page);
__free_kmem_pages(page, 0);
__free_pages(page, 0);
}

kvfree(area->pages);

@@ -1629,9 +1629,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;

if (node == NUMA_NO_NODE)
page = alloc_kmem_pages(alloc_mask, order);
page = alloc_pages(alloc_mask, order);
else
page = alloc_kmem_pages_node(node, alloc_mask, order);
page = alloc_pages_node(node, alloc_mask, order);

if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
mm/vmscan.c
@@ -1055,8 +1055,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,

/* Adding to swap updated mapping */
mapping = page_mapping(page);
} else if (unlikely(PageTransHuge(page))) {
/* Split file THP */
if (split_huge_page_to_list(page, page_list))
goto keep_locked;
}

VM_BUG_ON_PAGE(PageTransHuge(page), page);

/*
* The page is mapped into the page tables of one or more
* processes. Try to unmap it here.

@@ -1254,7 +1260,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,

list_for_each_entry_safe(page, next, page_list, lru) {
if (page_is_file_cache(page) && !PageDirty(page) &&
!isolated_balloon_page(page)) {
!__PageMovable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}
mm/vmstat.c
@@ -718,7 +718,9 @@ const char * const vmstat_text[] = {
"nr_dirtied",
"nr_written",
"nr_pages_scanned",

#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
#endif
#ifdef CONFIG_NUMA
"numa_hit",
"numa_miss",

@@ -731,6 +733,8 @@ const char * const vmstat_text[] = {
"workingset_activate",
"workingset_nodereclaim",
"nr_anon_transparent_hugepages",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
"nr_free_cma",

/* enum writeback_stat_item counters */

@@ -815,6 +819,8 @@ const char * const vmstat_text[] = {
"thp_fault_fallback",
"thp_collapse_alloc",
"thp_collapse_alloc_failed",
"thp_file_alloc",
"thp_file_mapped",
"thp_split_page",
"thp_split_page_failed",
"thp_deferred_split_page",
mm/zsmalloc.c
Diff not shown because it is too large.