Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - a few hot fixes

 - ocfs2 updates

 - almost all of -mm (slab-generic, slab, slub, kmemleak, kasan,
   cleanups, debug, pagecache, memcg, gup, pagemap, memory-hotplug,
   sparsemem, vmalloc, initialization, z3fold, compaction, mempolicy,
   oom-kill, hugetlb, migration, thp, mmap, madvise, shmem, zswap,
   zsmalloc)

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (132 commits)
  mm/zsmalloc.c: fix a -Wunused-function warning
  zswap: do not map same object twice
  zswap: use movable memory if zpool support allocate movable memory
  zpool: add malloc_support_movable to zpool_driver
  shmem: fix obsolete comment in shmem_getpage_gfp()
  mm/madvise: reduce code duplication in error handling paths
  mm: mmap: increase sockets maximum memory size pgoff for 32bits
  mm/mmap.c: refine find_vma_prev() with rb_last()
  riscv: make mmap allocation top-down by default
  mips: use generic mmap top-down layout and brk randomization
  mips: replace arch specific way to determine 32bit task with generic version
  mips: adjust brk randomization offset to fit generic version
  mips: use STACK_TOP when computing mmap base address
  mips: properly account for stack randomization and stack guard gap
  arm: use generic mmap top-down layout and brk randomization
  arm: use STACK_TOP when computing mmap base address
  arm: properly account for stack randomization and stack guard gap
  arm64, mm: make randomization selected by generic topdown mmap layout
  arm64, mm: move generic mmap layout functions to mm
  arm64: consider stack randomization for mmap base only when necessary
  ...
This commit is contained in:
Linus Torvalds
2019-09-24 16:10:23 -07:00
204 changed files with 2275 additions and 2446 deletions

View File

@@ -129,11 +129,8 @@ static inline bool compaction_failed(enum compact_result result)
return false;
}
/*
* Compaction has backed off for some reason. It might be throttling or
* lock contention. Retrying is still worthwhile.
*/
static inline bool compaction_withdrawn(enum compact_result result)
/* Compaction needs reclaim to be performed first, so it can continue. */
static inline bool compaction_needs_reclaim(enum compact_result result)
{
/*
* Compaction backed off due to watermark checks for order-0
@@ -142,6 +139,16 @@ static inline bool compaction_withdrawn(enum compact_result result)
if (result == COMPACT_SKIPPED)
return true;
return false;
}
/*
* Compaction has backed off for some reason after doing some work or none
* at all. It might be throttling or lock contention. Retrying might still be
* worthwhile, but with a higher priority if allowed.
*/
static inline bool compaction_withdrawn(enum compact_result result)
{
/*
* If compaction is deferred for high-order allocations, it is
* because sync compaction recently failed. If this is the case
@@ -207,6 +214,11 @@ static inline bool compaction_failed(enum compact_result result)
return false;
}
/*
* Stub variant: reports that no reclaim is needed, whatever @result is.
* NOTE(review): presumably the !CONFIG_COMPACTION counterpart of the real
* helper; the enclosing #ifdef is not visible here, so confirm.
*/
static inline bool compaction_needs_reclaim(enum compact_result result)
{
return false;
}
static inline bool compaction_withdrawn(enum compact_result result)
{
return true;

View File

@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
* @i_pages: Cached pages.
* @gfp_mask: Memory allocation flags to use for allocating pages.
* @i_mmap_writable: Number of VM_SHARED mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
* @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
struct xarray i_pages;
gfp_t gfp_mask;
atomic_t i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
/* number of thp, only for non-shmem files */
atomic_t nr_thps;
#endif
struct rb_root_cached i_mmap;
struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages;
@@ -2798,6 +2803,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
return errseq_sample(&mapping->wb_err);
}
/*
 * Read the number of THPs recorded in @mapping->nr_thps.  Kernels built
 * without CONFIG_READ_ONLY_THP_FOR_FS have no such counter and report 0.
 */
static inline int filemap_nr_thps(struct address_space *mapping)
{
	int nr = 0;

#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	nr = atomic_read(&mapping->nr_thps);
#endif
	return nr;
}
/*
 * Bump @mapping->nr_thps by one.  Without CONFIG_READ_ONLY_THP_FOR_FS the
 * counter does not exist, so a call only triggers a one-time warning.
 */
static inline void filemap_nr_thps_inc(struct address_space *mapping)
{
#ifndef CONFIG_READ_ONLY_THP_FOR_FS
	WARN_ON_ONCE(1);
#else
	atomic_inc(&mapping->nr_thps);
#endif
}
/*
 * Drop @mapping->nr_thps by one.  Without CONFIG_READ_ONLY_THP_FOR_FS the
 * counter does not exist, so a call only triggers a one-time warning.
 */
static inline void filemap_nr_thps_dec(struct address_space *mapping)
{
#ifndef CONFIG_READ_ONLY_THP_FOR_FS
	WARN_ON_ONCE(1);
#else
	atomic_dec(&mapping->nr_thps);
#endif
}
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);

View File

@@ -267,6 +267,15 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
/*
 * Return the deferred list head of @page.  The same list head serves both
 * the global and the memcg deferred list and is stored in the second tail
 * page (page[2]), whose first word is taken up by compound_head.
 */
static inline struct list_head *page_deferred_list(struct page *page)
{
	struct page *second_tail = page + 2;

	return &second_tail->deferred_list;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })

View File

@@ -454,7 +454,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
static inline struct hstate *page_hstate(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
return size_to_hstate(PAGE_SIZE << compound_order(page));
return size_to_hstate(page_size(page));
}
static inline unsigned hstate_index_to_shift(unsigned index)

View File

@@ -1410,8 +1410,6 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *inode, loff_t start_byte,
loff_t length);

View File

@@ -15,6 +15,14 @@ extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags);
#ifdef CONFIG_SHMEM
extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
#else
/* No-op stand-in used when CONFIG_SHMEM is not set. */
static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
unsigned long addr)
{
}
#endif
#define khugepaged_enabled() \
(transparent_hugepage_flags & \
@@ -73,6 +81,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
{
return 0;
}
/*
* No-op stand-in.
* NOTE(review): presumably the !CONFIG_TRANSPARENT_HUGEPAGE branch, judging
* by the #endif that follows; the matching #else is not visible here.
*/
static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
unsigned long addr)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_KHUGEPAGED_H */

View File

@@ -128,9 +128,8 @@ struct mem_cgroup_per_node {
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
#ifdef CONFIG_MEMCG_KMEM
struct memcg_shrinker_map __rcu *shrinker_map;
#endif
struct rb_node tree_node; /* RB tree node */
unsigned long usage_in_excess;/* Set to the value by which */
/* the soft limit is exceeded*/
@@ -331,6 +330,10 @@ struct mem_cgroup {
struct list_head event_list;
spinlock_t event_list_lock;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split deferred_split_queue;
#endif
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
@@ -1311,6 +1314,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
} while ((memcg = parent_mem_cgroup(memcg)));
return false;
}
extern int memcg_expand_shrinker_maps(int new_id);
extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id);
#else
#define mem_cgroup_sockets_enabled 0
/*
 * No-op stub for the configuration in which mem_cgroup_sockets_enabled is
 * defined as 0: there is nothing to set up for @sk.
 */
static inline void mem_cgroup_sk_alloc(struct sock *sk)
{
}
@@ -1319,6 +1327,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
return false;
}
/*
* Empty stub for the #else branch: all three arguments are ignored.
* NOTE(review): the condition of the enclosing #if is not visible here.
*/
static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id)
{
}
#endif
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
@@ -1390,10 +1403,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
return memcg ? memcg->kmemcg_id : -1;
}
extern int memcg_expand_shrinker_maps(int new_id);
extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id);
#else
static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1435,8 +1444,6 @@ static inline void memcg_put_cache_ids(void)
{
}
/* Empty stub: all three arguments are ignored. */
static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id) { }
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */

View File

@@ -25,7 +25,6 @@
struct memory_block {
unsigned long start_section_nr;
unsigned long end_section_nr;
unsigned long state; /* serialized by the dev->lock */
int section_count; /* serialized by mem_sysfs_mutex */
int online_type; /* for passing data to online routine */
@@ -80,9 +79,9 @@ struct mem_section;
#define IPC_CALLBACK_PRI 10
#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline int memory_dev_init(void)
static inline void memory_dev_init(void)
{
return 0;
return;
}
static inline int register_memory_notifier(struct notifier_block *nb)
{
@@ -113,7 +112,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern int memory_dev_init(void);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
extern int memory_isolate_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block(struct mem_section *);

View File

@@ -805,6 +805,24 @@ static inline void set_compound_order(struct page *page, unsigned int order)
page[1].compound_order = order;
}
/*
 * Number of pages that make up @page, i.e. 1 << compound_order(page).
 * @page may or may not be a compound page.
 */
static inline unsigned long compound_nr(struct page *page)
{
	unsigned int order = compound_order(page);

	return 1UL << order;
}
/*
 * Size of @page in bytes, i.e. PAGE_SIZE << compound_order(page).
 * @page may or may not be a compound page.
 */
static inline unsigned long page_size(struct page *page)
{
	unsigned int order = compound_order(page);

	return PAGE_SIZE << order;
}
/*
 * Shift that corresponds to the byte size of @page, i.e.
 * PAGE_SHIFT + compound_order(page).
 */
static inline unsigned int page_shift(struct page *page)
{
	unsigned int order = compound_order(page);

	return PAGE_SHIFT + order;
}
void free_compound_page(struct page *page);
#ifdef CONFIG_MMU
@@ -1057,8 +1075,9 @@ static inline void put_user_page(struct page *page)
put_page(page);
}
void put_user_pages_dirty(struct page **pages, unsigned long npages);
void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty);
void put_user_pages(struct page **pages, unsigned long npages);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -1405,7 +1424,11 @@ extern void pagefault_out_of_memory(void);
extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
/* Stub used when CONFIG_MMU is not set: always answers false. */
static inline bool can_do_mlock(void)
{
	return false;
}
#endif
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
@@ -2305,6 +2328,8 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
unsigned long randomize_stack_top(unsigned long stack_top);
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2568,6 +2593,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
/*
* NOTE on FOLL_LONGTERM:
@@ -2845,5 +2871,12 @@ void __init setup_nr_node_ids(void);
/*
* Empty stub.
* NOTE(review): presumably the fallback for configurations that do not build
* the out-of-line __init version; the enclosing #if is not visible here.
*/
static inline void setup_nr_node_ids(void) {}
#endif
extern int memcmp_pages(struct page *page1, struct page *page2);
/*
 * Return 1 when memcmp_pages() reports no difference between @page1 and
 * @page2 (i.e. it returns 0), and 0 otherwise.
 */
static inline int pages_identical(struct page *page1, struct page *page2)
{
	return memcmp_pages(page1, page2) == 0;
}
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */

View File

@@ -138,6 +138,7 @@ struct page {
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
/* For both global and memcg */
struct list_head deferred_list;
};
struct { /* Page table pages */

View File

@@ -235,6 +235,8 @@ enum node_stat_item {
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
NR_FILE_THPS,
NR_FILE_PMDMAPPED,
NR_ANON_THPS,
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_VMSCAN_WRITE,
@@ -677,6 +679,14 @@ struct zonelist {
extern struct page *mem_map;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* State of one deferred-split queue. An instance is embedded as
* deferred_split_queue in both struct pglist_data and struct mem_cgroup.
*/
struct deferred_split {
/* NOTE(review): presumably guards split_queue and split_queue_len; confirm */
spinlock_t split_queue_lock;
/* NOTE(review): presumably linked through page_deferred_list(); confirm */
struct list_head split_queue;
/* NOTE(review): presumably the number of entries on split_queue; confirm */
unsigned long split_queue_len;
};
#endif
/*
* On NUMA machines, each NUMA node would have a pg_data_t to describe
* its memory layout. On UMA machines there is a single pglist_data which
@@ -756,9 +766,7 @@ typedef struct pglist_data {
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
spinlock_t split_queue_lock;
struct list_head split_queue;
unsigned long split_queue_len;
struct deferred_split deferred_split_queue;
#endif
/* Fields commonly accessed by the page reclaim scanner */

View File

@@ -18,6 +18,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
PAGE_EXT_OWNER_ACTIVE,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,

View File

@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
/*
 * Pick the page within @page that corresponds to @offset.
 *
 * A hugetlb page is handed back unchanged.  For any other page, @page must
 * not be a tail page (checked with VM_BUG_ON_PAGE); the result is @page
 * advanced by @offset modulo compound_nr(page).
 */
static inline struct page *find_subpage(struct page *page, pgoff_t offset)
{
	unsigned long mask;

	if (PageHuge(page))
		return page;

	VM_BUG_ON_PAGE(PageTail(page), page);

	/* compound_nr() is a power of two, so "nr - 1" masks the low bits. */
	mask = compound_nr(page) - 1;
	return page + (offset & mask);
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,

View File

@@ -1,94 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_QUICKLIST_H
#define LINUX_QUICKLIST_H
/*
* Fast allocations and disposal of pages. Pages must be in the condition
* as needed after allocation when they are freed. Per cpu lists of pages
* are kept that only contain node local pages.
*
* (C) 2007, SGI. Christoph Lameter <cl@linux.com>
*/
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
#ifdef CONFIG_QUICKLIST
/*
* One per-cpu list of free pages. The pages are chained through their first
* word: quicklist_alloc() pops the head, __quicklist_free() pushes onto it.
*/
struct quicklist {
void *page; /* first page on the list; NULL when the list is empty */
int nr_pages; /* incremented on every push, decremented on every pop */
};
DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
/*
 * quicklist_alloc() and quicklist_free() are the two key functions and are
 * kept inline so that they may be custom compiled for the platform.
 * Passing a NULL ctor can remove constructor support, and passing a constant
 * quicklist number allows the exact address in the per cpu area to be
 * determined.
 *
 * The fast path of quicklist_alloc() touches only a per cpu cacheline and
 * the first cacheline of the page itself, so there is minimal overhead.
 *
 * Returns a page taken from quicklist @nr of this CPU, or, when that list is
 * empty, a page from __get_free_page(flags | __GFP_ZERO) on which @ctor (if
 * any) has been run.  The result of a failed allocation is passed through
 * unchanged.
 */
static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
{
	struct quicklist *list = &get_cpu_var(quicklist)[nr];
	void **head = list->page;

	if (likely(head)) {
		/* Pop: the first word of a listed page links to the next one. */
		list->page = *head;
		*head = NULL;
		list->nr_pages--;
	}
	put_cpu_var(quicklist);

	if (likely(head))
		return head;

	/* List was empty: get a new page; only new pages see the ctor. */
	head = (void *)__get_free_page(flags | __GFP_ZERO);
	if (head && ctor)
		ctor(head);
	return head;
}
/*
 * Push the page at address @p onto quicklist @nr of this CPU, storing the
 * previous list head in the first word of the page.  @dtor and @page are
 * accepted but not used by this function.
 */
static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
				    struct page *page)
{
	struct quicklist *list = &get_cpu_var(quicklist)[nr];
	void **link = p;

	*link = list->page;
	list->page = p;
	list->nr_pages++;
	put_cpu_var(quicklist);
}
/*
 * Return the page at virtual address @pp to quicklist @nr; the struct page
 * handed on to __quicklist_free() is looked up with virt_to_page().
 */
static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
{
	struct page *page = virt_to_page(pp);

	__quicklist_free(nr, dtor, pp, page);
}
/*
 * Return @page to quicklist @nr; the address handed on to
 * __quicklist_free() is looked up with page_address().
 */
static inline void quicklist_free_page(int nr, void (*dtor)(void *),
				       struct page *page)
{
	void *addr = page_address(page);

	__quicklist_free(nr, dtor, addr, page);
}
void quicklist_trim(int nr, void (*dtor)(void *),
unsigned long min_pages, unsigned long max_free);
unsigned long quicklist_total_size(void);
#else
/* Stub used when CONFIG_QUICKLIST is not set: reports a size of 0. */
static inline unsigned long quicklist_total_size(void)
{
	const unsigned long total = 0;

	return total;
}
#endif
#endif /* LINUX_QUICKLIST_H */

View File

@@ -69,7 +69,7 @@ struct shrinker {
/* These are for internal use */
struct list_head list;
#ifdef CONFIG_MEMCG_KMEM
#ifdef CONFIG_MEMCG
/* ID in shrinker_idr */
int id;
#endif
@@ -81,6 +81,11 @@ struct shrinker {
/* Flags */
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_MEMCG_AWARE (1 << 1)
/*
* For now this flag only makes sense when the shrinker is also MEMCG_AWARE;
* a non-MEMCG_AWARE shrinker should not have it set.
*/
#define SHRINKER_NONSLAB (1 << 2)
extern int prealloc_shrinker(struct shrinker *shrinker);
extern void register_shrinker_prepared(struct shrinker *shrinker);

View File

@@ -595,68 +595,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
return __kmalloc_node(size, flags, node);
}
/*
* Table of child caches that a root cache points to through
* memcg_cache_params.memcg_caches.
*/
struct memcg_cache_array {
/* NOTE(review): presumably used to free a replaced table after a grace period; confirm */
struct rcu_head rcu;
/* child caches, indexed by kmemcg ID */
struct kmem_cache *entries[0];
};
/*
* This is the main placeholder for memcg-related information in kmem caches.
* Both the root cache and the child caches will have it. For the root cache,
* this will hold a dynamically allocated array large enough to hold
* information about the currently limited memcgs in the system. To allow the
* array to be accessed without taking any locks, on relocation we free the old
* version only after a grace period.
*
* Root and child caches hold different metadata.
*
* @root_cache: Common to root and child caches. NULL for root, pointer to
* the root cache for children.
*
* The following fields are specific to root caches.
*
* @memcg_caches: kmemcg ID indexed table of child caches. This table is
* used to index child caches during allocation and cleared
* early during shutdown.
*
* @root_caches_node: List node for slab_root_caches list.
*
* @children: List of all child caches. While the child caches are also
* reachable through @memcg_caches, a child cache remains on
* this list until it is actually destroyed.
*
* The following fields are specific to child caches.
*
* @memcg: Pointer to the memcg this cache belongs to.
*
* @children_node: List node for @root_cache->children list.
*
* @kmem_caches_node: List node for @memcg->kmem_caches list.
*/
struct memcg_cache_params {
/* NULL for a root cache, the root cache for a child */
struct kmem_cache *root_cache;
union {
/* Fields used by root caches only. */
struct {
struct memcg_cache_array __rcu *memcg_caches;
/* documented above as @root_caches_node */
struct list_head __root_caches_node;
struct list_head children;
/* NOTE(review): not described in the comment above; meaning to be confirmed */
bool dying;
};
/* Fields used by child caches only. */
struct {
struct mem_cgroup *memcg;
struct list_head children_node;
struct list_head kmem_caches_node;
/* NOTE(review): refcnt and work_fn are not described in the comment above; confirm their role */
struct percpu_ref refcnt;
void (*work_fn)(struct kmem_cache *);
/* rcu_head and work share storage, so only one can be in use at a time */
union {
struct rcu_head rcu_head;
struct work_struct work;
};
};
};
};
int memcg_update_all_caches(int num_memcgs);
/**

View File

@@ -53,15 +53,21 @@ struct vmap_area {
unsigned long va_start;
unsigned long va_end;
/*
* Largest available free size in subtree.
*/
unsigned long subtree_max_size;
unsigned long flags;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
struct llist_node purge_list; /* "lazy purge" list */
struct vm_struct *vm;
/*
* The following three variables can be packed, because
* a vmap_area object is always one of the three states:
* 1) in "free" tree (root is free_vmap_area_root)
* 2) in "busy" tree (root is vmap_area_root)
* 3) in purge list (head is vmap_purge_list)
*/
union {
unsigned long subtree_max_size; /* in "free" tree */
struct vm_struct *vm; /* in "busy" tree */
struct llist_node purge_list; /* in purge list */
};
};
/*

View File

@@ -46,6 +46,8 @@ const char *zpool_get_type(struct zpool *pool);
void zpool_destroy_pool(struct zpool *pool);
bool zpool_malloc_support_movable(struct zpool *pool);
int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
unsigned long *handle);
@@ -90,6 +92,7 @@ struct zpool_driver {
struct zpool *zpool);
void (*destroy)(void *pool);
bool malloc_support_movable;
int (*malloc)(void *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void (*free)(void *pool, unsigned long handle);