Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
 "VM:
   - z3fold fixes and enhancements by Henry Burns and Vitaly Wool
   - more accurate reclaimed slab caches calculations by Yafang Shao
   - fix MAP_UNINITIALIZED UAPI symbol to not depend on config, by Christoph Hellwig
   - !CONFIG_MMU fixes by Christoph Hellwig
   - new novmcoredd parameter to omit device dumps from vmcore, by Kairui Song
   - new test_meminit module for testing heap and pagealloc initialization, by Alexander Potapenko
   - ioremap improvements for huge mappings, by Anshuman Khandual
   - generalize kprobe page fault handling, by Anshuman Khandual
   - device-dax hotplug fixes and improvements, by Pavel Tatashin
   - enable synchronous DAX fault on powerpc, by Aneesh Kumar K.V
   - add pte_devmap() support for arm64, by Robin Murphy
   - unify locked_vm accounting with a helper, by Daniel Jordan
   - several misc fixes

  core/lib:
   - new typeof_member() macro including some users, by Alexey Dobriyan
   - make BIT() and GENMASK() available in asm, by Masahiro Yamada
   - changed LIST_POISON2 on x86_64 to 0xdead000000000122 for better code generation, by Alexey Dobriyan
   - rbtree code size optimizations, by Michel Lespinasse
   - convert struct pid count to refcount_t, by Joel Fernandes

  get_maintainer.pl:
   - add --no-moderated switch to skip moderated ML's, by Joe Perches

  misc:
   - ptrace PTRACE_GET_SYSCALL_INFO interface
   - coda updates
   - gdb scripts, various"

[ Using merge message suggestion from Vlastimil Babka, with some editing - Linus ]

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (100 commits)
  fs/select.c: use struct_size() in kmalloc()
  mm: add account_locked_vm utility function
  arm64: mm: implement pte_devmap support
  mm: introduce ARCH_HAS_PTE_DEVMAP
  mm: clean up is_device_*_page() definitions
  mm/mmap: move common defines to mman-common.h
  mm: move MAP_SYNC to asm-generic/mman-common.h
  device-dax: "Hotremove" persistent memory that is used like normal RAM
  mm/hotplug: make remove_memory() interface usable
  device-dax: fix memory and resource leak if hotplug fails
  include/linux/lz4.h: fix spelling and copy-paste errors in documentation
  ipc/mqueue.c: only perform resource calculation if user valid
  include/asm-generic/bug.h: fix "cut here" for WARN_ON for __WARN_TAINT architectures
  scripts/gdb: add helpers to find and list devices
  scripts/gdb: add lx-genpd-summary command
  drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl
  kernel/pid.c: convert struct pid count to refcount_t
  drivers/rapidio/devices/rio_mport_cdev.c: NUL terminate some strings
  select: shift restore_saved_sigmask_unless() into poll_select_copy_remaining()
  select: change do_poll() to return -ERESTARTNOHAND rather than -EINTR
  ...
@@ -649,8 +649,7 @@ config IDLE_PAGE_TRACKING
           See Documentation/admin-guide/mm/idle_page_tracking.rst for
           more details.
 
-# arch_add_memory() comprehends device memory
-config ARCH_HAS_ZONE_DEVICE
+config ARCH_HAS_PTE_DEVMAP
         bool
 
 config ZONE_DEVICE
@@ -658,7 +657,7 @@ config ZONE_DEVICE
         depends on MEMORY_HOTPLUG
         depends on MEMORY_HOTREMOVE
         depends on SPARSEMEM_VMEMMAP
-        depends on ARCH_HAS_ZONE_DEVICE
+        depends on ARCH_HAS_PTE_DEVMAP
         select XARRAY_MULTI
 
         help
15  mm/cma.c

@@ -278,6 +278,12 @@ int __init cma_declare_contiguous(phys_addr_t base,
          */
         alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
                         max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
+        if (fixed && base & (alignment - 1)) {
+                ret = -EINVAL;
+                pr_err("Region at %pa must be aligned to %pa bytes\n",
+                        &base, &alignment);
+                goto err;
+        }
         base = ALIGN(base, alignment);
         size = ALIGN(size, alignment);
         limit &= ~(alignment - 1);
@@ -308,6 +314,13 @@ int __init cma_declare_contiguous(phys_addr_t base,
         if (limit == 0 || limit > memblock_end)
                 limit = memblock_end;
 
+        if (base + size > limit) {
+                ret = -EINVAL;
+                pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
+                        &size, &base, &limit);
+                goto err;
+        }
+
         /* Reserve memory */
         if (fixed) {
                 if (memblock_is_region_reserved(base, size) ||
@@ -494,7 +507,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
  * @pages: Allocated pages.
  * @count: Number of allocated pages.
  *
- * This function releases memory allocated by alloc_cma().
+ * This function releases memory allocated by cma_alloc().
  * It returns false when provided pages do not belong to contiguous area and
  * true otherwise.
  */
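A quick worked example of the alignment the new check in cma_declare_contiguous() enforces above; the PAGE_SIZE, MAX_ORDER and pageblock_order values below are assumed typical x86-64 defaults, not taken from this diff:

/* Standalone sketch of the kernel expression
 *   PAGE_SIZE << max_t(unsigned long, MAX_ORDER - 1, pageblock_order)
 * with assumed values PAGE_SIZE = 4096, MAX_ORDER = 11, pageblock_order = 9.
 */
#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096;
        unsigned long max_order = 11;
        unsigned long pageblock_order = 9;
        unsigned long shift = (max_order - 1 > pageblock_order) ?
                              max_order - 1 : pageblock_order;

        /* 4096 << 10 = 4 MiB: a fixed base that is not aligned to this now
         * fails with -EINVAL instead of being silently rounded up past the
         * caller's requested region.
         */
        printf("CMA alignment: %lu bytes\n", page_size << shift);
        return 0;
}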
2  mm/gup.c

@@ -1895,7 +1895,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 }
 #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
 
-#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
                 unsigned long end, struct page **pages, int *nr)
 {
@@ -695,12 +695,15 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
         if (mem_cgroup_disabled())
                 return;
 
-        __this_cpu_add(memcg->vmstats_local->stat[idx], val);
-
         x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
         if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                 struct mem_cgroup *mi;
 
+                /*
+                 * Batch local counters to keep them in sync with
+                 * the hierarchical ones.
+                 */
+                __this_cpu_add(memcg->vmstats_local->stat[idx], x);
                 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
                         atomic_long_add(x, &mi->vmstats[idx]);
                 x = 0;
@@ -749,13 +752,15 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
         /* Update memcg */
         __mod_memcg_state(memcg, idx, val);
 
-        /* Update lruvec */
-        __this_cpu_add(pn->lruvec_stat_local->count[idx], val);
-
         x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
         if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
                 struct mem_cgroup_per_node *pi;
 
+                /*
+                 * Batch local counters to keep them in sync with
+                 * the hierarchical ones.
+                 */
+                __this_cpu_add(pn->lruvec_stat_local->count[idx], x);
                 for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
                         atomic_long_add(x, &pi->lruvec_stat[idx]);
                 x = 0;
@@ -777,12 +782,15 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
         if (mem_cgroup_disabled())
                 return;
 
-        __this_cpu_add(memcg->vmstats_local->events[idx], count);
-
         x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
         if (unlikely(x > MEMCG_CHARGE_BATCH)) {
                 struct mem_cgroup *mi;
 
+                /*
+                 * Batch local counters to keep them in sync with
+                 * the hierarchical ones.
+                 */
+                __this_cpu_add(memcg->vmstats_local->events[idx], x);
                 for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
                         atomic_long_add(x, &mi->vmevents[idx]);
                 x = 0;
@@ -1734,9 +1734,10 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
                 endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
                 pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
                         &beginpa, &endpa);
-        }
 
-        return ret;
+                return -EBUSY;
+        }
+        return 0;
 }
 
 static int check_cpu_on_node(pg_data_t *pgdat)
@@ -1819,19 +1820,9 @@ static void __release_memory_resource(resource_size_t start,
         }
 }
 
-/**
- * remove_memory
- * @nid: the node ID
- * @start: physical address of the region to remove
- * @size: size of the region to remove
- *
- * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
- * and online/offline operations before this call, as required by
- * try_offline_node().
- */
-void __ref __remove_memory(int nid, u64 start, u64 size)
+static int __ref try_remove_memory(int nid, u64 start, u64 size)
 {
-        int ret;
+        int rc = 0;
 
         BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -1839,13 +1830,13 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
 
         /*
          * All memory blocks must be offlined before removing memory. Check
-         * whether all memory blocks in question are offline and trigger a BUG()
+         * whether all memory blocks in question are offline and return error
          * if this is not the case.
          */
-        ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
-                check_memblock_offlined_cb);
-        if (ret)
-                BUG();
+        rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
+                check_memblock_offlined_cb);
+        if (rc)
+                goto done;
 
         /* remove memmap entry */
         firmware_map_remove(start, start + size, "System RAM");
@@ -1857,14 +1848,45 @@ void __ref __remove_memory(int nid, u64 start, u64 size)
 
         try_offline_node(nid);
 
+done:
         mem_hotplug_done();
+        return rc;
 }
 
-void remove_memory(int nid, u64 start, u64 size)
+/**
+ * remove_memory
+ * @nid: the node ID
+ * @start: physical address of the region to remove
+ * @size: size of the region to remove
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
+void __remove_memory(int nid, u64 start, u64 size)
+{
+
+        /*
+         * trigger BUG() is some memory is not offlined prior to calling this
+         * function
+         */
+        if (try_remove_memory(nid, start, size))
+                BUG();
+}
+
+/*
+ * Remove memory if every memory block is offline, otherwise return -EBUSY is
+ * some memory is not offline
+ */
+int remove_memory(int nid, u64 start, u64 size)
 {
+        int rc;
+
         lock_device_hotplug();
-        __remove_memory(nid, start, size);
+        rc = try_remove_memory(nid, start, size);
         unlock_device_hotplug();
+
+        return rc;
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
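A minimal sketch of how a hot-remove path can consume the new return value instead of relying on BUG(); only remove_memory() itself comes from this diff, the surrounding function and message are hypothetical:

/* Hypothetical caller: try to unplug a memory range that was hot-added
 * earlier.  remove_memory() now takes the device hotplug lock itself and
 * returns -EBUSY when some memory block in the range is still online, so
 * the caller can back off and retry instead of crashing the kernel.
 */
static int example_unplug_range(int nid, u64 start, u64 size)
{
        int rc;

        rc = remove_memory(nid, start, size);
        if (rc) {
                pr_warn("memory %#llx-%#llx still online, deferring removal\n",
                        (unsigned long long)start,
                        (unsigned long long)(start + size - 1));
                return rc;
        }
        return 0;
}

Callers that must not proceed while the memory is still present keep the old semantics via __remove_memory(), which BUG()s on failure as before.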
@@ -1261,7 +1261,9 @@ unsigned long do_mmap(struct file *file,
         add_nommu_region(region);
 
         /* clear anonymous mappings that don't ask for uninitialized data */
-        if (!vma->vm_file && !(flags & MAP_UNINITIALIZED))
+        if (!vma->vm_file &&
+            (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) ||
+             !(flags & MAP_UNINITIALIZED)))
                 memset((void *)region->vm_start, 0,
                        region->vm_end - region->vm_start);
 
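For context on the userspace side, a small illustrative snippet (not from the patch): MAP_UNINITIALIZED is now unconditionally part of the UAPI, and a kernel built without CONFIG_MMAP_ALLOW_UNINITIALIZED simply ignores the flag and zeroes the mapping, as the code above shows. The fallback #define below mirrors the asm-generic value and is an assumption for older libc headers.

/* Illustrative nommu userspace use of MAP_UNINITIALIZED. */
#include <stddef.h>
#include <sys/mman.h>

#ifndef MAP_UNINITIALIZED
#define MAP_UNINITIALIZED 0x4000000
#endif

static void *grab_scratch_buffer(size_t len)
{
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED,
                       -1, 0);

        return (p == MAP_FAILED) ? NULL : p;
}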
@@ -4102,7 +4102,6 @@ static int
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
                   const struct alloc_context *ac)
 {
-        struct reclaim_state reclaim_state;
         int progress;
         unsigned int noreclaim_flag;
         unsigned long pflags;
@@ -4114,13 +4113,10 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
         psi_memstall_enter(&pflags);
         fs_reclaim_acquire(gfp_mask);
         noreclaim_flag = memalloc_noreclaim_save();
-        reclaim_state.reclaimed_slab = 0;
-        current->reclaim_state = &reclaim_state;
 
         progress = try_to_free_pages(ac->zonelist, order, gfp_mask,
                                      ac->nodemask);
 
-        current->reclaim_state = NULL;
         memalloc_noreclaim_restore(noreclaim_flag);
         fs_reclaim_release(gfp_mask);
         psi_memstall_leave(&pflags);
@@ -400,7 +400,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 
 static int shmem_huge __read_mostly;
 
-#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
+#if defined(CONFIG_SYSFS)
 static int shmem_parse_huge(const char *str)
 {
         if (!strcmp(str, "never"))
@@ -417,7 +417,9 @@ static int shmem_parse_huge(const char *str)
                 return SHMEM_HUGE_FORCE;
         return -EINVAL;
 }
+#endif
 
+#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
 static const char *shmem_format_huge(int huge)
 {
         switch (huge) {
@@ -1028,7 +1028,8 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
 }
 
 struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
 
 /*
75  mm/util.c

@@ -7,6 +7,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
 #include <linux/security.h>
 #include <linux/swap.h>
@@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 }
 #endif
 
+/**
+ * __account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
+ * @task: task used to check RLIMIT_MEMLOCK
+ * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
+ *
+ * Assumes @task and @mm are valid (i.e. at least one reference on each), and
+ * that mmap_sem is held as writer.
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+                        struct task_struct *task, bool bypass_rlim)
+{
+        unsigned long locked_vm, limit;
+        int ret = 0;
+
+        lockdep_assert_held_write(&mm->mmap_sem);
+
+        locked_vm = mm->locked_vm;
+        if (inc) {
+                if (!bypass_rlim) {
+                        limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+                        if (locked_vm + pages > limit)
+                                ret = -ENOMEM;
+                }
+                if (!ret)
+                        mm->locked_vm = locked_vm + pages;
+        } else {
+                WARN_ON_ONCE(pages > locked_vm);
+                mm->locked_vm = locked_vm - pages;
+        }
+
+        pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
+                 (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
+                 locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
+                 ret ? " - exceeded" : "");
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(__account_locked_vm);
+
+/**
+ * account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm: mm to account against, may be NULL
+ * @pages: number of pages to account
+ * @inc: %true if @pages should be considered positive, %false if not
+ *
+ * Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
+ *
+ * Return:
+ * * 0 on success, or if mm is NULL
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
+{
+        int ret;
+
+        if (pages == 0 || !mm)
+                return 0;
+
+        down_write(&mm->mmap_sem);
+        ret = __account_locked_vm(mm, pages, inc, current,
+                                  capable(CAP_IPC_LOCK));
+        up_write(&mm->mmap_sem);
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(account_locked_vm);
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
         unsigned long len, unsigned long prot,
         unsigned long flag, unsigned long pgoff)
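As a usage note for the new helper pair above, a minimal sketch of a driver-style caller; the pin/unpin wrappers are hypothetical, only account_locked_vm() and its semantics come from this diff:

/* Hypothetical driver snippet: charge pages that are about to be pinned
 * against the mm's locked_vm.  account_locked_vm() takes mmap_sem as writer
 * itself and skips the RLIMIT_MEMLOCK check when the caller has CAP_IPC_LOCK;
 * it returns -ENOMEM when the limit would be exceeded.
 */
#include <linux/mm.h>

static int example_pin_pages(struct mm_struct *mm, unsigned long npages)
{
        int ret;

        ret = account_locked_vm(mm, npages, true);      /* inc */
        if (ret)
                return ret;

        /* ... actually pin the pages here ... */

        return 0;
}

static void example_unpin_pages(struct mm_struct *mm, unsigned long npages)
{
        /* ... unpin the pages ... */

        account_locked_vm(mm, npages, false);           /* dec */
}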
44  mm/vmscan.c

@@ -131,6 +131,9 @@ struct scan_control {
                 unsigned int file_taken;
                 unsigned int taken;
         } nr;
+
+        /* for recording the reclaimed slab by now */
+        struct reclaim_state reclaim_state;
 };
 
 #ifdef ARCH_HAS_PREFETCH
@@ -238,6 +241,18 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+static void set_task_reclaim_state(struct task_struct *task,
+                                   struct reclaim_state *rs)
+{
+        /* Check for an overwrite */
+        WARN_ON_ONCE(rs && task->reclaim_state);
+
+        /* Check for the nulling of an already-nulled member */
+        WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+        task->reclaim_state = rs;
+}
+
 #ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -3191,11 +3206,13 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
         if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
                 return 1;
 
+        set_task_reclaim_state(current, &sc.reclaim_state);
         trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
 
         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
         trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+        set_task_reclaim_state(current, NULL);
 
         return nr_reclaimed;
 }
@@ -3218,6 +3235,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
         };
         unsigned long lru_pages;
 
+        set_task_reclaim_state(current, &sc.reclaim_state);
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
@@ -3235,7 +3253,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
+        set_task_reclaim_state(current, NULL);
         *nr_scanned = sc.nr_scanned;
+
         return sc.nr_reclaimed;
 }
 
@@ -3262,6 +3282,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                 .may_shrinkslab = 1,
         };
 
+        set_task_reclaim_state(current, &sc.reclaim_state);
         /*
          * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
          * take care of from where we get pages. So the node where we start the
@@ -3282,6 +3303,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
         psi_memstall_leave(&pflags);
 
         trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+        set_task_reclaim_state(current, NULL);
 
         return nr_reclaimed;
 }
@@ -3483,6 +3505,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                 .may_unmap = 1,
         };
 
+        set_task_reclaim_state(current, &sc.reclaim_state);
         psi_memstall_enter(&pflags);
         __fs_reclaim_acquire();
 
@@ -3664,6 +3687,8 @@ out:
         snapshot_refaults(NULL, pgdat);
         __fs_reclaim_release();
         psi_memstall_leave(&pflags);
+        set_task_reclaim_state(current, NULL);
+
         /*
          * Return the order kswapd stopped reclaiming at as
          * prepare_kswapd_sleep() takes it into account. If another caller
@@ -3787,15 +3812,10 @@ static int kswapd(void *p)
         unsigned int classzone_idx = MAX_NR_ZONES - 1;
         pg_data_t *pgdat = (pg_data_t*)p;
         struct task_struct *tsk = current;
-
-        struct reclaim_state reclaim_state = {
-                .reclaimed_slab = 0,
-        };
         const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
         if (!cpumask_empty(cpumask))
                 set_cpus_allowed_ptr(tsk, cpumask);
-        current->reclaim_state = &reclaim_state;
 
         /*
          * Tell the memory management that we're a "memory allocator",
@@ -3857,7 +3877,6 @@ kswapd_try_sleep:
         }
 
         tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
-        current->reclaim_state = NULL;
 
         return 0;
 }
@@ -3922,7 +3941,6 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
  */
 unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
-        struct reclaim_state reclaim_state;
         struct scan_control sc = {
                 .nr_to_reclaim = nr_to_reclaim,
                 .gfp_mask = GFP_HIGHUSER_MOVABLE,
@@ -3934,18 +3952,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
                 .hibernation_mode = 1,
         };
         struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
-        struct task_struct *p = current;
         unsigned long nr_reclaimed;
         unsigned int noreclaim_flag;
 
         fs_reclaim_acquire(sc.gfp_mask);
         noreclaim_flag = memalloc_noreclaim_save();
-        reclaim_state.reclaimed_slab = 0;
-        p->reclaim_state = &reclaim_state;
+        set_task_reclaim_state(current, &sc.reclaim_state);
 
         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
-        p->reclaim_state = NULL;
+        set_task_reclaim_state(current, NULL);
         memalloc_noreclaim_restore(noreclaim_flag);
         fs_reclaim_release(sc.gfp_mask);
 
@@ -4110,7 +4126,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
         /* Minimum pages needed in order to stay on node */
         const unsigned long nr_pages = 1 << order;
         struct task_struct *p = current;
-        struct reclaim_state reclaim_state;
         unsigned int noreclaim_flag;
         struct scan_control sc = {
                 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
@@ -4135,8 +4150,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
          */
         noreclaim_flag = memalloc_noreclaim_save();
         p->flags |= PF_SWAPWRITE;
-        reclaim_state.reclaimed_slab = 0;
-        p->reclaim_state = &reclaim_state;
+        set_task_reclaim_state(p, &sc.reclaim_state);
 
         if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
                 /*
@@ -4148,7 +4162,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
                 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
         }
 
-        p->reclaim_state = NULL;
+        set_task_reclaim_state(p, NULL);
         current->flags &= ~PF_SWAPWRITE;
         memalloc_noreclaim_restore(noreclaim_flag);
         fs_reclaim_release(sc.gfp_mask);
29  mm/z3fold.c

@@ -101,6 +101,7 @@ struct z3fold_buddy_slots {
  * @refcount: reference count for the z3fold page
  * @work: work_struct for page layout optimization
  * @slots: pointer to the structure holding buddy slots
+ * @pool: pointer to the containing pool
  * @cpu: CPU which this page "belongs" to
  * @first_chunks: the size of the first buddy in chunks, 0 if free
  * @middle_chunks: the size of the middle buddy in chunks, 0 if free
@@ -114,6 +115,7 @@ struct z3fold_header {
         struct kref refcount;
         struct work_struct work;
         struct z3fold_buddy_slots *slots;
+        struct z3fold_pool *pool;
         short cpu;
         unsigned short first_chunks;
         unsigned short middle_chunks;
@@ -193,8 +195,10 @@ static void compact_page_work(struct work_struct *w);
 static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
                                                      gfp_t gfp)
 {
-        struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle,
-                                                            gfp);
+        struct z3fold_buddy_slots *slots;
+
+        slots = kmem_cache_alloc(pool->c_handle,
+                                 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));
 
         if (slots) {
                 memset(slots->slot, 0, sizeof(slots->slot));
@@ -320,6 +324,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
         zhdr->start_middle = 0;
         zhdr->cpu = -1;
         zhdr->slots = slots;
+        zhdr->pool = pool;
         INIT_LIST_HEAD(&zhdr->buddy);
         INIT_WORK(&zhdr->work, compact_page_work);
         return zhdr;
@@ -426,7 +431,7 @@ static enum buddy handle_to_buddy(unsigned long handle)
 
 static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
 {
-        return slots_to_pool(zhdr->slots);
+        return zhdr->pool;
 }
 
 static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
@@ -850,7 +855,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
         enum buddy bud;
         bool can_sleep = gfpflags_allow_blocking(gfp);
 
-        if (!size || (gfp & __GFP_HIGHMEM))
+        if (!size)
                 return -EINVAL;
 
         if (size > PAGE_SIZE)
@@ -1345,24 +1350,29 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
         zhdr = page_address(page);
         pool = zhdr_to_pool(zhdr);
 
-        if (!trylock_page(page))
-                return -EAGAIN;
-
         if (!z3fold_page_trylock(zhdr)) {
-                unlock_page(page);
                 return -EAGAIN;
         }
         if (zhdr->mapped_count != 0) {
                 z3fold_page_unlock(zhdr);
-                unlock_page(page);
                 return -EBUSY;
         }
+        if (work_pending(&zhdr->work)) {
+                z3fold_page_unlock(zhdr);
+                return -EAGAIN;
+        }
         new_zhdr = page_address(newpage);
         memcpy(new_zhdr, zhdr, PAGE_SIZE);
         newpage->private = page->private;
         page->private = 0;
         z3fold_page_unlock(zhdr);
         spin_lock_init(&new_zhdr->page_lock);
+        INIT_WORK(&new_zhdr->work, compact_page_work);
+        /*
+         * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
+         * so we only have to reinitialize it.
+         */
+        INIT_LIST_HEAD(&new_zhdr->buddy);
         new_mapping = page_mapping(page);
         __ClearPageMovable(page);
         ClearPagePrivate(page);
@@ -1386,7 +1396,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
         queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
         page_mapcount_reset(page);
-        unlock_page(page);
         put_page(page);
         return 0;
 }