Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - the rest of MM

 - various misc things

 - procfs updates

 - lib/ updates

 - checkpatch updates

 - kdump/kexec updates

 - add kvmalloc helpers, use them

 - time helper updates for Y2038 issues. We're almost ready to remove
   current_fs_time() but that awaits a btrfs merge.

 - add tracepoints to DAX

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits)
  drivers/staging/ccree/ssi_hash.c: fix build with gcc-4.4.4
  selftests/vm: add a test for virtual address range mapping
  dax: add tracepoint to dax_insert_mapping()
  dax: add tracepoint to dax_writeback_one()
  dax: add tracepoints to dax_writeback_mapping_range()
  dax: add tracepoints to dax_load_hole()
  dax: add tracepoints to dax_pfn_mkwrite()
  dax: add tracepoints to dax_iomap_pte_fault()
  mtd: nand: nandsim: convert to memalloc_noreclaim_*()
  treewide: convert PF_MEMALLOC manipulations to new helpers
  mm: introduce memalloc_noreclaim_{save,restore}
  mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC
  mm/huge_memory.c: deposit a pgtable for DAX PMD faults when required
  mm/huge_memory.c: use zap_deposited_table() more
  time: delete CURRENT_TIME_SEC and CURRENT_TIME
  gfs2: replace CURRENT_TIME with current_time
  apparmorfs: replace CURRENT_TIME with current_time()
  lustre: replace CURRENT_TIME macro
  fs: ubifs: replace CURRENT_TIME_SEC with current_time
  fs: ufs: use ktime_get_real_ts64() for birthtime
  ...
This commit is contained in:
Linus Torvalds
2017-05-08 18:17:56 -07:00
323 zmienionych plików z 2447 dodań i 2105 usunięć

Wyświetl plik

@@ -89,11 +89,6 @@ static void map_pages(struct list_head *list)
list_splice(&tmp_list, list);
}
/*
 * Tell whether @migratetype is one of the two pageblock types this helper
 * accepts: a CMA type (as judged by is_migrate_cma()) or MIGRATE_MOVABLE.
 */
static inline bool migrate_async_suitable(int migratetype)
{
	if (is_migrate_cma(migratetype))
		return true;

	return migratetype == MIGRATE_MOVABLE;
}
#ifdef CONFIG_COMPACTION
int PageMovable(struct page *page)
@@ -988,6 +983,22 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
/*
 * Returns true if the pageblock containing @page may be used as a source of
 * pages to migrate for the compaction run described by @cc.
 *
 * Any block is accepted unless the run is both MIGRATE_ASYNC and a direct
 * compaction. In that case the block's migratetype must match the one being
 * allocated for: is_migrate_movable() decides when cc->migratetype is
 * MIGRATE_MOVABLE, otherwise the two migratetypes must be equal.
 */
static bool suitable_migration_source(struct compact_control *cc,
							struct page *page)
{
	int pageblock_mt;

	/* Only async direct compaction restricts its source blocks. */
	if (cc->mode != MIGRATE_ASYNC)
		return true;
	if (!cc->direct_compaction)
		return true;

	pageblock_mt = get_pageblock_migratetype(page);

	if (cc->migratetype != MIGRATE_MOVABLE)
		return pageblock_mt == cc->migratetype;

	return is_migrate_movable(pageblock_mt);
}
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct compact_control *cc,
struct page *page)
@@ -1007,7 +1018,7 @@ static bool suitable_migration_target(struct compact_control *cc,
return true;
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
if (migrate_async_suitable(get_pageblock_migratetype(page)))
if (is_migrate_movable(get_pageblock_migratetype(page)))
return true;
/* Otherwise skip the block */
@@ -1242,8 +1253,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
* Async compaction is optimistic to see if the minimum amount
* of work satisfies the allocation.
*/
if (cc->mode == MIGRATE_ASYNC &&
!migrate_async_suitable(get_pageblock_migratetype(page)))
if (!suitable_migration_source(cc, page))
continue;
/* Perform the isolation */
@@ -1276,11 +1286,11 @@ static inline bool is_via_compact_memory(int order)
return order == -1;
}
static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc,
const int migratetype)
static enum compact_result __compact_finished(struct zone *zone,
struct compact_control *cc)
{
unsigned int order;
unsigned long watermark;
const int migratetype = cc->migratetype;
if (cc->contended || fatal_signal_pending(current))
return COMPACT_CONTENDED;
@@ -1308,12 +1318,16 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
if (is_via_compact_memory(cc->order))
return COMPACT_CONTINUE;
/* Compaction run is not finished if the watermark is not met */
watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK];
if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
cc->alloc_flags))
return COMPACT_CONTINUE;
if (cc->finishing_block) {
/*
* We have finished the pageblock, but better check again that
* we really succeeded.
*/
if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
cc->finishing_block = false;
else
return COMPACT_CONTINUE;
}
/* Direct compactor: Is a suitable page free? */
for (order = cc->order; order < MAX_ORDER; order++) {
@@ -1335,20 +1349,40 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
* other migratetype buddy lists.
*/
if (find_suitable_fallback(area, order, migratetype,
true, &can_steal) != -1)
return COMPACT_SUCCESS;
true, &can_steal) != -1) {
/* movable pages are OK in any pageblock */
if (migratetype == MIGRATE_MOVABLE)
return COMPACT_SUCCESS;
/*
* We are stealing for a non-movable allocation. Make
* sure we finish compacting the current pageblock
* first so it is as free as possible and we won't
* have to steal another one soon. This only applies
* to sync compaction, as async compaction operates
* on pageblocks of the same migratetype.
*/
if (cc->mode == MIGRATE_ASYNC ||
IS_ALIGNED(cc->migrate_pfn,
pageblock_nr_pages)) {
return COMPACT_SUCCESS;
}
cc->finishing_block = true;
return COMPACT_CONTINUE;
}
}
return COMPACT_NO_SUITABLE_PAGE;
}
static enum compact_result compact_finished(struct zone *zone,
struct compact_control *cc,
const int migratetype)
struct compact_control *cc)
{
int ret;
ret = __compact_finished(zone, cc, migratetype);
ret = __compact_finished(zone, cc);
trace_mm_compaction_finished(zone, cc->order, ret);
if (ret == COMPACT_NO_SUITABLE_PAGE)
ret = COMPACT_CONTINUE;
@@ -1481,9 +1515,9 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
enum compact_result ret;
unsigned long start_pfn = zone->zone_start_pfn;
unsigned long end_pfn = zone_end_pfn(zone);
const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
const bool sync = cc->mode != MIGRATE_ASYNC;
cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
cc->classzone_idx);
/* Compaction is likely to fail */
@@ -1533,8 +1567,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
migrate_prep_local();
while ((ret = compact_finished(zone, cc, migratetype)) ==
COMPACT_CONTINUE) {
while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
int err;
switch (isolate_migratepages(zone, cc)) {

Wyświetl plik

@@ -2791,12 +2791,6 @@ ssize_t generic_perform_write(struct file *file,
ssize_t written = 0;
unsigned int flags = 0;
/*
* Copies from kernel address space cannot fail (NFSD is a big user).
*/
if (!iter_is_iovec(i))
flags |= AOP_FLAG_UNINTERRUPTIBLE;
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */

Wyświetl plik

@@ -200,10 +200,7 @@ struct frame_vector *frame_vector_create(unsigned int nr_frames)
* Avoid higher order allocations, use vmalloc instead. It should
* be rare anyway.
*/
if (size <= PAGE_SIZE)
vec = kmalloc(size, GFP_KERNEL);
else
vec = vmalloc(size);
vec = kvmalloc(size, GFP_KERNEL);
if (!vec)
return NULL;
vec->nr_allocated = nr_frames;

Wyświetl plik

@@ -715,7 +715,8 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
}
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write)
pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
pgtable_t pgtable)
{
struct mm_struct *mm = vma->vm_mm;
pmd_t entry;
@@ -729,6 +730,12 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
entry = pmd_mkyoung(pmd_mkdirty(entry));
entry = maybe_pmd_mkwrite(entry, vma);
}
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
atomic_long_inc(&mm->nr_ptes);
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
spin_unlock(ptl);
@@ -738,6 +745,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, pfn_t pfn, bool write)
{
pgprot_t pgprot = vma->vm_page_prot;
pgtable_t pgtable = NULL;
/*
* If we had pmd_special, we could avoid all these restrictions,
* but we need to be consistent with PTEs and architectures that
@@ -752,9 +760,15 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return VM_FAULT_SIGBUS;
if (arch_needs_pgtable_deposit()) {
pgtable = pte_alloc_one(vma->vm_mm, addr);
if (!pgtable)
return VM_FAULT_OOM;
}
track_pfn_insert(vma, &pgprot, pfn);
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
@@ -1611,12 +1625,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (vma_is_dax(vma)) {
if (arch_needs_pgtable_deposit())
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
if (is_huge_zero_pmd(orig_pmd))
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else if (is_huge_zero_pmd(orig_pmd)) {
pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
atomic_long_dec(&tlb->mm->nr_ptes);
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else {
@@ -1625,10 +1640,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page)) {
pgtable_t pgtable;
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
pte_free(tlb->mm, pgtable);
atomic_long_dec(&tlb->mm->nr_ptes);
zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
} else {
if (arch_needs_pgtable_deposit())

Wyświetl plik

@@ -183,6 +183,7 @@ extern int user_min_free_kbytes;
struct compact_control {
struct list_head freepages; /* List of free pages to migrate to */
struct list_head migratepages; /* List of pages being migrated */
struct zone *zone;
unsigned long nr_freepages; /* Number of isolated free pages */
unsigned long nr_migratepages; /* Number of pages to migrate */
unsigned long total_migrate_scanned;
@@ -190,17 +191,18 @@ struct compact_control {
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
const gfp_t gfp_mask; /* gfp mask of a direct compactor */
int order; /* order a direct compactor needs */
int migratetype; /* migratetype of direct compactor */
const unsigned int alloc_flags; /* alloc flags of a direct compactor */
const int classzone_idx; /* zone index of a direct compactor */
enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
bool ignore_block_suitable; /* Scan blocks considered unsuitable */
bool direct_compaction; /* False from kcompactd or /proc/... */
bool whole_zone; /* Whole zone should/has been scanned */
int order; /* order a direct compactor needs */
const gfp_t gfp_mask; /* gfp mask of a direct compactor */
const unsigned int alloc_flags; /* alloc flags of a direct compactor */
const int classzone_idx; /* zone index of a direct compactor */
struct zone *zone;
bool contended; /* Signal lock or sched contention */
bool finishing_block; /* Finishing current pageblock */
};
unsigned long

Wyświetl plik

@@ -691,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size)
ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
shadow_start + shadow_size,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
__builtin_return_address(0));

Wyświetl plik

@@ -237,12 +237,16 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
/*
 * Allocate @size bytes with gfp mask @flags by forwarding to __vmalloc()
 * with PAGE_KERNEL protection. @node is ignored by this implementation.
 */
void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
{
	void *addr = __vmalloc(size, flags, PAGE_KERNEL);

	return addr;
}
void *vmalloc_user(unsigned long size)
{
void *ret;
ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
if (ret) {
struct vm_area_struct *vma;

Wyświetl plik

@@ -1832,9 +1832,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
* Note that start_page and end_page are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
int move_freepages(struct zone *zone,
static int move_freepages(struct zone *zone,
struct page *start_page, struct page *end_page,
int migratetype)
int migratetype, int *num_movable)
{
struct page *page;
unsigned int order;
@@ -1851,6 +1851,9 @@ int move_freepages(struct zone *zone,
VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif
if (num_movable)
*num_movable = 0;
for (page = start_page; page <= end_page;) {
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
@@ -1861,6 +1864,15 @@ int move_freepages(struct zone *zone,
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
if (!PageBuddy(page)) {
/*
* We assume that pages that could be isolated for
* migration are movable. But we don't actually try
* isolating, as that would be expensive.
*/
if (num_movable &&
(PageLRU(page) || __PageMovable(page)))
(*num_movable)++;
page++;
continue;
}
@@ -1876,7 +1888,7 @@ int move_freepages(struct zone *zone,
}
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype)
int migratetype, int *num_movable)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
@@ -1893,7 +1905,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
if (!zone_spans_pfn(zone, end_pfn))
return 0;
return move_freepages(zone, start_page, end_page, migratetype);
return move_freepages(zone, start_page, end_page, migratetype,
num_movable);
}
static void change_pageblock_range(struct page *pageblock_page,
@@ -1943,28 +1956,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
/*
* This function implements actual steal behaviour. If order is large enough,
* we can steal whole pageblock. If not, we first move freepages in this
* pageblock and check whether half of pages are moved or not. If half of
* pages are moved, we can change migratetype of pageblock and permanently
* use it's pages as requested migratetype in the future.
* pageblock to our migratetype and determine how many already-allocated pages
* are there in the pageblock with a compatible migratetype. If at least half
* of pages are free or compatible, we can change migratetype of the pageblock
* itself, so pages freed in the future will be put on the correct free list.
*/
static void steal_suitable_fallback(struct zone *zone, struct page *page,
int start_type)
int start_type, bool whole_block)
{
unsigned int current_order = page_order(page);
int pages;
struct free_area *area;
int free_pages, movable_pages, alike_pages;
int old_block_type;
old_block_type = get_pageblock_migratetype(page);
/*
* This can happen due to races and we want to prevent broken
* highatomic accounting.
*/
if (is_migrate_highatomic(old_block_type))
goto single_page;
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
return;
goto single_page;
}
pages = move_freepages_block(zone, page, start_type);
/* We are not allowed to try stealing from the whole block */
if (!whole_block)
goto single_page;
/* Claim the whole block if over half of it is free */
if (pages >= (1 << (pageblock_order-1)) ||
free_pages = move_freepages_block(zone, page, start_type,
&movable_pages);
/*
* Determine how many pages are compatible with our allocation.
* For movable allocation, it's the number of movable pages which
* we just obtained. For other types it's a bit more tricky.
*/
if (start_type == MIGRATE_MOVABLE) {
alike_pages = movable_pages;
} else {
/*
* If we are falling back a RECLAIMABLE or UNMOVABLE allocation
* to MOVABLE pageblock, consider all non-movable pages as
* compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
* vice versa, be conservative since we can't distinguish the
* exact migratetype of non-movable pages.
*/
if (old_block_type == MIGRATE_MOVABLE)
alike_pages = pageblock_nr_pages
- (free_pages + movable_pages);
else
alike_pages = 0;
}
/* moving whole block can fail due to zone boundary conditions */
if (!free_pages)
goto single_page;
/*
* If a sufficient number of pages in the block are either free or of
* comparable migratability as our allocation, claim the whole block.
*/
if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
page_group_by_mobility_disabled)
set_pageblock_migratetype(page, start_type);
return;
single_page:
area = &zone->free_area[current_order];
list_move(&page->lru, &area->free_list[start_type]);
}
/*
@@ -2034,7 +2098,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
&& !is_migrate_cma(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages;
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
}
out_unlock:
@@ -2111,7 +2175,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* may increase.
*/
set_pageblock_migratetype(page, ac->migratetype);
ret = move_freepages_block(zone, page, ac->migratetype);
ret = move_freepages_block(zone, page, ac->migratetype,
NULL);
if (ret) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
@@ -2123,8 +2188,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
return false;
}
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
/*
* Try finding a free buddy page on the fallback list and put it on the free
* list of requested migratetype, possibly along with other pages from the same
* block, depending on fragmentation avoidance heuristics. Returns true if
* fallback was found so that __rmqueue_smallest() can grab it.
*/
static inline bool
__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
{
struct free_area *area;
@@ -2145,32 +2215,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
page = list_first_entry(&area->free_list[fallback_mt],
struct page, lru);
if (can_steal && !is_migrate_highatomic_page(page))
steal_suitable_fallback(zone, page, start_migratetype);
/* Remove the page from the freelists */
area->nr_free--;
list_del(&page->lru);
rmv_page_order(page);
expand(zone, page, order, current_order, area,
start_migratetype);
/*
* The pcppage_migratetype may differ from pageblock's
* migratetype depending on the decisions in
* find_suitable_fallback(). This is OK as long as it does not
* differ for MIGRATE_CMA pageblocks. Those can be used as
* fallback only via special __rmqueue_cma_fallback() function
*/
set_pcppage_migratetype(page, start_migratetype);
steal_suitable_fallback(zone, page, start_migratetype,
can_steal);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt);
return page;
return true;
}
return NULL;
return false;
}
/*
@@ -2182,13 +2237,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
{
struct page *page;
retry:
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
if (migratetype == MIGRATE_MOVABLE)
page = __rmqueue_cma_fallback(zone, order);
if (!page)
page = __rmqueue_fallback(zone, order, migratetype);
if (!page && __rmqueue_fallback(zone, order, migratetype))
goto retry;
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3227,14 +3283,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
enum compact_priority prio, enum compact_result *compact_result)
{
struct page *page;
unsigned int noreclaim_flag;
if (!order)
return NULL;
current->flags |= PF_MEMALLOC;
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
prio);
current->flags &= ~PF_MEMALLOC;
memalloc_noreclaim_restore(noreclaim_flag);
if (*compact_result <= COMPACT_INACTIVE)
return NULL;
@@ -3381,12 +3438,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
{
struct reclaim_state reclaim_state;
int progress;
unsigned int noreclaim_flag;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
current->flags |= PF_MEMALLOC;
noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@@ -3396,7 +3454,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
current->flags &= ~PF_MEMALLOC;
memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
@@ -3609,6 +3667,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
unsigned long did_some_progress;
@@ -3676,12 +3735,17 @@ retry_cpuset:
/*
* For costly allocations, try direct compaction first, as it's likely
* that we have enough base pages and don't need to reclaim. Don't try
* that for allocations that are allowed to ignore watermarks, as the
* ALLOC_NO_WATERMARKS attempt didn't yet happen.
* that we have enough base pages and don't need to reclaim. For non-
* movable high-order allocations, do that as well, as compaction will
* try to prevent permanent fragmentation by migrating from blocks of the
* same migratetype.
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
!gfp_pfmemalloc_allowed(gfp_mask)) {
if (can_direct_reclaim &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
page = __alloc_pages_direct_compact(gfp_mask, order,
alloc_flags, ac,
INIT_COMPACT_PRIORITY,
@@ -3693,7 +3757,7 @@ retry_cpuset:
* Checks for costly allocations with __GFP_NORETRY, which
* includes THP page fault allocations
*/
if (gfp_mask & __GFP_NORETRY) {
if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If compaction is deferred for high-order allocations,
* it is because sync compaction recently failed. If
@@ -3774,7 +3838,7 @@ retry:
* Do not retry costly high order allocations unless they are
* __GFP_REPEAT
*/
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
if (costly_order && !(gfp_mask & __GFP_REPEAT))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,

Wyświetl plik

@@ -66,7 +66,8 @@ out:
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
zone->nr_isolate_pageblock++;
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
NULL);
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
}
@@ -120,7 +121,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* pageblock scanning for freepage moving.
*/
if (!isolated_page) {
nr_pages = move_freepages_block(zone, page, migratetype);
nr_pages = move_freepages_block(zone, page, migratetype, NULL);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
set_pageblock_migratetype(page, migratetype);

Wyświetl plik

@@ -31,6 +31,7 @@
#include <linux/cpumask.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#ifdef CONFIG_SWAP
@@ -119,16 +120,18 @@ static int alloc_swap_slot_cache(unsigned int cpu)
/*
* Do allocation outside swap_slots_cache_mutex
* as vzalloc could trigger reclaim and get_swap_page,
* as kvzalloc could trigger reclaim and get_swap_page,
* which can lock swap_slots_cache_mutex.
*/
slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
GFP_KERNEL);
if (!slots)
return -ENOMEM;
slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
GFP_KERNEL);
if (!slots_ret) {
vfree(slots);
kvfree(slots);
return -ENOMEM;
}
@@ -152,9 +155,9 @@ static int alloc_swap_slot_cache(unsigned int cpu)
out:
mutex_unlock(&swap_slots_cache_mutex);
if (slots)
vfree(slots);
kvfree(slots);
if (slots_ret)
vfree(slots_ret);
kvfree(slots_ret);
return 0;
}
@@ -171,7 +174,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
cache->cur = 0;
cache->nr = 0;
if (free_slots && cache->slots) {
vfree(cache->slots);
kvfree(cache->slots);
cache->slots = NULL;
}
mutex_unlock(&cache->alloc_lock);
@@ -186,7 +189,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
}
spin_unlock_irq(&cache->free_lock);
if (slots)
vfree(slots);
kvfree(slots);
}
}

Wyświetl plik

@@ -523,7 +523,7 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = vzalloc(sizeof(struct address_space) * nr);
spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
if (!spaces)
return -ENOMEM;
for (i = 0; i < nr; i++) {

Wyświetl plik

@@ -2270,8 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
vfree(swap_map);
vfree(cluster_info);
vfree(frontswap_map);
kvfree(cluster_info);
kvfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
exit_swap_address_space(p->type);
@@ -2794,7 +2794,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info));
cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
goto bad_swap;
@@ -2827,7 +2828,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
GFP_KERNEL);
if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
/*

Wyświetl plik

@@ -329,6 +329,63 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
/**
 * kvmalloc_node - try a physically contiguous (kmalloc) allocation first and
 * fall back to a non-contiguous (vmalloc) one if that fails.
 * @size: size of the request in bytes.
 * @flags: gfp mask for the allocation - must be compatible with (a superset
 *         of) GFP_KERNEL; a one-time warning is raised otherwise.
 * @node: numa node to allocate from.
 *
 * The request is first handed to kmalloc_node(). Only when that fails and
 * @size is larger than PAGE_SIZE is the vmalloc allocator tried. Memory
 * obtained here must be released with kvfree().
 *
 * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
 * __GFP_REPEAT is honoured only for requests larger than
 * PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, and should be used only when kmalloc
 * is preferable to the vmalloc fallback because of visible performance
 * drawbacks.
 *
 * Any use of gfp flags other than GFP_KERNEL should be discussed with the mm
 * developers first.
 */
void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
	const bool multi_page = size > PAGE_SIZE;
	gfp_t slab_gfp = flags;
	void *mem;

	/*
	 * vmalloc performs some internal allocations (e.g. page tables) with
	 * GFP_KERNEL, so the caller's mask has to be compatible with it.
	 */
	WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);

	/*
	 * A multi-page kmalloc attempt has a fallback, so keep it from being
	 * too disruptive: no OOM killer and no allocation failure warnings.
	 */
	if (multi_page) {
		const bool costly =
			size > (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);

		slab_gfp |= __GFP_NOWARN;

		/*
		 * __GFP_REPEAT is kept only for costly requests. For !costly
		 * ones it is overridden by __GFP_NORETRY, because there is no
		 * other way to tell the allocator to fail rather than retry
		 * endlessly.
		 */
		if (!(costly && (flags & __GFP_REPEAT)))
			slab_gfp |= __GFP_NORETRY;
	}

	mem = kmalloc_node(size, slab_gfp, node);
	if (mem)
		return mem;

	/* Falling back to vmalloc makes no sense for sub-page requests. */
	if (!multi_page)
		return NULL;

	return __vmalloc_node_flags(size, node, flags);
}
EXPORT_SYMBOL(kvmalloc_node);
void kvfree(const void *addr)
{
if (is_vmalloc_addr(addr))

Wyświetl plik

@@ -1649,16 +1649,13 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);
static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node)
{
struct page **pages;
unsigned int nr_pages, array_size, i;
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1786,8 +1783,15 @@ fail:
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*
* Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
* and __GFP_NOFAIL are not supported
*
* Any use of gfp flags other than GFP_KERNEL should first be discussed
* with the mm developers.
*
*/
static void *__vmalloc_node(unsigned long size, unsigned long align,
void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller)
{
@@ -1802,13 +1806,6 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
static inline void *__vmalloc_node_flags(unsigned long size,
int node, gfp_t flags)
{
return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
node, __builtin_return_address(0));
}
/**
* vmalloc - allocate virtually contiguous memory
* @size: allocation size
@@ -1821,7 +1818,7 @@ static inline void *__vmalloc_node_flags(unsigned long size,
void *vmalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
GFP_KERNEL | __GFP_HIGHMEM);
GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc);
@@ -1838,7 +1835,7 @@ EXPORT_SYMBOL(vmalloc);
void *vzalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);
@@ -1855,7 +1852,7 @@ void *vmalloc_user(unsigned long size)
void *ret;
ret = __vmalloc_node(size, SHMLBA,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (ret) {
@@ -1879,7 +1876,7 @@ EXPORT_SYMBOL(vmalloc_user);
*/
void *vmalloc_node(unsigned long size, int node)
{
return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -1899,7 +1896,7 @@ EXPORT_SYMBOL(vmalloc_node);
void *vzalloc_node(unsigned long size, int node)
{
return __vmalloc_node_flags(size, node,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc_node);
@@ -1921,7 +1918,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size)
{
return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC,
NUMA_NO_NODE, __builtin_return_address(0));
}

Wyświetl plik

@@ -3036,6 +3036,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
struct zonelist *zonelist;
unsigned long nr_reclaimed;
int nid;
unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
@@ -3062,9 +3063,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.gfp_mask,
sc.reclaim_idx);
current->flags |= PF_MEMALLOC;
noreclaim_flag = memalloc_noreclaim_save();
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
current->flags &= ~PF_MEMALLOC;
memalloc_noreclaim_restore(noreclaim_flag);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
@@ -3589,8 +3590,9 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
p->flags |= PF_MEMALLOC;
noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3599,7 +3601,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
p->flags &= ~PF_MEMALLOC;
memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
}
@@ -3764,6 +3766,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
struct task_struct *p = current;
struct reclaim_state reclaim_state;
int classzone_idx = gfp_zone(gfp_mask);
unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)),
@@ -3781,7 +3784,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
* and we also need to be able to write out pages for RECLAIM_WRITE
* and RECLAIM_UNMAP.
*/
p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE;
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3797,7 +3801,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
p->reclaim_state = NULL;
current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}