Merge 5.4-rc1-prerelease into android-mainline
To make the 5.4-rc1 merge easier, merge at a prerelease point in time
before the final release happens.

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: If613d657fd0abf9910c5bf3435a745f01b89765e
mm/madvise.c | 320
@@ -11,6 +11,7 @@
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
#include <linux/page_idle.h>
#include <linux/userfaultfd_k.h>
#include <linux/hugetlb.h>
#include <linux/falloc.h>
@@ -31,6 +32,11 @@

#include "internal.h"

struct madvise_walk_private {
	struct mmu_gather *tlb;
	bool pageout;
};

/*
 * Any behaviour which results in changes to the vma->vm_flags needs to
 * take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -42,6 +48,8 @@ static int madvise_need_mmap_write(int behavior)
	case MADV_REMOVE:
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_COLD:
	case MADV_PAGEOUT:
	case MADV_FREE:
		return 0;
	default:
@@ -107,28 +115,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
		error = ksm_madvise(vma, start, end, behavior, &new_flags);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		if (error)
			goto out_convert_errno;
		break;
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
		error = hugepage_madvise(vma, &new_flags, behavior);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		if (error)
			goto out_convert_errno;
		break;
	}

@@ -154,15 +148,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
			goto out;
		}
		error = __split_vma(mm, vma, start, 1);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		if (error)
			goto out_convert_errno;
	}

	if (end != vma->vm_end) {
@@ -171,15 +158,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
			goto out;
		}
		error = __split_vma(mm, vma, end, 0);
		if (error) {
			/*
			 * madvise() returns EAGAIN if kernel resources, such as
			 * slab, are temporarily unavailable.
			 */
			if (error == -ENOMEM)
				error = -EAGAIN;
			goto out;
		}
		if (error)
			goto out_convert_errno;
	}

success:
@@ -187,6 +167,14 @@ success:
	 * vm_flags is protected by the mmap_sem held in write mode.
	 */
	vma->vm_flags = new_flags;

out_convert_errno:
	/*
	 * madvise() returns EAGAIN if kernel resources, such as
	 * slab, are temporarily unavailable.
	 */
	if (error == -ENOMEM)
		error = -EAGAIN;
out:
	return error;
}
@@ -309,6 +297,254 @@ static long madvise_willneed(struct vm_area_struct *vma,
	return 0;
}

static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct mm_walk *walk)
{
	struct madvise_walk_private *private = walk->private;
	struct mmu_gather *tlb = private->tlb;
	bool pageout = private->pageout;
	struct mm_struct *mm = tlb->mm;
	struct vm_area_struct *vma = walk->vma;
	pte_t *orig_pte, *pte, ptent;
	spinlock_t *ptl;
	struct page *page = NULL;
	LIST_HEAD(page_list);

	if (fatal_signal_pending(current))
		return -EINTR;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(*pmd)) {
		pmd_t orig_pmd;
		unsigned long next = pmd_addr_end(addr, end);

		tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
		ptl = pmd_trans_huge_lock(pmd, vma);
		if (!ptl)
			return 0;

		orig_pmd = *pmd;
		if (is_huge_zero_pmd(orig_pmd))
			goto huge_unlock;

		if (unlikely(!pmd_present(orig_pmd))) {
			VM_BUG_ON(thp_migration_supported() &&
					!is_pmd_migration_entry(orig_pmd));
			goto huge_unlock;
		}

		page = pmd_page(orig_pmd);
		if (next - addr != HPAGE_PMD_SIZE) {
			int err;

			if (page_mapcount(page) != 1)
				goto huge_unlock;

			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			err = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (!err)
				goto regular_page;
			return 0;
		}

		if (pmd_young(orig_pmd)) {
			pmdp_invalidate(vma, addr, pmd);
			orig_pmd = pmd_mkold(orig_pmd);

			set_pmd_at(mm, addr, pmd, orig_pmd);
			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
		}

		ClearPageReferenced(page);
		test_and_clear_page_young(page);
		if (pageout) {
			if (!isolate_lru_page(page))
				list_add(&page->lru, &page_list);
		} else
			deactivate_page(page);
huge_unlock:
		spin_unlock(ptl);
		if (pageout)
			reclaim_pages(&page_list);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
regular_page:
#endif
	tlb_change_page_size(tlb, PAGE_SIZE);
	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (pte_none(ptent))
			continue;

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/*
		 * Creating a THP page is expensive so split it only if we
		 * are sure it's worth. Split it if we are only owner.
		 */
		if (PageTransCompound(page)) {
			if (page_mapcount(page) != 1)
				break;
			get_page(page);
			if (!trylock_page(page)) {
				put_page(page);
				break;
			}
			pte_unmap_unlock(orig_pte, ptl);
			if (split_huge_page(page)) {
				unlock_page(page);
				put_page(page);
				pte_offset_map_lock(mm, pmd, addr, &ptl);
				break;
			}
			unlock_page(page);
			put_page(page);
			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
			pte--;
			addr -= PAGE_SIZE;
			continue;
		}

		VM_BUG_ON_PAGE(PageTransCompound(page), page);

		if (pte_young(ptent)) {
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			ptent = pte_mkold(ptent);
			set_pte_at(mm, addr, pte, ptent);
			tlb_remove_tlb_entry(tlb, pte, addr);
		}

		/*
		 * We are deactivating a page for accelerating reclaiming.
		 * VM couldn't reclaim the page unless we clear PG_young.
		 * As a side effect, it makes confuse idle-page tracking
		 * because they will miss recent referenced history.
		 */
		ClearPageReferenced(page);
		test_and_clear_page_young(page);
		if (pageout) {
			if (!isolate_lru_page(page))
				list_add(&page->lru, &page_list);
		} else
			deactivate_page(page);
	}

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(orig_pte, ptl);
	if (pageout)
		reclaim_pages(&page_list);
	cond_resched();

	return 0;
}

static const struct mm_walk_ops cold_walk_ops = {
	.pmd_entry = madvise_cold_or_pageout_pte_range,
};

static void madvise_cold_page_range(struct mmu_gather *tlb,
			struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
	struct madvise_walk_private walk_private = {
		.pageout = false,
		.tlb = tlb,
	};

	tlb_start_vma(tlb, vma);
	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
	tlb_end_vma(tlb, vma);
}

static long madvise_cold(struct vm_area_struct *vma,
			struct vm_area_struct **prev,
			unsigned long start_addr, unsigned long end_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather tlb;

	*prev = vma;
	if (!can_madv_lru_vma(vma))
		return -EINVAL;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
	madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
	tlb_finish_mmu(&tlb, start_addr, end_addr);

	return 0;
}

static void madvise_pageout_page_range(struct mmu_gather *tlb,
			struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
	struct madvise_walk_private walk_private = {
		.pageout = true,
		.tlb = tlb,
	};

	tlb_start_vma(tlb, vma);
	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
	tlb_end_vma(tlb, vma);
}

static inline bool can_do_pageout(struct vm_area_struct *vma)
{
	if (vma_is_anonymous(vma))
		return true;
	if (!vma->vm_file)
		return false;
	/*
	 * paging out pagecache only for non-anonymous mappings that correspond
	 * to the files the calling process could (if tried) open for writing;
	 * otherwise we'd be including shared non-exclusive mappings, which
	 * opens a side channel.
	 */
	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
}

static long madvise_pageout(struct vm_area_struct *vma,
			struct vm_area_struct **prev,
			unsigned long start_addr, unsigned long end_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather tlb;

	*prev = vma;
	if (!can_madv_lru_vma(vma))
		return -EINVAL;

	if (!can_do_pageout(vma))
		return 0;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
	madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
	tlb_finish_mmu(&tlb, start_addr, end_addr);

	return 0;
}

static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
@@ -513,7 +749,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
				  int behavior)
{
	*prev = vma;
	if (!can_madv_dontneed_vma(vma))
	if (!can_madv_lru_vma(vma))
		return -EINVAL;

	if (!userfaultfd_remove(vma, start, end)) {
@@ -535,7 +771,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
			 */
			return -ENOMEM;
		}
		if (!can_madv_dontneed_vma(vma))
		if (!can_madv_lru_vma(vma))
			return -EINVAL;
		if (end > vma->vm_end) {
			/*
@@ -689,6 +925,10 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
		return madvise_remove(vma, prev, start, end);
	case MADV_WILLNEED:
		return madvise_willneed(vma, prev, start, end);
	case MADV_COLD:
		return madvise_cold(vma, prev, start, end);
	case MADV_PAGEOUT:
		return madvise_pageout(vma, prev, start, end);
	case MADV_FREE:
	case MADV_DONTNEED:
		return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -710,6 +950,8 @@ madvise_behavior_valid(int behavior)
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_FREE:
	case MADV_COLD:
	case MADV_PAGEOUT:
#ifdef CONFIG_KSM
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
@@ -804,6 +1046,8 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
	size_t len;
	struct blk_plug plug;

	start = untagged_addr(start);

	if (!madvise_behavior_valid(behavior))
		return error;
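
Note: the mm/madvise.c changes pulled in by this merge add the upstream MADV_COLD and MADV_PAGEOUT hints. As a rough userspace illustration only (not part of this commit), a minimal sketch of how a process would exercise the new advice values follows; the fallback #defines assume the upstream 5.4 UAPI values (20 and 21) in case the installed libc headers predate them.

/* illustrative sketch, not from this commit */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_COLD
#define MADV_COLD	20	/* assumed UAPI value: deactivate these pages */
#endif
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT	21	/* assumed UAPI value: reclaim these pages */
#endif

int main(void)
{
	size_t len = 64 * 4096;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 0xaa, len);	/* fault the pages in */

	/* Hint that the range is cold; contents stay valid, reclaim gets cheaper. */
	if (madvise(buf, len, MADV_COLD))
		perror("madvise(MADV_COLD)");

	/* Ask the kernel to reclaim (page out) the range right away. */
	if (madvise(buf, len, MADV_PAGEOUT))
		perror("madvise(MADV_PAGEOUT)");

	munmap(buf, len);
	return 0;
}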