mm, thp: remove infrastructure for handling splitting PMDs

With new refcounting we don't need to mark PMDs splitting.  Let's drop
code to handle this.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Kirill A. Shutemov
2016-01-15 16:53:39 -08:00
committed by Linus Torvalds
parent 1f19617d77
commit 4b471e8898
13 changed files with 41 additions and 183 deletions

View File

@@ -241,13 +241,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}
if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return follow_page_pte(vma, address, pmd, flags);
}
if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
@@ -1068,9 +1061,6 @@ struct page *get_dump_page(unsigned long addr)
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
* pages containing page tables.
*
* *) THP splits will broadcast an IPI, this can be achieved by overriding
* pmdp_splitting_flush.
*
* *) ptes can be read atomically by the architecture.
*
* *) access_ok is sufficient to validate userspace address ranges.
@@ -1267,7 +1257,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {

View File

@@ -986,15 +986,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
goto out_unlock;
}
if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
pte_free(dst_mm, pgtable);
wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
goto out;
}
src_page = pmd_page(pmd);
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
get_page(src_page);
@@ -1470,7 +1461,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t orig_pmd;
spinlock_t *ptl;
if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
return 0;
/*
* For architectures like ppc64 we look at deposited pgtable
@@ -1504,13 +1495,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
return 1;
}
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
int ret = 0;
pmd_t pmd;
struct mm_struct *mm = vma->vm_mm;
@@ -1519,7 +1509,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
(new_addr & ~HPAGE_PMD_MASK) ||
old_end - old_addr < HPAGE_PMD_SIZE ||
(new_vma->vm_flags & VM_NOHUGEPAGE))
goto out;
return false;
/*
* The destination pmd shouldn't be established, free_pgtables()
@@ -1527,15 +1517,14 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
*/
if (WARN_ON(!pmd_none(*new_pmd))) {
VM_BUG_ON(pmd_trans_huge(*new_pmd));
goto out;
return false;
}
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock.
*/
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
if (ret == 1) {
if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -1551,9 +1540,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
return true;
}
out:
return ret;
return false;
}
/*
@@ -1569,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
spinlock_t *ptl;
int ret = 0;
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
@@ -1600,29 +1589,19 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
}
/*
* Returns 1 if a given pmd maps a stable (not under splitting) thp.
* Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
* Returns true if a given pmd maps a thp, false otherwise.
*
* Note that if it returns 1, this routine returns without unlocking page
* table locks. So callers must unlock them.
* Note that if it returns true, this routine returns without unlocking page
* table lock. So callers must unlock it.
*/
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
*ptl = pmd_lock(vma->vm_mm, pmd);
if (likely(pmd_trans_huge(*pmd))) {
if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(*ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return -1;
} else {
/* Thp mapped by 'pmd' is stable, so we can
* handle it as it is. */
return 1;
}
}
if (likely(pmd_trans_huge(*pmd)))
return true;
spin_unlock(*ptl);
return 0;
return false;
}
/*
@@ -1636,7 +1615,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm,
unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl)
{
pgd_t *pgd;
@@ -1659,21 +1637,8 @@ pmd_t *page_check_address_pmd(struct page *page,
goto unlock;
if (pmd_page(*pmd) != page)
goto unlock;
/*
* split_vma() may create temporary aliased mappings. There is
* no risk as long as all huge pmd are found and have their
* splitting bit set before __split_huge_page_refcount
* runs. Finding the same huge pmd more than once during the
* same rmap walk is not a problem.
*/
if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
pmd_trans_splitting(*pmd))
goto unlock;
if (pmd_trans_huge(*pmd)) {
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
!pmd_trans_splitting(*pmd));
if (pmd_trans_huge(*pmd))
return pmd;
}
unlock:
spin_unlock(*ptl);
return NULL;

View File

@@ -4675,7 +4675,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
pte_t *pte;
spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
@@ -4863,16 +4863,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
union mc_target target;
struct page *page;
/*
* No race with splitting thp happens because:
* - if pmd_trans_huge_lock() returns 1, the relevant thp is not
* under splitting, which means there's no concurrent thp split,
* - if another thread runs into split_huge_page() just after we
* entered this if-block, the thread must wait for page table lock
* to be unlocked in __split_huge_page_splitting(), where the main
* part of thp split is not executed yet.
*/
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl);
return 0;

View File

@@ -566,7 +566,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address);
int wait_split_huge_page;
if (!new)
return -ENOMEM;
@@ -586,18 +585,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
ptl = pmd_lock(mm, pmd);
wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
atomic_long_inc(&mm->nr_ptes);
pmd_populate(mm, pmd, new);
new = NULL;
} else if (unlikely(pmd_trans_splitting(*pmd)))
wait_split_huge_page = 1;
}
spin_unlock(ptl);
if (new)
pte_free(mm, new);
if (wait_split_huge_page)
wait_split_huge_page(vma->anon_vma, pmd);
return 0;
}
@@ -613,8 +608,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
new = NULL;
} else
VM_BUG_ON(pmd_trans_splitting(*pmd));
}
spin_unlock(&init_mm.page_table_lock);
if (new)
pte_free_kernel(&init_mm, new);
@@ -3374,14 +3368,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (pmd_trans_huge(orig_pmd)) {
unsigned int dirty = flags & FAULT_FLAG_WRITE;
/*
* If the pmd is splitting, return and retry the
* fault. Alternative: wait until the split
* is done, and goto retry.
*/
if (pmd_trans_splitting(orig_pmd))
return 0;
if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);

View File

@@ -117,7 +117,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
unsigned char *vec = walk->private;
int nr = (end - addr) >> PAGE_SHIFT;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
memset(vec, 1, nr);
spin_unlock(ptl);
goto out;

View File

@@ -192,25 +192,24 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (!new_pmd)
break;
if (pmd_trans_huge(*old_pmd)) {
int err = 0;
if (extent == HPAGE_PMD_SIZE) {
bool moved;
VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma,
vma);
/* See comment in move_ptes() */
if (need_rmap_locks)
anon_vma_lock_write(vma->anon_vma);
err = move_huge_pmd(vma, new_vma, old_addr,
moved = move_huge_pmd(vma, new_vma, old_addr,
new_addr, old_end,
old_pmd, new_pmd);
if (need_rmap_locks)
anon_vma_unlock_write(vma->anon_vma);
if (moved) {
need_flush = true;
continue;
}
}
if (err > 0) {
need_flush = true;
continue;
} else if (!err) {
split_huge_pmd(vma, old_pmd, old_addr);
}
split_huge_pmd(vma, old_pmd, old_addr);
VM_BUG_ON(pmd_trans_huge(*old_pmd));
}
if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,

View File

@@ -61,8 +61,7 @@ static int page_idle_clear_pte_refs_one(struct page *page,
bool referenced = false;
if (unlikely(PageTransHuge(page))) {
pmd = page_check_address_pmd(page, mm, addr,
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
pmd = page_check_address_pmd(page, mm, addr, &ptl);
if (pmd) {
referenced = pmdp_clear_young_notify(vma, addr, pmd);
spin_unlock(ptl);

View File

@@ -139,18 +139,6 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
}
#endif
#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t pmd = pmd_mksplitting(*pmdp);
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
/* tlb flush only to serialize against gup-fast */
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
#endif
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)

View File

@@ -843,8 +843,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
* rmap might return false positives; we must filter
* these out using page_check_address_pmd().
*/
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
pmd = page_check_address_pmd(page, mm, address, &ptl);
if (!pmd)
return SWAP_AGAIN;
@@ -854,7 +853,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
return SWAP_FAIL; /* To break the loop */
}
/* go ahead even if the pmd is pmd_trans_splitting() */
if (pmdp_clear_flush_young_notify(vma, address, pmd))
referenced++;
spin_unlock(ptl);