mm, thp: remove infrastructure for handling splitting PMDs
With the new refcounting we no longer need to mark PMDs as splitting. Let's drop the code that handles this. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Tested-by: Sasha Levin <sasha.levin@oracle.com> Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Jerome Marchand <jmarchan@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Steve Capper <steve.capper@linaro.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
1f19617d77
commit
4b471e8898
12
mm/gup.c
12
mm/gup.c
@@ -241,13 +241,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
|
||||
spin_unlock(ptl);
|
||||
return follow_page_pte(vma, address, pmd, flags);
|
||||
}
|
||||
|
||||
if (unlikely(pmd_trans_splitting(*pmd))) {
|
||||
spin_unlock(ptl);
|
||||
wait_split_huge_page(vma->anon_vma, pmd);
|
||||
return follow_page_pte(vma, address, pmd, flags);
|
||||
}
|
||||
|
||||
if (flags & FOLL_SPLIT) {
|
||||
int ret;
|
||||
page = pmd_page(*pmd);
|
||||
@@ -1068,9 +1061,6 @@ struct page *get_dump_page(unsigned long addr)
|
||||
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
|
||||
* pages containing page tables.
|
||||
*
|
||||
* *) THP splits will broadcast an IPI, this can be achieved by overriding
|
||||
* pmdp_splitting_flush.
|
||||
*
|
||||
* *) ptes can be read atomically by the architecture.
|
||||
*
|
||||
* *) access_ok is sufficient to validate userspace address ranges.
|
||||
@@ -1267,7 +1257,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
pmd_t pmd = READ_ONCE(*pmdp);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
|
||||
if (pmd_none(pmd))
|
||||
return 0;
|
||||
|
||||
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
|
||||
|
@@ -986,15 +986,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (unlikely(pmd_trans_splitting(pmd))) {
|
||||
/* split huge page running from under us */
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
pte_free(dst_mm, pgtable);
|
||||
|
||||
wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
|
||||
goto out;
|
||||
}
|
||||
src_page = pmd_page(pmd);
|
||||
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
|
||||
get_page(src_page);
|
||||
@@ -1470,7 +1461,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
pmd_t orig_pmd;
|
||||
spinlock_t *ptl;
|
||||
|
||||
if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
|
||||
if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
|
||||
return 0;
|
||||
/*
|
||||
* For architectures like ppc64 we look at deposited pgtable
|
||||
@@ -1504,13 +1495,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
return 1;
|
||||
}
|
||||
|
||||
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||
bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||
unsigned long old_addr,
|
||||
unsigned long new_addr, unsigned long old_end,
|
||||
pmd_t *old_pmd, pmd_t *new_pmd)
|
||||
{
|
||||
spinlock_t *old_ptl, *new_ptl;
|
||||
int ret = 0;
|
||||
pmd_t pmd;
|
||||
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
@@ -1519,7 +1509,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||
(new_addr & ~HPAGE_PMD_MASK) ||
|
||||
old_end - old_addr < HPAGE_PMD_SIZE ||
|
||||
(new_vma->vm_flags & VM_NOHUGEPAGE))
|
||||
goto out;
|
||||
return false;
|
||||
|
||||
/*
|
||||
* The destination pmd shouldn't be established, free_pgtables()
|
||||
@@ -1527,15 +1517,14 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||
*/
|
||||
if (WARN_ON(!pmd_none(*new_pmd))) {
|
||||
VM_BUG_ON(pmd_trans_huge(*new_pmd));
|
||||
goto out;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't have to worry about the ordering of src and dst
|
||||
* ptlocks because exclusive mmap_sem prevents deadlock.
|
||||
*/
|
||||
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
|
||||
if (ret == 1) {
|
||||
if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
|
||||
new_ptl = pmd_lockptr(mm, new_pmd);
|
||||
if (new_ptl != old_ptl)
|
||||
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
|
||||
@@ -1551,9 +1540,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
|
||||
if (new_ptl != old_ptl)
|
||||
spin_unlock(new_ptl);
|
||||
spin_unlock(old_ptl);
|
||||
return true;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1569,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
spinlock_t *ptl;
|
||||
int ret = 0;
|
||||
|
||||
if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||
if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
|
||||
pmd_t entry;
|
||||
bool preserve_write = prot_numa && pmd_write(*pmd);
|
||||
ret = 1;
|
||||
@@ -1600,29 +1589,19 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 1 if a given pmd maps a stable (not under splitting) thp.
|
||||
* Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
|
||||
* Returns true if a given pmd maps a thp, false otherwise.
|
||||
*
|
||||
* Note that if it returns 1, this routine returns without unlocking page
|
||||
* table locks. So callers must unlock them.
|
||||
* Note that if it returns true, this routine returns without unlocking page
|
||||
* table lock. So callers must unlock it.
|
||||
*/
|
||||
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||
bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||
spinlock_t **ptl)
|
||||
{
|
||||
*ptl = pmd_lock(vma->vm_mm, pmd);
|
||||
if (likely(pmd_trans_huge(*pmd))) {
|
||||
if (unlikely(pmd_trans_splitting(*pmd))) {
|
||||
spin_unlock(*ptl);
|
||||
wait_split_huge_page(vma->anon_vma, pmd);
|
||||
return -1;
|
||||
} else {
|
||||
/* Thp mapped by 'pmd' is stable, so we can
|
||||
* handle it as it is. */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (likely(pmd_trans_huge(*pmd)))
|
||||
return true;
|
||||
spin_unlock(*ptl);
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1636,7 +1615,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
|
||||
pmd_t *page_check_address_pmd(struct page *page,
|
||||
struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
enum page_check_address_pmd_flag flag,
|
||||
spinlock_t **ptl)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
@@ -1659,21 +1637,8 @@ pmd_t *page_check_address_pmd(struct page *page,
|
||||
goto unlock;
|
||||
if (pmd_page(*pmd) != page)
|
||||
goto unlock;
|
||||
/*
|
||||
* split_vma() may create temporary aliased mappings. There is
|
||||
* no risk as long as all huge pmd are found and have their
|
||||
* splitting bit set before __split_huge_page_refcount
|
||||
* runs. Finding the same huge pmd more than once during the
|
||||
* same rmap walk is not a problem.
|
||||
*/
|
||||
if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
|
||||
pmd_trans_splitting(*pmd))
|
||||
goto unlock;
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
|
||||
!pmd_trans_splitting(*pmd));
|
||||
if (pmd_trans_huge(*pmd))
|
||||
return pmd;
|
||||
}
|
||||
unlock:
|
||||
spin_unlock(*ptl);
|
||||
return NULL;
|
||||
|
@@ -4675,7 +4675,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
|
||||
pte_t *pte;
|
||||
spinlock_t *ptl;
|
||||
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
|
||||
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
|
||||
mc.precharge += HPAGE_PMD_NR;
|
||||
spin_unlock(ptl);
|
||||
@@ -4863,16 +4863,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||
union mc_target target;
|
||||
struct page *page;
|
||||
|
||||
/*
|
||||
* No race with splitting thp happens because:
|
||||
* - if pmd_trans_huge_lock() returns 1, the relevant thp is not
|
||||
* under splitting, which means there's no concurrent thp split,
|
||||
* - if another thread runs into split_huge_page() just after we
|
||||
* entered this if-block, the thread must wait for page table lock
|
||||
* to be unlocked in __split_huge_page_splitting(), where the main
|
||||
* part of thp split is not executed yet.
|
||||
*/
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
|
||||
if (mc.precharge < HPAGE_PMD_NR) {
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
|
18
mm/memory.c
18
mm/memory.c
@@ -566,7 +566,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
pgtable_t new = pte_alloc_one(mm, address);
|
||||
int wait_split_huge_page;
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -586,18 +585,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
|
||||
|
||||
ptl = pmd_lock(mm, pmd);
|
||||
wait_split_huge_page = 0;
|
||||
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
|
||||
atomic_long_inc(&mm->nr_ptes);
|
||||
pmd_populate(mm, pmd, new);
|
||||
new = NULL;
|
||||
} else if (unlikely(pmd_trans_splitting(*pmd)))
|
||||
wait_split_huge_page = 1;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
if (new)
|
||||
pte_free(mm, new);
|
||||
if (wait_split_huge_page)
|
||||
wait_split_huge_page(vma->anon_vma, pmd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -613,8 +608,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
|
||||
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
|
||||
pmd_populate_kernel(&init_mm, pmd, new);
|
||||
new = NULL;
|
||||
} else
|
||||
VM_BUG_ON(pmd_trans_splitting(*pmd));
|
||||
}
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
if (new)
|
||||
pte_free_kernel(&init_mm, new);
|
||||
@@ -3374,14 +3368,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
if (pmd_trans_huge(orig_pmd)) {
|
||||
unsigned int dirty = flags & FAULT_FLAG_WRITE;
|
||||
|
||||
/*
|
||||
* If the pmd is splitting, return and retry the
|
||||
* fault. Alternative: wait until the split
|
||||
* is done, and goto retry.
|
||||
*/
|
||||
if (pmd_trans_splitting(orig_pmd))
|
||||
return 0;
|
||||
|
||||
if (pmd_protnone(orig_pmd))
|
||||
return do_huge_pmd_numa_page(mm, vma, address,
|
||||
orig_pmd, pmd);
|
||||
|
@@ -117,7 +117,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||
unsigned char *vec = walk->private;
|
||||
int nr = (end - addr) >> PAGE_SHIFT;
|
||||
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
|
||||
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
|
||||
memset(vec, 1, nr);
|
||||
spin_unlock(ptl);
|
||||
goto out;
|
||||
|
15
mm/mremap.c
15
mm/mremap.c
@@ -192,25 +192,24 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
|
||||
if (!new_pmd)
|
||||
break;
|
||||
if (pmd_trans_huge(*old_pmd)) {
|
||||
int err = 0;
|
||||
if (extent == HPAGE_PMD_SIZE) {
|
||||
bool moved;
|
||||
VM_BUG_ON_VMA(vma->vm_file || !vma->anon_vma,
|
||||
vma);
|
||||
/* See comment in move_ptes() */
|
||||
if (need_rmap_locks)
|
||||
anon_vma_lock_write(vma->anon_vma);
|
||||
err = move_huge_pmd(vma, new_vma, old_addr,
|
||||
moved = move_huge_pmd(vma, new_vma, old_addr,
|
||||
new_addr, old_end,
|
||||
old_pmd, new_pmd);
|
||||
if (need_rmap_locks)
|
||||
anon_vma_unlock_write(vma->anon_vma);
|
||||
if (moved) {
|
||||
need_flush = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (err > 0) {
|
||||
need_flush = true;
|
||||
continue;
|
||||
} else if (!err) {
|
||||
split_huge_pmd(vma, old_pmd, old_addr);
|
||||
}
|
||||
split_huge_pmd(vma, old_pmd, old_addr);
|
||||
VM_BUG_ON(pmd_trans_huge(*old_pmd));
|
||||
}
|
||||
if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma,
|
||||
|
@@ -61,8 +61,7 @@ static int page_idle_clear_pte_refs_one(struct page *page,
|
||||
bool referenced = false;
|
||||
|
||||
if (unlikely(PageTransHuge(page))) {
|
||||
pmd = page_check_address_pmd(page, mm, addr,
|
||||
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
|
||||
pmd = page_check_address_pmd(page, mm, addr, &ptl);
|
||||
if (pmd) {
|
||||
referenced = pmdp_clear_young_notify(vma, addr, pmd);
|
||||
spin_unlock(ptl);
|
||||
|
@@ -139,18 +139,6 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
|
||||
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
{
|
||||
pmd_t pmd = pmd_mksplitting(*pmdp);
|
||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
|
||||
/* tlb flush only to serialize against gup-fast */
|
||||
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
|
||||
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
pgtable_t pgtable)
|
||||
|
@@ -843,8 +843,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
|
||||
* rmap might return false positives; we must filter
|
||||
* these out using page_check_address_pmd().
|
||||
*/
|
||||
pmd = page_check_address_pmd(page, mm, address,
|
||||
PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
|
||||
pmd = page_check_address_pmd(page, mm, address, &ptl);
|
||||
if (!pmd)
|
||||
return SWAP_AGAIN;
|
||||
|
||||
@@ -854,7 +853,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
|
||||
return SWAP_FAIL; /* To break the loop */
|
||||
}
|
||||
|
||||
/* go ahead even if the pmd is pmd_trans_splitting() */
|
||||
if (pmdp_clear_flush_young_notify(vma, address, pmd))
|
||||
referenced++;
|
||||
spin_unlock(ptl);
|
||||
|
Reference in New Issue
Block a user