From afeec97a8dfcc5d27e5088691de3f0716469fee9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 17 Apr 2018 16:33:10 +0200
Subject: [PATCH] FROMLIST: mm: prepare for FAULT_FLAG_SPECULATIVE

When speculating faults (without holding mmap_sem) we need to validate
that the vma against which we loaded pages is still valid when we're
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock() which can fail in case we find the VMA changed
since we started the fault.

Signed-off-by: Peter Zijlstra (Intel)

[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
 implemented as the vm_fault structure in the kernel]
[move pte_map_lock()'s definition upper in the file]
[move the define of FAULT_FLAG_SPECULATIVE later in the series]
[review error path in do_swap_page(), do_anonymous_page() and
 wp_page_copy()]
Signed-off-by: Laurent Dufour

Bug: 161210518
Link: https://lore.kernel.org/lkml/1523975611-15978-5-git-send-email-ldufour@linux.vnet.ibm.com/
Change-Id: Id6dfae130fbfdd4bb92aa6415d6f1db7ef833266
Signed-off-by: Vinayak Menon
Signed-off-by: Charan Teja Reddy
---
 mm/memory.c | 77 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index e10008d48661..0d9e6829e20a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2560,6 +2560,13 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
 
+static inline bool pte_map_lock(struct vm_fault *vmf)
+{
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+				       vmf->address, &vmf->ptl);
+	return true;
+}
+
 /*
  * Scan a region of virtual memory, calling a provided function on
  * each leaf page table where it exists.
@@ -2848,20 +2855,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	pte_t entry;
 	int page_copied = 0;
 	struct mmu_notifier_range range;
+	int ret = VM_FAULT_OOM;
 
 	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
+		goto out;
 
 	if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma,
 							      vmf->address);
 		if (!new_page)
-			goto oom;
+			goto out;
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
 				vmf->address);
 		if (!new_page)
-			goto oom;
+			goto out;
 
 		if (!cow_user_page(new_page, old_page, vmf)) {
 			/*
@@ -2878,7 +2886,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	}
 
 	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
-		goto oom_free_new;
+		goto out_free_new;
 	cgroup_throttle_swaprate(new_page, GFP_KERNEL);
 
 	__SetPageUptodate(new_page);
@@ -2891,7 +2899,10 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto out_free_new;
+	}
 	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
@@ -2978,12 +2989,12 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		put_page(old_page);
 	}
 	return page_copied ? VM_FAULT_WRITE : 0;
-oom_free_new:
+out_free_new:
 	put_page(new_page);
-oom:
+out:
 	if (old_page)
 		put_page(old_page);
-	return VM_FAULT_OOM;
+	return ret;
 }
 
 /**
@@ -3005,8 +3016,8 @@ oom:
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-				       &vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
 	/*
 	 * We might have raced with another page fault while we released the
 	 * pte_offset_map_lock.
@@ -3341,11 +3352,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 	if (!page) {
 		/*
-		 * Back out if somebody else faulted in this pte
-		 * while we released the pte lock.
+		 * Back out if the VMA has changed in our back during
+		 * a speculative page fault or if somebody else
+		 * faulted in this pte while we released the pte lock.
 		 */
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		if (!pte_map_lock(vmf)) {
+			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+			ret = VM_FAULT_RETRY;
+			goto out;
+		}
+
 		if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
 			ret = VM_FAULT_OOM;
 		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -3394,10 +3410,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	cgroup_throttle_swaprate(page, GFP_KERNEL);
 
 	/*
-	 * Back out if somebody else already faulted in this pte.
+	 * Back out if the VMA has changed in our back during a speculative
+	 * page fault or if somebody else already faulted in this pte.
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto out_page;
+	}
 	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
 		goto out_nomap;
 
@@ -3526,8 +3545,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	    !mm_forbids_zeropage(vma->vm_mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
 						vma->vm_page_prot));
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		if (!pte_map_lock(vmf))
+			return VM_FAULT_RETRY;
 		if (!pte_none(*vmf->pte)) {
 			update_mmu_tlb(vma, vmf->address, vmf->pte);
 			goto unlock;
@@ -3566,16 +3585,19 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto release;
+	}
+
 	if (!pte_none(*vmf->pte)) {
 		update_mmu_cache(vma, vmf->address, vmf->pte);
-		goto release;
+		goto unlock_and_release;
 	}
 
 	ret = check_stable_address_space(vma->vm_mm);
 	if (ret)
-		goto release;
+		goto unlock_and_release;
 
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
@@ -3595,9 +3617,11 @@ setpte:
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return ret;
+unlock_and_release:
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 release:
 	put_page(page);
-	goto unlock;
+	return ret;
 oom_free_page:
 	put_page(page);
 oom:
@@ -3712,8 +3736,9 @@ map_pte:
 	 * pte_none() under vmf->ptl protection when we return to
 	 * alloc_set_pte().
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
+
 	return 0;
 }
 
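
For illustration, a minimal user-space sketch of the contract this patch
sets up: pte_map_lock() may start failing once speculative faults land,
and every caller must be prepared to unwind and return VM_FAULT_RETRY.
The revalidation test below (a bare sequence-count check, vma_seq) is an
assumption standing in for the real VMA validation added later in the
series; only the names pte_map_lock, vm_fault and VM_FAULT_RETRY are
taken from the patch, everything else is hypothetical scaffolding.

/* Model of the pte_map_lock() caller contract; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define VM_FAULT_RETRY	0x0400	/* numeric value borrowed from the kernel */

struct vm_fault {
	unsigned long	address;
	unsigned int	seq;		/* VMA sequence sampled at fault start */
	bool		speculative;	/* stand-in for FAULT_FLAG_SPECULATIVE */
};

static unsigned int vma_seq;		/* bumped when the "VMA" is modified */

/*
 * In this patch pte_map_lock() always succeeds; later in the series the
 * speculative path revalidates the VMA and may fail.  Modeled here as a
 * sequence-count comparison (an assumption, not the kernel's check).
 */
static bool pte_map_lock(struct vm_fault *vmf)
{
	if (vmf->speculative && vmf->seq != vma_seq)
		return false;		/* VMA changed under us: back out */
	/* take "PTL" and map the "pte" here */
	return true;
}

static int handle_fault(struct vm_fault *vmf)
{
	if (!pte_map_lock(vmf))
		return VM_FAULT_RETRY;	/* caller retries under mmap_sem */
	/* install the PTE, then unlock */
	return 0;
}

int main(void)
{
	struct vm_fault vmf = {
		.address = 0x1000,
		.seq = vma_seq,
		.speculative = true,
	};

	vma_seq++;			/* simulate a concurrent munmap/mprotect */
	printf("fault -> %s\n",
	       handle_fault(&vmf) == VM_FAULT_RETRY ? "VM_FAULT_RETRY" : "done");
	return 0;
}

Note how every call site in the diff above follows the same shape as
handle_fault(): the distinct labels (out, out_free_new, out_page,
release) only differ in how much state has to be undone before the
retry can be reported.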