FROMLIST: mm: prepare for FAULT_FLAG_SPECULATIVE

When speculating faults (without holding mmap_sem), we need to validate
that the VMA against which we loaded pages is still valid when we are
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock(), which can fail if the VMA has changed since we
started the fault.
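
For this patch, pte_map_lock() is just an always-succeeding wrapper
around pte_offset_map_lock(); only the callers' failure paths are wired
up. As a rough sketch of where the series is heading (not part of this
patch: FAULT_FLAG_SPECULATIVE is introduced later in the series, and
vma_has_changed() is a placeholder name for the VMA revalidation
helper), the speculative-aware variant is expected to look roughly like:

	static bool pte_map_lock(struct vm_fault *vmf)
	{
		if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
			/* Regular fault: mmap_sem is held, just take the PTL. */
			vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
						       vmf->address, &vmf->ptl);
			return true;
		}

		/*
		 * Speculative fault: revalidate the VMA before and after
		 * taking the PTL; on failure the caller backs out and
		 * returns VM_FAULT_RETRY so the fault is retried with
		 * mmap_sem held.
		 */
		if (vma_has_changed(vmf))
			return false;
		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
					       vmf->address, &vmf->ptl);
		if (vma_has_changed(vmf)) {
			pte_unmap_unlock(vmf->pte, vmf->ptl);
			return false;
		}
		return true;
	}

Every call site converted below treats a false return as VM_FAULT_RETRY.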

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
 implemented as the vm_fault structure in the kernel]
[move pte_map_lock()'s definition higher up in the file]
[move the define of FAULT_FLAG_SPECULATIVE later in the series]
[review error path in do_swap_page(), do_anonymous_page() and
 wp_page_copy()]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Bug: 161210518
Link: https://lore.kernel.org/lkml/1523975611-15978-5-git-send-email-ldufour@linux.vnet.ibm.com/
Change-Id: Id6dfae130fbfdd4bb92aa6415d6f1db7ef833266
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Author:    Peter Zijlstra
Date:      2018-04-17 16:33:10 +02:00
Committer: Suren Baghdasaryan
Parent:    1ca86fafc5
Commit:    afeec97a8d

--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2560,6 +2560,13 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
 
+static inline bool pte_map_lock(struct vm_fault *vmf)
+{
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+				       vmf->address, &vmf->ptl);
+	return true;
+}
+
 /*
  * Scan a region of virtual memory, calling a provided function on
  * each leaf page table where it exists.
@@ -2848,20 +2855,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	pte_t entry;
 	int page_copied = 0;
 	struct mmu_notifier_range range;
+	int ret = VM_FAULT_OOM;
 
 	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
+		goto out;
 
 	if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma,
 							      vmf->address);
 		if (!new_page)
-			goto oom;
+			goto out;
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
 				vmf->address);
 		if (!new_page)
-			goto oom;
+			goto out;
 
 		if (!cow_user_page(new_page, old_page, vmf)) {
 			/*
@@ -2878,7 +2886,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	}
 
 	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
-		goto oom_free_new;
+		goto out_free_new;
 	cgroup_throttle_swaprate(new_page, GFP_KERNEL);
 
 	__SetPageUptodate(new_page);
@@ -2891,7 +2899,10 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto out_free_new;
+	}
 	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
@@ -2978,12 +2989,12 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 			put_page(old_page);
 	}
 	return page_copied ? VM_FAULT_WRITE : 0;
-oom_free_new:
+out_free_new:
 	put_page(new_page);
-oom:
+out:
 	if (old_page)
 		put_page(old_page);
-	return VM_FAULT_OOM;
+	return ret;
 }
 
 /**
@@ -3005,8 +3016,8 @@ oom:
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-				       &vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
 	/*
 	 * We might have raced with another page fault while we released the
 	 * pte_offset_map_lock.
@@ -3341,11 +3352,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 		if (!page) {
 			/*
-			 * Back out if somebody else faulted in this pte
-			 * while we released the pte lock.
+			 * Back out if the VMA has changed in our back during
+			 * a speculative page fault or if somebody else
+			 * faulted in this pte while we released the pte lock.
 			 */
-			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-					vmf->address, &vmf->ptl);
+			if (!pte_map_lock(vmf)) {
+				delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+				ret = VM_FAULT_RETRY;
+				goto out;
+			}
+
 			if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
 				ret = VM_FAULT_OOM;
 			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -3394,10 +3410,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	cgroup_throttle_swaprate(page, GFP_KERNEL);
 
 	/*
-	 * Back out if somebody else already faulted in this pte.
+	 * Back out if the VMA has changed in our back during a speculative
+	 * page fault or if somebody else already faulted in this pte.
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto out_page;
+	}
 	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
 		goto out_nomap;
 
@@ -3526,8 +3545,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 			!mm_forbids_zeropage(vma->vm_mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
 						vma->vm_page_prot));
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		if (!pte_map_lock(vmf))
+			return VM_FAULT_RETRY;
 		if (!pte_none(*vmf->pte)) {
 			update_mmu_tlb(vma, vmf->address, vmf->pte);
 			goto unlock;
@@ -3566,16 +3585,19 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		goto release;
+	}
 	if (!pte_none(*vmf->pte)) {
 		update_mmu_cache(vma, vmf->address, vmf->pte);
-		goto release;
+		goto unlock_and_release;
 	}
 
 	ret = check_stable_address_space(vma->vm_mm);
 	if (ret)
-		goto release;
+		goto unlock_and_release;
 
 	/* Deliver the page fault to userland, check inside PT lock */
 	if (userfaultfd_missing(vma)) {
@@ -3595,9 +3617,11 @@ setpte:
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return ret;
+unlock_and_release:
+	pte_unmap_unlock(vmf->pte, vmf->ptl);
 release:
 	put_page(page);
-	goto unlock;
+	return ret;
 oom_free_page:
 	put_page(page);
 oom:
@@ -3712,8 +3736,9 @@ map_pte:
 	 * pte_none() under vmf->ptl protection when we return to
 	 * alloc_set_pte().
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
+
 	return 0;
 }