diff --git a/include/linux/mm.h b/include/linux/mm.h index fe192fbedf3e..e7daa0ab2f56 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1770,25 +1770,37 @@ extern int fixup_user_fault(struct mm_struct *mm, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT extern int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, - unsigned int flags); + unsigned int flags, + struct vm_area_struct **vma); static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, - unsigned int flags) + unsigned int flags, + struct vm_area_struct **vma) { /* * Try speculative page fault for multithreaded user space task only. */ - if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) + if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) { + *vma = NULL; return VM_FAULT_RETRY; - return __handle_speculative_fault(mm, address, flags); + } + return __handle_speculative_fault(mm, address, flags, vma); } +extern bool can_reuse_spf_vma(struct vm_area_struct *vma, + unsigned long address); #else static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, - unsigned int flags) + unsigned int flags, + struct vm_area_struct **vma) { return VM_FAULT_RETRY; } +static inline bool can_reuse_spf_vma(struct vm_area_struct *vma, + unsigned long address) +{ + return false; +} #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ void unmap_mapping_pages(struct address_space *mapping, diff --git a/mm/memory.c b/mm/memory.c index a3ab47ba6bb1..da76cd1d4bb0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4816,13 +4816,22 @@ static inline void mm_account_fault(struct pt_regs *regs, /* This is required by vm_normal_page() */ #error "Speculative page fault handler requires CONFIG_ARCH_HAS_PTE_SPECIAL" #endif - /* * vm_normal_page() adds some processing which should be done while * hodling the mmap_sem. */ + +/* + * Try to handle the page fault speculatively, that is without taking the + * mmap_sem. 
+ * When VM_FAULT_RETRY is returned, *vma is either NULL (get_vma() found no + * vma) or a vma on which put_vma() has not been called yet. In the latter + * case the caller must pass it to can_reuse_spf_vma() once the mmap_sem is + * held; that call checks the vma and then calls put_vma() on it. + * For the other return values, put_vma() is called here and *vma is NULL. + */ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, - unsigned int flags) + unsigned int flags, struct vm_area_struct **vma) { struct vm_fault vmf = { .address = address, @@ -4830,22 +4839,22 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pgd_t *pgd, pgdval; p4d_t *p4d, p4dval; pud_t pudval; - int seq, ret = VM_FAULT_RETRY; - struct vm_area_struct *vma; + int seq, ret; /* Clear flags that may lead to release the mmap_sem to retry */ flags &= ~(FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_KILLABLE); flags |= FAULT_FLAG_SPECULATIVE; - vma = get_vma(mm, address); - if (!vma) - return ret; + *vma = get_vma(mm, address); + if (!*vma) + return VM_FAULT_RETRY; + vmf.vma = *vma; /* rmb <-> seqlock,vma_rb_erase() */ - seq = raw_read_seqcount(&vma->vm_sequence); + seq = raw_read_seqcount(&vmf.vma->vm_sequence); if (seq & 1) { - trace_spf_vma_changed(_RET_IP_, vma, address); - goto out_put; + trace_spf_vma_changed(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } /* @@ -4853,9 +4862,9 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * with the VMA. * This include huge page from hugetlbfs. */ - if (vma->vm_ops) { - trace_spf_vma_notsup(_RET_IP_, vma, address); - goto out_put; + if (vmf.vma->vm_ops) { + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } /* @@ -4863,18 +4872,18 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * because vm_next and vm_prev must be safe. This can't be guaranteed * in the speculative path. 
*/ - if (unlikely(!vma->anon_vma)) { - trace_spf_vma_notsup(_RET_IP_, vma, address); - goto out_put; + if (unlikely(!vmf.vma->anon_vma)) { + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } - vmf.vma_flags = READ_ONCE(vma->vm_flags); - vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot); + vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags); + vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot); /* Can't call userland page fault handler in the speculative path */ if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) { - trace_spf_vma_notsup(_RET_IP_, vma, address); - goto out_put; + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) { @@ -4883,36 +4892,27 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * boundaries but we want to trace it as not supported instead * of changed. */ - trace_spf_vma_notsup(_RET_IP_, vma, address); - goto out_put; + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } - if (address < READ_ONCE(vma->vm_start) - || READ_ONCE(vma->vm_end) <= address) { - trace_spf_vma_changed(_RET_IP_, vma, address); - goto out_put; + if (address < READ_ONCE(vmf.vma->vm_start) + || READ_ONCE(vmf.vma->vm_end) <= address) { + trace_spf_vma_changed(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + if (!arch_vma_access_permitted(vmf.vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, - flags & FAULT_FLAG_REMOTE)) { - trace_spf_vma_access(_RET_IP_, vma, address); - ret = VM_FAULT_SIGSEGV; - goto out_put; - } + flags & FAULT_FLAG_REMOTE)) + goto out_segv; /* This is one is required to check that the VMA has write access set */ if (flags & FAULT_FLAG_WRITE) { - if (unlikely(!(vmf.vma_flags & VM_WRITE))) { - trace_spf_vma_access(_RET_IP_, vma, address); - ret = VM_FAULT_SIGSEGV; - goto out_put; - } - } else if 
(unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) { - trace_spf_vma_access(_RET_IP_, vma, address); - ret = VM_FAULT_SIGSEGV; - goto out_put; - } + if (unlikely(!(vmf.vma_flags & VM_WRITE))) + goto out_segv; + } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) + goto out_segv; #ifdef CONFIG_NUMA struct mempolicy *pol; @@ -4922,13 +4922,13 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * mpol_misplaced() which are not compatible with the *speculative page fault processing. */ - pol = __get_vma_policy(vma, address); + pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); if (!pol) if (pol && pol->mode == MPOL_INTERLEAVE) { - trace_spf_vma_notsup(_RET_IP_, vma, address); - goto out_put; + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } #endif @@ -4990,9 +4990,8 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, vmf.pte = NULL; } - vmf.vma = vma; - vmf.pgoff = linear_page_index(vma, address); - vmf.gfp_mask = __get_fault_gfp_mask(vma); + vmf.pgoff = linear_page_index(vmf.vma, address); + vmf.gfp_mask = __get_fault_gfp_mask(vmf.vma); vmf.sequence = seq; vmf.flags = flags; @@ -5002,16 +5001,22 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * We need to re-validate the VMA after checking the bounds, otherwise * we might have a false positive on the bounds. */ - if (read_seqcount_retry(&vma->vm_sequence, seq)) { - trace_spf_vma_changed(_RET_IP_, vma, address); - goto out_put; + if (read_seqcount_retry(&vmf.vma->vm_sequence, seq)) { + trace_spf_vma_changed(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; } mem_cgroup_enter_user_fault(); ret = handle_pte_fault(&vmf); mem_cgroup_exit_user_fault(); - put_vma(vma); + /* + * The vma is only handed back to the caller when a retry is needed. 
+ */ + if (ret != VM_FAULT_RETRY) { + put_vma(vmf.vma); + *vma = NULL; + } /* * The task may have entered a memcg OOM situation but @@ -5024,9 +5029,35 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, return ret; out_walk: - trace_spf_vma_notsup(_RET_IP_, vma, address); + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); local_irq_enable(); -out_put: + return VM_FAULT_RETRY; + +out_segv: + trace_spf_vma_access(_RET_IP_, vmf.vma, address); + /* + * VM_FAULT_RETRY is not returned, so the caller is not expected to use + * the vma: release it with put_vma() and clear the caller's pointer. + */ + put_vma(vmf.vma); + *vma = NULL; + return VM_FAULT_SIGSEGV; +} + +/* + * Tell whether the vma fetched by the speculative page fault handler can + * still be used for @address by the regular fault path, which calls this + * with the mmap_sem held. True is returned when the vma's vm_rb node is not + * empty (RB_EMPTY_NODE()) and @address lies in [vm_start, vm_end). + * put_vma(vma) must only be called after the vma's fields have been read, as + * it may free the vma. In such a case it is expected that false is returned. + */ +bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) +{ + bool ret; + + ret = !RB_EMPTY_NODE(&vma->vm_rb) && + vma->vm_start <= address && address < vma->vm_end; put_vma(vma); return ret; }