ANDROID: mm: allow vmas with vm_ops to be speculatively handled

Right now only anonymous page faults are handled speculatively,
leaving out a large percentage of faults that still require taking
mmap_sem. These were left out because fault handlers, mainly in the
fs layer, may use the vma in unknown ways. This patch enables
speculative fault handling for ext4, f2fs and shmem. The feature is
disabled by default and is enabled via the allow_file_spec_access
kernel parameter.

Bug: 171954515
Change-Id: I0d23ebf299000e4ac5e2c71bc0b7fc9006e98da9
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Author:    Vinayak Menon <vinmenon@codeaurora.org>
Date:      2021-03-18 15:20:17 +05:30
Committer: Todd Kjos
Commit:    35eacb5c87 (parent 77e791866b)

10 changed files with 170 additions and 37 deletions
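The mm/memory.c hunks shown below only add the policy check and call the
new ->allow_speculation hook on the vma's vm_operations_struct; a
filesystem still has to opt in by providing that hook, and the feature
only engages when the kernel is booted with allow_file_spec_access on
the command line. As a rough sketch of such an opt-in (not part of this
diff; the hook's no-argument bool signature is taken from the
vmf_allows_speculation() check below, while example_allow_speculation
and example_file_vm_ops are hypothetical names):

/*
 * Illustrative sketch only, not code from this commit.
 */
#include <linux/fs.h>
#include <linux/mm.h>

/* Tell the speculative-fault path this mapping's handlers are safe. */
static bool example_allow_speculation(void)
{
	return true;
}

static const struct vm_operations_struct example_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	/* Opt faults on this mapping in to speculative handling. */
	.allow_speculation = example_allow_speculation,
};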

mm/memory.c

@@ -2638,7 +2638,7 @@ out:
 	return ret;
 }
 
-static bool pte_map_lock(struct vm_fault *vmf)
+static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 {
 	bool ret = false;
 	pte_t *pte;
@@ -2647,12 +2647,6 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	pmd_t pmdval;
 #endif
 
-	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
-		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
-					       vmf->address, &vmf->ptl);
-		return true;
-	}
-
 	/*
 	 * The first vma_has_changed() guarantees the page-tables are still
 	 * valid, having IRQs disabled ensures they stay around, hence the
@@ -2662,7 +2656,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 */
 	local_irq_disable();
 	if (vma_has_changed(vmf)) {
-		trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
@@ -2673,7 +2667,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 */
 	pmdval = READ_ONCE(*vmf->pmd);
 	if (!pmd_same(pmdval, vmf->orig_pmd)) {
-		trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_pmd_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 #endif
@@ -2686,16 +2680,16 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 * Since we are in a speculative patch, accept it could fail
 	 */
 	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
-	pte = pte_offset_map(vmf->pmd, vmf->address);
+	pte = pte_offset_map(vmf->pmd, addr);
 	if (unlikely(!spin_trylock(ptl))) {
 		pte_unmap(pte);
-		trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_pte_lock(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 
 	if (vma_has_changed(vmf)) {
 		pte_unmap_unlock(pte, ptl);
-		trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
@@ -2706,6 +2700,82 @@ out:
 	local_irq_enable();
 	return ret;
 }
 
+static bool pte_map_lock(struct vm_fault *vmf)
+{
+	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+					       vmf->address, &vmf->ptl);
+		return true;
+	}
+
+	return __pte_map_lock_speculative(vmf, vmf->address);
+}
+
+bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
+{
+	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+					       addr, &vmf->ptl);
+		return true;
+	}
+
+	return __pte_map_lock_speculative(vmf, addr);
+}
+
+static bool __read_mostly allow_file_spec_access;
+static int __init allow_file_spec_access_setup(char *str)
+{
+	allow_file_spec_access = true;
+	return 1;
+}
+__setup("allow_file_spec_access", allow_file_spec_access_setup);
+
+static bool vmf_allows_speculation(struct vm_fault *vmf)
+{
+	if (vma_is_anonymous(vmf->vma)) {
+		/*
+		 * __anon_vma_prepare() requires the mmap_sem to be held
+		 * because vm_next and vm_prev must be safe. This can't be
+		 * guaranteed in the speculative path.
+		 */
+		if (!vmf->vma->anon_vma) {
+			trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+			return false;
+		}
+		return true;
+	}
+
+	if (!allow_file_spec_access) {
+		/*
+		 * Can't call vm_ops service has we don't know what they would
+		 * do with the VMA.
+		 * This include huge page from hugetlbfs.
+		 */
+		trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+		return false;
+	}
+
+	if (!(vmf->vma->vm_flags & VM_SHARED) &&
+	    (vmf->flags & FAULT_FLAG_WRITE) &&
+	    !vmf->vma->anon_vma) {
+		/*
+		 * non-anonymous private COW without anon_vma.
+		 * See above.
+		 */
+		trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+		return false;
+	}
+
+	if (vmf->vma->vm_ops->allow_speculation &&
+	    vmf->vma->vm_ops->allow_speculation()) {
+		return true;
+	}
+
+	trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+	return false;
+}
+
 #else
 
 static inline bool pte_spinlock(struct vm_fault *vmf)
 {
@@ -2720,6 +2790,18 @@ static inline bool pte_map_lock(struct vm_fault *vmf)
 					       vmf->address, &vmf->ptl);
 	return true;
 }
 
+inline bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
+{
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+				       addr, &vmf->ptl);
+	return true;
+}
+
+static inline bool vmf_allows_speculation(struct vm_fault *vmf)
+{
+	return false;
+}
+
 #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 /*
@@ -4496,6 +4578,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
+	vm_fault_t ret = 0;
 
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
@@ -4559,7 +4642,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!vmf->pte) {
 		if (vma_is_anonymous(vmf->vma))
 			return do_anonymous_page(vmf);
-		else if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		else if ((vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+			 !vmf_allows_speculation(vmf))
 			return VM_FAULT_RETRY;
 		else
 			return do_fault(vmf);
@@ -4591,6 +4675,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	/* Skip spurious TLB flush for retried page fault */
 	if (vmf->flags & FAULT_FLAG_TRIED)
 		goto unlock;
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		ret = VM_FAULT_RETRY;
 	/*
 	 * This is needed only for protection faults but the arch code
 	 * is not yet telling us if this is a protection fault or not.
@@ -4602,7 +4688,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	}
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return 0;
+	return ret;
 }
 
 /*
@@ -4794,6 +4880,7 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 		.pgoff = linear_page_index(vma, address),
 		.vma = vma,
 		.gfp_mask = __get_fault_gfp_mask(vma),
+		.flags = flags,
 	};
 #ifdef CONFIG_NUMA
 	struct mempolicy *pol;
@@ -4815,25 +4902,8 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 		return VM_FAULT_RETRY;
 	}
 
-	/*
-	 * Can't call vm_ops service has we don't know what they would do
-	 * with the VMA.
-	 * This include huge page from hugetlbfs.
-	 */
-	if (vmf.vma->vm_ops) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
+	if (!vmf_allows_speculation(&vmf))
 		return VM_FAULT_RETRY;
-	}
-
-	/*
-	 * __anon_vma_prepare() requires the mmap_sem to be held
-	 * because vm_next and vm_prev must be safe. This can't be guaranteed
-	 * in the speculative path.
-	 */
-	if (unlikely(!vmf.vma->anon_vma)) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
-		return VM_FAULT_RETRY;
-	}
 
 	vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
 	vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);
@@ -4964,8 +5034,12 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 	ret = handle_pte_fault(&vmf);
 	mem_cgroup_exit_user_fault();
 
-	if (ret != VM_FAULT_RETRY)
-		count_vm_event(SPECULATIVE_PGFAULT);
+	if (ret != VM_FAULT_RETRY) {
+		if (vma_is_anonymous(vmf.vma))
+			count_vm_event(SPECULATIVE_PGFAULT_ANON);
+		else
+			count_vm_event(SPECULATIVE_PGFAULT_FILE);
+	}
 
 	/*
 	 * The task may have entered a memcg OOM situation but