FROMLIST: mm: cache some VMA fields in the vm_fault structure
When handling a speculative page fault, the vma->vm_flags and vma->vm_page_prot fields are read after the page table lock has been released, so there is no longer any guarantee that these fields will not change behind our back. They are therefore saved in the vm_fault structure before the VMA is checked for changes. This patch also sets these fields in hugetlb_no_page() and __collapse_huge_page_swapin(), even though the callees there do not need them.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Change-Id: I9821f02ea32ef220b57b8bfd817992bbf71bbb1d
Link: https://lore.kernel.org/lkml/1523975611-15978-13-git-send-email-ldufour@linux.vnet.ibm.com/
Bug: 161210518
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Committed by: Suren Baghdasaryan
Parent: 73ab9e34ff
Commit: 32507b6ff2
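For readers unfamiliar with the speculative page fault series, the sketch below illustrates the pattern this patch relies on, written as plain userspace C rather than kernel code: the fault path snapshots the VMA fields it needs before the VMA is validated, and every later decision uses only the snapshot, so a concurrent change to the VMA cannot be observed halfway through the fault. All names here (toy_vma, toy_vm_fault, the toy_fault_* helpers, TOY_VM_* flags) are invented for illustration and are not the kernel's actual API.

/* Minimal sketch of caching VMA fields for a speculative fault path. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_VM_WRITE	0x1UL
#define TOY_VM_SHARED	0x2UL

struct toy_vma {
	unsigned long vm_flags;
	unsigned long vm_page_prot;	/* pgprot_t stand-in */
	atomic_uint seq;		/* bumped whenever the VMA is modified */
};

struct toy_vm_fault {
	struct toy_vma *vma;
	unsigned long vma_flags;	/* snapshot of vma->vm_flags */
	unsigned long vma_page_prot;	/* snapshot of vma->vm_page_prot */
	unsigned int seq_snapshot;
};

/* Take the snapshot up front, before the VMA is checked for changes. */
static void toy_fault_init(struct toy_vm_fault *vmf, struct toy_vma *vma)
{
	vmf->vma = vma;
	vmf->seq_snapshot = atomic_load(&vma->seq);
	vmf->vma_flags = vma->vm_flags;
	vmf->vma_page_prot = vma->vm_page_prot;
}

/* Later decisions use only the cached copies, never the live VMA. */
static bool toy_fault_is_cow(const struct toy_vm_fault *vmf, bool write)
{
	return write && !(vmf->vma_flags & TOY_VM_SHARED);
}

/* The fault is only committed if the VMA did not change in between. */
static bool toy_fault_still_valid(const struct toy_vm_fault *vmf)
{
	return atomic_load(&vmf->vma->seq) == vmf->seq_snapshot;
}

int main(void)
{
	struct toy_vma vma = { .vm_flags = TOY_VM_WRITE, .vm_page_prot = 0x7 };
	struct toy_vm_fault vmf;

	atomic_init(&vma.seq, 0);
	toy_fault_init(&vmf, &vma);
	printf("COW fault: %d, still valid: %d\n",
	       toy_fault_is_cow(&vmf, true), toy_fault_still_valid(&vmf));
	return 0;
}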
include/linux/mm.h
@@ -549,6 +549,12 @@ struct vm_fault {
					 * page table to avoid allocation from
					 * atomic context.
					 */
+	/*
+	 * These entries are required when handling speculative page fault.
+	 * This way the page handling is done using consistent field values.
+	 */
+	unsigned long vma_flags;
+	pgprot_t vma_page_prot;
 };

 /* page entry size for vm->huge_fault() */
@@ -982,9 +988,9 @@ void free_compound_page(struct page *page);
  * pte_mkwrite. But get_user_pages can cause write faults for mappings
  * that do not have writing enabled, when used by access_process_vm.
  */
-static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma_flags & VM_WRITE))
 		pte = pte_mkwrite(pte);
 	return pte;
 }
mm/huge_memory.c
@@ -2130,7 +2130,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = pte_swp_mkuffd_wp(entry);
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
-			entry = maybe_mkwrite(entry, vma);
+			entry = maybe_mkwrite(entry, vma->vm_flags);
 			if (!write)
 				entry = pte_wrprotect(entry);
 			if (!young)
mm/hugetlb.c
@@ -4298,6 +4298,8 @@ retry:
 			.vma = vma,
 			.address = haddr,
 			.flags = flags,
+			.vma_flags = vma->vm_flags,
+			.vma_page_prot = vma->vm_page_prot,
 			/*
 			 * Hard to debug if it ends up being
 			 * used by a callee that assumes
mm/khugepaged.c
@@ -1000,6 +1000,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		.flags = FAULT_FLAG_ALLOW_RETRY,
 		.pmd = pmd,
 		.pgoff = linear_page_index(vma, address),
+		.vma_flags = vma->vm_flags,
+		.vma_page_prot = vma->vm_page_prot,
 	};

 	vmf.pte = pte_offset_map(pmd, address);
mm/memory.c (53 changed lines)
@@ -856,7 +856,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma

 	/* All done, just insert the new page copy in the child */
 	pte = mk_pte(new_page, dst_vma->vm_page_prot);
-	pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma);
+	pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma->vm_flags);
 	set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
 	return 0;
 }
@@ -1962,7 +1962,8 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			goto out_unlock;
 		}
 		entry = pte_mkyoung(*pte);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		entry = maybe_mkwrite(pte_mkdirty(entry),
+					vma->vm_flags);
 		if (ptep_set_access_flags(vma, addr, pte, entry, 1))
 			update_mmu_cache(vma, addr, pte);
 	}
@@ -1977,7 +1978,7 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,

 	if (mkwrite) {
 		entry = pte_mkyoung(entry);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma->vm_flags);
 	}

 	set_pte_at(mm, addr, pte, entry);
@@ -2844,7 +2845,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)

 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 	entry = pte_mkyoung(vmf->orig_pte);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2935,9 +2936,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		inc_mm_counter_fast(mm, MM_ANONPAGES);
 	}
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
-	entry = mk_pte(new_page, vma->vm_page_prot);
+	entry = mk_pte(new_page, vmf->vma_page_prot);
 	entry = pte_sw_mkyoung(entry);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	/*
 	 * Clear the pte entry and flush it first, before updating the
 	 * pte with the new entry. This will avoid a race condition
@@ -3001,7 +3002,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		 * Don't let another task, with possibly unlocked vma,
 		 * keep the mlocked page.
 		 */
-		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
+		if (page_copied && (vmf->vma_flags & VM_LOCKED)) {
 			lock_page(old_page);	/* LRU manipulation */
 			if (PageMlocked(old_page))
 				munlock_vma_page(old_page);
@@ -3036,7 +3037,7 @@ out:
  */
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
-	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
+	WARN_ON_ONCE(!(vmf->vma_flags & VM_SHARED));
 	if (!pte_map_lock(vmf))
 		return VM_FAULT_RETRY;
 	/*
@@ -3145,7 +3146,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		 * We should not cow pages in a shared writeable mapping.
 		 * Just mark the pages writable and/or call ops->pfn_mkwrite.
 		 */
-		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+		if ((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
				     (VM_WRITE|VM_SHARED))
			return wp_pfn_shared(vmf);

@@ -3177,7 +3178,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			unlock_page(page);
 			wp_page_reuse(vmf);
 			return VM_FAULT_WRITE;
-	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+	} else if (unlikely((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
		return wp_page_shared(vmf);
	}
@@ -3467,9 +3468,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)

 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
-	pte = mk_pte(page, vma->vm_page_prot);
+	pte = mk_pte(page, vmf->vma_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
-		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+		pte = maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;
@@ -3495,7 +3496,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)

 	swap_free(entry);
 	if (mem_cgroup_swap_full(page) ||
-	    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+	    (vmf->vma_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
 	if (page != swapcache && swapcache) {
@@ -3550,7 +3551,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	pte_t entry;

 	/* File mapping without ->vm_ops ? */
-	if (vma->vm_flags & VM_SHARED)
+	if (vmf->vma_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;

 	/*
@@ -3574,7 +3575,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
 			!mm_forbids_zeropage(vma->vm_mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
-						vma->vm_page_prot));
+						vmf->vma_page_prot));
 		if (!pte_map_lock(vmf))
 			return VM_FAULT_RETRY;
 		if (!pte_none(*vmf->pte)) {
@@ -3610,9 +3611,9 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	 */
 	__SetPageUptodate(page);

-	entry = mk_pte(page, vma->vm_page_prot);
+	entry = mk_pte(page, vmf->vma_page_prot);
 	entry = pte_sw_mkyoung(entry);
-	if (vma->vm_flags & VM_WRITE)
+	if (vmf->vma_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));

 	if (!pte_map_lock(vmf)) {
@@ -3820,7 +3821,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	for (i = 0; i < HPAGE_PMD_NR; i++)
 		flush_icache_page(vma, page + i);

-	entry = mk_huge_pmd(page, vma->vm_page_prot);
+	entry = mk_huge_pmd(page, vmf->vma_page_prot);
 	if (write)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);

@@ -3892,12 +3893,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 	}

 	flush_icache_page(vma, page);
-	entry = mk_pte(page, vma->vm_page_prot);
+	entry = mk_pte(page, vmf->vma_page_prot);
 	entry = pte_sw_mkyoung(entry);
 	if (write)
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	/* copy-on-write page */
-	if (write && !(vma->vm_flags & VM_SHARED)) {
+	if (write && !(vmf->vma_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
 		lru_cache_add_inactive_or_unevictable(page, vma);
@@ -3936,7 +3937,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)

 	/* Did we COW the page? */
 	if ((vmf->flags & FAULT_FLAG_WRITE) &&
-	    !(vmf->vma->vm_flags & VM_SHARED))
+	    !(vmf->vma_flags & VM_SHARED))
 		page = vmf->cow_page;
 	else
 		page = vmf->page;
@@ -4214,7 +4215,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 		}
 	} else if (!(vmf->flags & FAULT_FLAG_WRITE))
 		ret = do_read_fault(vmf);
-	else if (!(vma->vm_flags & VM_SHARED))
+	else if (!(vmf->vma_flags & VM_SHARED))
 		ret = do_cow_fault(vmf);
 	else
 		ret = do_shared_fault(vmf);
@@ -4271,7 +4272,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
 	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
-	pte = pte_modify(old_pte, vma->vm_page_prot);
+	pte = pte_modify(old_pte, vmf->vma_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
@@ -4305,7 +4306,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Flag if the page is shared between multiple address spaces. This
 	 * is later used when determining whether to group tasks together
 	 */
-	if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
+	if (page_mapcount(page) > 1 && (vmf->vma_flags & VM_SHARED))
 		flags |= TNF_SHARED;

 	last_cpupid = page_cpupid_last(page);
@@ -4511,6 +4512,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		.flags = flags,
 		.pgoff = linear_page_index(vma, address),
 		.gfp_mask = __get_fault_gfp_mask(vma),
+		.vma_flags = vma->vm_flags,
+		.vma_page_prot = vma->vm_page_prot,
 	};
 	unsigned int dirty = flags & FAULT_FLAG_WRITE;
 	struct mm_struct *mm = vma->vm_mm;
mm/migrate.c
@@ -242,7 +242,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		 */
 		entry = pte_to_swp_entry(*pvmw.pte);
 		if (is_write_migration_entry(entry))
-			pte = maybe_mkwrite(pte, vma);
+			pte = maybe_mkwrite(pte, vma->vm_flags);
 		else if (pte_swp_uffd_wp(*pvmw.pte))
 			pte = pte_mkuffd_wp(pte);