x86: define _PAGE_NUMA by reusing software bits on the PMD and PTE levels
_PAGE_NUMA is currently an alias of _PAGE_PROTNONE to trap NUMA hinting faults on x86. Care is taken such that _PAGE_NUMA is used only in situations where the VMA flags distinguish between NUMA hinting faults and prot_none faults. This decision was x86-specific and conceptually difficult, requiring special casing to distinguish between PROTNONE and NUMA ptes based on context.

Fundamentally, we only need the _PAGE_NUMA bit to tell the difference between an entry that is really unmapped and a page that is protected for NUMA hinting faults, as a fault will be trapped either way once the PTE is not present.

Swap PTEs on x86-64 use the bits after _PAGE_GLOBAL for the offset. This patch shrinks the maximum possible swap size and uses that bit to uniquely distinguish between NUMA hinting ptes and swap ptes.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
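The distinction the message describes can be modelled in a standalone sketch: with a dedicated software bit, telling a NUMA-hinting entry from a genuinely unmapped (swap) entry becomes a simple bit test rather than VMA-context special casing. All MODEL_* names below are invented for illustration and are not the kernel's real _PAGE_* definitions; the bit positions are arbitrary.

/* Standalone model: one reserved software bit separates NUMA-hinting
 * entries (not present to the MMU, but carrying the hint bit) from
 * swap entries (not present, no hint bit, remaining bits hold the
 * swap offset - which is why reserving the bit shrinks max swap size). */
#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_PRESENT  (1ULL << 0)   /* hardware present bit */
#define MODEL_PAGE_NUMA     (1ULL << 62)  /* software bit reserved for hints */

static int model_pte_numa(uint64_t pte)
{
	/* NUMA hinting pte: hint bit set, present bit clear */
	return (pte & (MODEL_PAGE_NUMA | MODEL_PAGE_PRESENT)) == MODEL_PAGE_NUMA;
}

int main(void)
{
	uint64_t numa_pte = MODEL_PAGE_NUMA;   /* protected for hinting faults */
	uint64_t swap_pte = 0x3000ULL << 9;    /* offset bits only, no hint bit */

	printf("numa_pte -> %d, swap_pte -> %d\n",
	       model_pte_numa(numa_pte), model_pte_numa(swap_pte));
	return 0;
}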
committed by Linus Torvalds
parent 4468dd76f5
commit c46a7c817e

 mm/memory.c | 17 +++++++----------
@@ -756,7 +756,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	unsigned long pfn = pte_pfn(pte);
 
 	if (HAVE_PTE_SPECIAL) {
-		if (likely(!pte_special(pte)))
+		if (likely(!pte_special(pte) || pte_numa(pte)))
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
@@ -782,14 +782,15 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
-	if (is_zero_pfn(pfn))
-		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
+
 	/*
	 * NOTE! We still have PageReserved() pages in the page tables.
	 * eg. VDSO mappings can cause them to exist.
@@ -1722,13 +1723,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
 
 	/*
-	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
-	 * would be called on PROT_NONE ranges. We must never invoke
-	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
-	 * page faults would unprotect the PROT_NONE ranges if
-	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
-	 * bitflag. So to avoid that, don't set FOLL_NUMA if
-	 * FOLL_FORCE is set.
+	 * If FOLL_FORCE is set then do not force a full fault as the hinting
+	 * fault information is unrelated to the reference behaviour of a task
+	 * using the address space
	 */
 	if (!(gup_flags & FOLL_FORCE))
 		gup_flags |= FOLL_NUMA;
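The hunk above simplifies the comment but keeps the flag logic: FOLL_NUMA is only added when FOLL_FORCE is absent, so forced accesses never take the NUMA hinting fault path. A rough standalone model of that decision (MODEL_FOLL_* values are invented for illustration, not the kernel's real FOLL_* definitions):

#include <stdio.h>

#define MODEL_FOLL_FORCE 0x1
#define MODEL_FOLL_NUMA  0x2

/* Mirrors the decision in __get_user_pages: only opt in to NUMA
 * hinting faults when the caller is not forcing access. */
static unsigned int model_gup_flags(unsigned int gup_flags)
{
	if (!(gup_flags & MODEL_FOLL_FORCE))
		gup_flags |= MODEL_FOLL_NUMA;
	return gup_flags;
}

int main(void)
{
	printf("plain : %#x\n", model_gup_flags(0));                 /* gains NUMA */
	printf("forced: %#x\n", model_gup_flags(MODEL_FOLL_FORCE));  /* unchanged */
	return 0;
}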