x86: Convert the rest of the code to support p4d_t

This patch converts x86 to use proper folding of a new (fifth) page table level
with <asm-generic/pgtable-nop4d.h>.

That's a bit of a kitchen sink patch, but I don't see how to split it further
without hurting bisectability.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170317185515.8636-7-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Kirill A. Shutemov
2017-03-17 21:55:15 +03:00
committed by Ingo Molnar
부모 907cd43902
커밋 f2a6a70501
11개의 변경된 파일403개의 추가작업 그리고 143개의 파일을 삭제

파일 보기

@@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
unsigned long address;
for (address = start; address <= end; address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
pgd_t *pgd_ref = pgd_offset_k(address);
const p4d_t *p4d_ref;
struct page *page;
if (pgd_none(*pgd_ref))
/*
* With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level.
*/
BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, address);
if (p4d_none(*p4d_ref))
continue;
spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
p4d_t *p4d;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
p4d = p4d_offset(pgd, address);
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
BUG_ON(p4d_page_vaddr(*p4d)
!= p4d_page_vaddr(*p4d_ref));
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
if (p4d_none(*p4d))
set_p4d(p4d, *p4d_ref);
spin_unlock(pgt_lock);
}
@@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
return ptr;
}
static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
{
if (pgd_none(*pgd)) {
pud_t *pud = (pud_t *)spp_getpage();
pgd_populate(&init_mm, pgd, pud);
if (pud != pud_offset(pgd, 0))
p4d_t *p4d = (p4d_t *)spp_getpage();
pgd_populate(&init_mm, pgd, p4d);
if (p4d != p4d_offset(pgd, 0))
printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
pud, pud_offset(pgd, 0));
p4d, p4d_offset(pgd, 0));
}
return pud_offset(pgd, vaddr);
return p4d_offset(pgd, vaddr);
}
static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
{
if (p4d_none(*p4d)) {
pud_t *pud = (pud_t *)spp_getpage();
p4d_populate(&init_mm, p4d, pud);
if (pud != pud_offset(p4d, 0))
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
pud, pud_offset(p4d, 0));
}
return pud_offset(p4d, vaddr);
}
static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
@@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
pmd_t *pmd = (pmd_t *) spp_getpage();
pud_populate(&init_mm, pud, pmd);
if (pmd != pmd_offset(pud, 0))
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
pmd, pmd_offset(pud, 0));
}
return pmd_offset(pud, vaddr);
@@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
pte_t *pte = (pte_t *) spp_getpage();
pmd_populate_kernel(&init_mm, pmd, pte);
if (pte != pte_offset_kernel(pmd, 0))
printk(KERN_ERR "PAGETABLE BUG #02!\n");
printk(KERN_ERR "PAGETABLE BUG #03!\n");
}
return pte_offset_kernel(pmd, vaddr);
}
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
{
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pud = pud_page + pud_index(vaddr);
pmd = fill_pmd(pud, vaddr);
pte = fill_pte(pmd, vaddr);
pmd_t *pmd = fill_pmd(pud, vaddr);
pte_t *pte = fill_pte(pmd, vaddr);
set_pte(pte, new_pte);
@@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
__flush_tlb_one(vaddr);
}
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
{
p4d_t *p4d = p4d_page + p4d_index(vaddr);
pud_t *pud = fill_pud(p4d, vaddr);
__set_pte_vaddr(pud, vaddr, new_pte);
}
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
pud_t *pud = pud_page + pud_index(vaddr);
__set_pte_vaddr(pud, vaddr, new_pte);
}
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
pud_t *pud_page;
p4d_t *p4d_page;
pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
@@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
"PGD FIXMAP MISSING, it should be setup in head.S!\n");
return;
}
pud_page = (pud_t*)pgd_page_vaddr(*pgd);
set_pte_vaddr_pud(pud_page, vaddr, pteval);
p4d_page = p4d_offset(pgd, 0);
set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
}
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pgd = pgd_offset_k(vaddr);
pud = fill_pud(pgd, vaddr);
p4d = fill_p4d(pgd, vaddr);
pud = fill_pud(p4d, vaddr);
return fill_pmd(pud, vaddr);
}
@@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
enum page_cache_mode cache)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgprot_t prot;
@@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
pgd = pgd_offset_k((unsigned long)__va(phys));
if (pgd_none(*pgd)) {
pud = (pud_t *) spp_getpage();
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
p4d = (p4d_t *) spp_getpage();
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
_PAGE_USER));
}
pud = pud_offset(pgd, (unsigned long)__va(phys));
p4d = p4d_offset(pgd, (unsigned long)__va(phys));
if (p4d_none(*p4d)) {
pud = (pud_t *) spp_getpage();
set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
_PAGE_USER));
}
pud = pud_offset(p4d, (unsigned long)__va(phys));
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
@@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
p4d_t *p4d;
pud_t *pud;
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
if (pgd_val(*pgd)) {
pud = (pud_t *)pgd_page_vaddr(*pgd);
BUILD_BUG_ON(pgd_none(*pgd));
p4d = p4d_offset(pgd, vaddr);
if (p4d_val(*p4d)) {
pud = (pud_t *)p4d_page_vaddr(*p4d);
paddr_last = phys_pud_init(pud, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
@@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, pud);
p4d_populate(&init_mm, p4d, pud);
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
@@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
spin_unlock(&init_mm.page_table_lock);
}
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
pud_t *pud;
int i;
for (i = 0; i < PTRS_PER_PUD; i++) {
pud = pud_start + i;
if (!pud_none(*pud))
return;
}
/* free a pud talbe */
free_pagetable(p4d_page(*p4d), 0);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
}
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
bool direct)
@@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
update_page_count(PG_LEVEL_1G, -pages);
}
static void __meminit
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
bool direct)
{
unsigned long next, pages = 0;
pud_t *pud_base;
p4d_t *p4d;
p4d = p4d_start + p4d_index(addr);
for (; addr < end; addr = next, p4d++) {
next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d))
continue;
BUILD_BUG_ON(p4d_large(*p4d));
pud_base = (pud_t *)p4d_page_vaddr(*p4d);
remove_pud_table(pud_base, addr, next, direct);
free_pud_table(pud_base, p4d);
}
if (direct)
update_page_count(PG_LEVEL_512G, -pages);
}
/* start and end are both virtual address. */
static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct)
@@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
unsigned long next;
unsigned long addr;
pgd_t *pgd;
pud_t *pud;
p4d_t *p4d;
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
@@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
if (!pgd_present(*pgd))
continue;
pud = (pud_t *)pgd_page_vaddr(*pgd);
remove_pud_table(pud, addr, next, direct);
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
remove_p4d_table(p4d, addr, next, direct);
}
flush_tlb_all();
@@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
if (pgd_none(*pgd))
return 0;
pud = pud_offset(pgd, addr);
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d))
return 0;
pud = pud_offset(p4d, addr);
if (pud_none(*pud))
return 0;
@@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
unsigned long addr;
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (!pgd)
return -ENOMEM;
pud = vmemmap_pud_populate(pgd, addr, node);
p4d = vmemmap_p4d_populate(pgd, addr, node);
if (!p4d)
return -ENOMEM;
pud = vmemmap_pud_populate(p4d, addr, node);
if (!pud)
return -ENOMEM;
@@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
unsigned long end = (unsigned long)(start_page + size);
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
unsigned int nr_pages;
@@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
}
get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
pud = pud_offset(pgd, addr);
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
next = (addr + PAGE_SIZE) & PAGE_MASK;
continue;
}
get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
next = (addr + PAGE_SIZE) & PAGE_MASK;
continue;