x86: Convert the rest of the code to support p4d_t
This patch converts x86 to use proper folding of a new (fifth) page table
level with <asm-generic/pgtable-nop4d.h>.

That's a bit of a kitchen sink patch, but I don't see how to split it
further without hurting bisectability.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170317185515.8636-7-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
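Background for readers who have not seen the folding trick: with <asm-generic/pgtable-nop4d.h> the new p4d level collapses into the pgd, so a pgd entry can never look empty and every "is this entry populated?" check has to move one level down, to the p4d. That is exactly what the sync_global_pgds() hunk below relies on. The following standalone C program is only a simplified model of that behaviour; the type and helper names mirror the kernel's, but the bodies are illustrative, not the kernel's actual definitions.

/*
 * Illustrative sketch only (not kernel code): how a folded p4d level behaves.
 * When the p4d level is folded, p4d_t merely wraps pgd_t, p4d_offset()
 * returns the pgd entry itself, and pgd_none() is always false.
 */
#include <stdio.h>

typedef struct { unsigned long pgd; } pgd_t;
typedef struct { pgd_t pgd; } p4d_t;	/* folded: one p4d per pgd entry */

/* With the p4d folded into the pgd, a pgd entry can never be "none". */
static int pgd_none(pgd_t pgd) { (void)pgd; return 0; }

/* The p4d "table" is the pgd entry itself; no extra page is walked. */
static p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
	(void)address;
	return (p4d_t *)pgd;
}

static int p4d_none(p4d_t p4d) { return p4d.pgd.pgd == 0; }

int main(void)
{
	pgd_t pgd = { 0 };	/* an empty top-level entry */
	p4d_t *p4d = p4d_offset(&pgd, 0xffff880000000000UL);

	/* pgd_none() can no longer be used to detect an empty entry ... */
	printf("pgd_none: %d\n", pgd_none(pgd));	/* always 0 */
	/* ... the check has to happen one level down, on the p4d. */
	printf("p4d_none: %d\n", p4d_none(*p4d));	/* 1: really empty */
	return 0;
}

Compiled and run, this prints "pgd_none: 0" and "p4d_none: 1": an empty top-level entry is only detectable at the p4d level once the extra level is folded, which is why the patch moves the none/populate checks from pgd to p4d throughout.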
@@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 	unsigned long address;
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
-		const pgd_t *pgd_ref = pgd_offset_k(address);
+		pgd_t *pgd_ref = pgd_offset_k(address);
+		const p4d_t *p4d_ref;
 		struct page *page;
 
-		if (pgd_none(*pgd_ref))
+		/*
+		 * With folded p4d, pgd_none() is always false, we need to
+		 * handle synchonization on p4d level.
+		 */
+		BUILD_BUG_ON(pgd_none(*pgd_ref));
+		p4d_ref = p4d_offset(pgd_ref, address);
+
+		if (p4d_none(*p4d_ref))
 			continue;
 
 		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
+			p4d_t *p4d;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			p4d = p4d_offset(pgd, address);
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
-			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-				BUG_ON(pgd_page_vaddr(*pgd)
-				       != pgd_page_vaddr(*pgd_ref));
+			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
+				BUG_ON(p4d_page_vaddr(*p4d)
+				       != p4d_page_vaddr(*p4d_ref));
 
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
+			if (p4d_none(*p4d))
+				set_p4d(p4d, *p4d_ref);
 
 			spin_unlock(pgt_lock);
 		}
@@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
 	return ptr;
 }
 
-static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
 	if (pgd_none(*pgd)) {
-		pud_t *pud = (pud_t *)spp_getpage();
-		pgd_populate(&init_mm, pgd, pud);
-		if (pud != pud_offset(pgd, 0))
+		p4d_t *p4d = (p4d_t *)spp_getpage();
+		pgd_populate(&init_mm, pgd, p4d);
+		if (p4d != p4d_offset(pgd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
-			       pud, pud_offset(pgd, 0));
+			       p4d, p4d_offset(pgd, 0));
 	}
-	return pud_offset(pgd, vaddr);
+	return p4d_offset(pgd, vaddr);
 }
 
+static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
+{
+	if (p4d_none(*p4d)) {
+		pud_t *pud = (pud_t *)spp_getpage();
+		p4d_populate(&init_mm, p4d, pud);
+		if (pud != pud_offset(p4d, 0))
+			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+			       pud, pud_offset(p4d, 0));
+	}
+	return pud_offset(p4d, vaddr);
+}
+
 static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
@@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
 		pmd_t *pmd = (pmd_t *) spp_getpage();
 		pud_populate(&init_mm, pud, pmd);
 		if (pmd != pmd_offset(pud, 0))
-			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+			printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
 			       pmd, pmd_offset(pud, 0));
 	}
 	return pmd_offset(pud, vaddr);
@@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
 		pte_t *pte = (pte_t *) spp_getpage();
 		pmd_populate_kernel(&init_mm, pmd, pte);
 		if (pte != pte_offset_kernel(pmd, 0))
-			printk(KERN_ERR "PAGETABLE BUG #02!\n");
+			printk(KERN_ERR "PAGETABLE BUG #03!\n");
 	}
 	return pte_offset_kernel(pmd, vaddr);
 }
 
-void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
 {
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pud = pud_page + pud_index(vaddr);
-	pmd = fill_pmd(pud, vaddr);
-	pte = fill_pte(pmd, vaddr);
+	pmd_t *pmd = fill_pmd(pud, vaddr);
+	pte_t *pte = fill_pte(pmd, vaddr);
 
 	set_pte(pte, new_pte);
 
@@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
 	__flush_tlb_one(vaddr);
 }
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
+{
+	p4d_t *p4d = p4d_page + p4d_index(vaddr);
+	pud_t *pud = fill_pud(p4d, vaddr);
+
+	__set_pte_vaddr(pud, vaddr, new_pte);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+	pud_t *pud = pud_page + pud_index(vaddr);
+
+	__set_pte_vaddr(pud, vaddr, new_pte);
+}
+
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
 	pgd_t *pgd;
-	pud_t *pud_page;
+	p4d_t *p4d_page;
 
 	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
 
@@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 			"PGD FIXMAP MISSING, it should be setup in head.S!\n");
 		return;
 	}
-	pud_page = (pud_t*)pgd_page_vaddr(*pgd);
-	set_pte_vaddr_pud(pud_page, vaddr, pteval);
+
+	p4d_page = p4d_offset(pgd, 0);
+	set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
 }
 
 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 
 	pgd = pgd_offset_k(vaddr);
-	pud = fill_pud(pgd, vaddr);
+	p4d = fill_p4d(pgd, vaddr);
+	pud = fill_pud(p4d, vaddr);
 	return fill_pmd(pud, vaddr);
 }
 
@@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
 					enum page_cache_mode cache)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pgprot_t prot;
@@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
 	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
 		pgd = pgd_offset_k((unsigned long)__va(phys));
 		if (pgd_none(*pgd)) {
-			pud = (pud_t *) spp_getpage();
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+			p4d = (p4d_t *) spp_getpage();
+			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
 						_PAGE_USER));
 		}
-		pud = pud_offset(pgd, (unsigned long)__va(phys));
+		p4d = p4d_offset(pgd, (unsigned long)__va(phys));
+		if (p4d_none(*p4d)) {
+			pud = (pud_t *) spp_getpage();
+			set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		pud = pud_offset(p4d, (unsigned long)__va(phys));
 		if (pud_none(*pud)) {
 			pmd = (pmd_t *) spp_getpage();
 			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
@@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 
 	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
 		pgd_t *pgd = pgd_offset_k(vaddr);
+		p4d_t *p4d;
 		pud_t *pud;
 
 		vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
-		if (pgd_val(*pgd)) {
-			pud = (pud_t *)pgd_page_vaddr(*pgd);
+		BUILD_BUG_ON(pgd_none(*pgd));
+		p4d = p4d_offset(pgd, vaddr);
+		if (p4d_val(*p4d)) {
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
 			paddr_last = phys_pud_init(pud, __pa(vaddr),
 						   __pa(vaddr_end),
 						   page_size_mask);
@@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		pgd_populate(&init_mm, pgd, pud);
+		p4d_populate(&init_mm, p4d, pud);
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 	spin_unlock(&init_mm.page_table_lock);
 }
 
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	/* free a pud talbe */
+	free_pagetable(p4d_page(*p4d), 0);
+	spin_lock(&init_mm.page_table_lock);
+	p4d_clear(p4d);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 		 bool direct)
@@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
 		update_page_count(PG_LEVEL_1G, -pages);
 }
 
+static void __meminit
+remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pud_t *pud_base;
+	p4d_t *p4d;
+
+	p4d = p4d_start + p4d_index(addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_present(*p4d))
+			continue;
+
+		BUILD_BUG_ON(p4d_large(*p4d));
+
+		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+		remove_pud_table(pud_base, addr, next, direct);
+		free_pud_table(pud_base, p4d);
+	}
+
+	if (direct)
+		update_page_count(PG_LEVEL_512G, -pages);
+}
+
 /* start and end are both virtual address. */
 static void __meminit
 remove_pagetable(unsigned long start, unsigned long end, bool direct)
@@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	unsigned long next;
 	unsigned long addr;
 	pgd_t *pgd;
-	pud_t *pud;
+	p4d_t *p4d;
 
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
@@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 		if (!pgd_present(*pgd))
 			continue;
 
-		pud = (pud_t *)pgd_page_vaddr(*pgd);
-		remove_pud_table(pud, addr, next, direct);
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		remove_p4d_table(p4d, addr, next, direct);
 	}
 
 	flush_tlb_all();
@@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
 	if (pgd_none(*pgd))
 		return 0;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return 0;
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud))
 		return 0;
 
@@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 	unsigned long addr;
 	unsigned long next;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 		if (!pgd)
 			return -ENOMEM;
 
-		pud = vmemmap_pud_populate(pgd, addr, node);
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(p4d, addr, node);
 		if (!pud)
 			return -ENOMEM;
 
@@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 	unsigned long end = (unsigned long)(start_page + size);
 	unsigned long next;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	unsigned int nr_pages;
@@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 		}
 		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
 
-		pud = pud_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (p4d_none(*p4d)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
+
+		pud = pud_offset(p4d, addr);
 		if (pud_none(*pud)) {
 			next = (addr + PAGE_SIZE) & PAGE_MASK;
 			continue;