Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar: "The main changes in this cycle were: - Continued work to add support for 5-level paging provided by future Intel CPUs. In particular we switch the x86 GUP code to the generic implementation. (Kirill A. Shutemov) - Continued work to add PCID CPU support to native kernels as well. In this round most of the focus is on reworking/refreshing the TLB flush infrastructure for the upcoming PCID changes. (Andy Lutomirski)" * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits) x86/mm: Delete a big outdated comment about TLB flushing x86/mm: Don't reenter flush_tlb_func_common() x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging x86/ftrace: Exclude functions in head64.c from function-tracing x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap x86/mm: Remove reset_lazy_tlbstate() x86/ldt: Simplify the LDT switching logic x86/boot/64: Put __startup_64() into .head.text x86/mm: Add support for 5-level paging for KASLR x86/mm: Make kernel_physical_mapping_init() support 5-level paging x86/mm: Add sync_global_pgds() for configuration with 5-level paging x86/boot/64: Add support of additional page table level during early boot x86/boot/64: Rename init_level4_pgt and early_level4_pgt x86/boot/64: Rewrite startup_64() in C x86/boot/compressed: Enable 5-level paging during decompression stage x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations x86/boot/efi: Cleanup initialization of GDT entries x86/asm: Fix comment in return_from_SYSCALL_64() x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation ...
This commit is contained in:
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
|
||||
memset((char *)gdt->address, 0x0, gdt->size);
|
||||
desc = (struct desc_struct *)gdt->address;
|
||||
|
||||
/* The first GDT is a dummy and the second is unused. */
|
||||
desc += 2;
|
||||
/* The first GDT is a dummy. */
|
||||
desc++;
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
/* __KERNEL32_CS */
|
||||
desc->limit0 = 0xffff;
|
||||
desc->base0 = 0x0000;
|
||||
desc->base1 = 0x0000;
|
||||
desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
|
||||
desc->s = DESC_TYPE_CODE_DATA;
|
||||
desc->dpl = 0;
|
||||
desc->p = 1;
|
||||
desc->limit = 0xf;
|
||||
desc->avl = 0;
|
||||
desc->l = 0;
|
||||
desc->d = SEG_OP_SIZE_32BIT;
|
||||
desc->g = SEG_GRANULARITY_4KB;
|
||||
desc->base2 = 0x00;
|
||||
desc++;
|
||||
} else {
|
||||
/* Second entry is unused on 32-bit */
|
||||
desc++;
|
||||
}
|
||||
|
||||
/* __KERNEL_CS */
|
||||
desc->limit0 = 0xffff;
|
||||
desc->base0 = 0x0000;
|
||||
desc->base1 = 0x0000;
|
||||
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
|
||||
desc->p = 1;
|
||||
desc->limit = 0xf;
|
||||
desc->avl = 0;
|
||||
desc->l = 0;
|
||||
desc->d = SEG_OP_SIZE_32BIT;
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
desc->l = 1;
|
||||
desc->d = 0;
|
||||
} else {
|
||||
desc->l = 0;
|
||||
desc->d = SEG_OP_SIZE_32BIT;
|
||||
}
|
||||
desc->g = SEG_GRANULARITY_4KB;
|
||||
desc->base2 = 0x00;
|
||||
|
||||
desc++;
|
||||
|
||||
/* __KERNEL_DS */
|
||||
desc->limit0 = 0xffff;
|
||||
desc->base0 = 0x0000;
|
||||
desc->base1 = 0x0000;
|
||||
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
|
||||
desc->d = SEG_OP_SIZE_32BIT;
|
||||
desc->g = SEG_GRANULARITY_4KB;
|
||||
desc->base2 = 0x00;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Task segment value */
|
||||
desc++;
|
||||
desc->limit0 = 0x0000;
|
||||
desc->base0 = 0x0000;
|
||||
desc->base1 = 0x0000;
|
||||
desc->type = SEG_TYPE_TSS;
|
||||
desc->s = 0;
|
||||
desc->dpl = 0;
|
||||
desc->p = 1;
|
||||
desc->limit = 0x0;
|
||||
desc->avl = 0;
|
||||
desc->l = 0;
|
||||
desc->d = 0;
|
||||
desc->g = SEG_GRANULARITY_4KB;
|
||||
desc->base2 = 0x00;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
/* Task segment value */
|
||||
desc->limit0 = 0x0000;
|
||||
desc->base0 = 0x0000;
|
||||
desc->base1 = 0x0000;
|
||||
desc->type = SEG_TYPE_TSS;
|
||||
desc->s = 0;
|
||||
desc->dpl = 0;
|
||||
desc->p = 1;
|
||||
desc->limit = 0x0;
|
||||
desc->avl = 0;
|
||||
desc->l = 0;
|
||||
desc->d = 0;
|
||||
desc->g = SEG_GRANULARITY_4KB;
|
||||
desc->base2 = 0x00;
|
||||
desc++;
|
||||
}
|
||||
|
||||
asm volatile("cli");
|
||||
asm volatile ("lgdt %0" : : "m" (*gdt));
|
||||
|
@@ -346,6 +346,48 @@ preferred_addr:
|
||||
/* Set up the stack */
|
||||
leaq boot_stack_end(%rbx), %rsp
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/* Check if 5-level paging has already enabled */
|
||||
movq %cr4, %rax
|
||||
testl $X86_CR4_LA57, %eax
|
||||
jnz lvl5
|
||||
|
||||
/*
|
||||
* At this point we are in long mode with 4-level paging enabled,
|
||||
* but we want to enable 5-level paging.
|
||||
*
|
||||
* The problem is that we cannot do it directly. Setting LA57 in
|
||||
* long mode would trigger #GP. So we need to switch off long mode
|
||||
* first.
|
||||
*
|
||||
* NOTE: This is not going to work if bootloader put us above 4G
|
||||
* limit.
|
||||
*
|
||||
* The first step is go into compatibility mode.
|
||||
*/
|
||||
|
||||
/* Clear additional page table */
|
||||
leaq lvl5_pgtable(%rbx), %rdi
|
||||
xorq %rax, %rax
|
||||
movq $(PAGE_SIZE/8), %rcx
|
||||
rep stosq
|
||||
|
||||
/*
|
||||
* Setup current CR3 as the first and only entry in a new top level
|
||||
* page table.
|
||||
*/
|
||||
movq %cr3, %rdi
|
||||
leaq 0x7 (%rdi), %rax
|
||||
movq %rax, lvl5_pgtable(%rbx)
|
||||
|
||||
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
|
||||
pushq $__KERNEL32_CS
|
||||
leaq compatible_mode(%rip), %rax
|
||||
pushq %rax
|
||||
lretq
|
||||
lvl5:
|
||||
#endif
|
||||
|
||||
/* Zero EFLAGS */
|
||||
pushq $0
|
||||
popfq
|
||||
@@ -429,6 +471,44 @@ relocated:
|
||||
jmp *%rax
|
||||
|
||||
.code32
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
compatible_mode:
|
||||
/* Setup data and stack segments */
|
||||
movl $__KERNEL_DS, %eax
|
||||
movl %eax, %ds
|
||||
movl %eax, %ss
|
||||
|
||||
/* Disable paging */
|
||||
movl %cr0, %eax
|
||||
btrl $X86_CR0_PG_BIT, %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Point CR3 to 5-level paging */
|
||||
leal lvl5_pgtable(%ebx), %eax
|
||||
movl %eax, %cr3
|
||||
|
||||
/* Enable PAE and LA57 mode */
|
||||
movl %cr4, %eax
|
||||
orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
/* Calculate address we are running at */
|
||||
call 1f
|
||||
1: popl %edi
|
||||
subl $1b, %edi
|
||||
|
||||
/* Prepare stack for far return to Long Mode */
|
||||
pushl $__KERNEL_CS
|
||||
leal lvl5(%edi), %eax
|
||||
push %eax
|
||||
|
||||
/* Enable paging back */
|
||||
movl $(X86_CR0_PG | X86_CR0_PE), %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
lret
|
||||
#endif
|
||||
|
||||
no_longmode:
|
||||
/* This isn't an x86-64 CPU so hang */
|
||||
1:
|
||||
@@ -442,7 +522,7 @@ gdt:
|
||||
.word gdt_end - gdt
|
||||
.long gdt
|
||||
.word 0
|
||||
.quad 0x0000000000000000 /* NULL descriptor */
|
||||
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
|
||||
.quad 0x00af9a000000ffff /* __KERNEL_CS */
|
||||
.quad 0x00cf92000000ffff /* __KERNEL_DS */
|
||||
.quad 0x0080890000000000 /* TS descriptor */
|
||||
@@ -486,3 +566,7 @@ boot_stack_end:
|
||||
.balign 4096
|
||||
pgtable:
|
||||
.fill BOOT_PGT_SIZE, 1, 0
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
lvl5_pgtable:
|
||||
.fill PAGE_SIZE, 1, 0
|
||||
#endif
|
||||
|
@@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context)
|
||||
static struct alloc_pgt_data pgt_data;
|
||||
|
||||
/* The top level page table entry pointer. */
|
||||
static unsigned long level4p;
|
||||
static unsigned long top_level_pgt;
|
||||
|
||||
/*
|
||||
* Mapping information structure passed to kernel_ident_mapping_init().
|
||||
@@ -91,9 +91,15 @@ void initialize_identity_maps(void)
|
||||
* If we came here via startup_32(), cr3 will be _pgtable already
|
||||
* and we must append to the existing area instead of entirely
|
||||
* overwriting it.
|
||||
*
|
||||
* With 5-level paging, we use '_pgtable' to allocate the p4d page table,
|
||||
* the top-level page table is allocated separately.
|
||||
*
|
||||
* p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
|
||||
* cases. On 4-level paging it's equal to 'top_level_pgt'.
|
||||
*/
|
||||
level4p = read_cr3();
|
||||
if (level4p == (unsigned long)_pgtable) {
|
||||
top_level_pgt = read_cr3_pa();
|
||||
if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
|
||||
debug_putstr("booted via startup_32()\n");
|
||||
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
|
||||
pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
|
||||
@@ -103,7 +109,7 @@ void initialize_identity_maps(void)
|
||||
pgt_data.pgt_buf = _pgtable;
|
||||
pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
|
||||
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
|
||||
level4p = (unsigned long)alloc_pgt_page(&pgt_data);
|
||||
top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
|
||||
return;
|
||||
|
||||
/* Build the mapping. */
|
||||
kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
|
||||
kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
|
||||
start, end);
|
||||
}
|
||||
|
||||
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
|
||||
*/
|
||||
void finalize_identity_maps(void)
|
||||
{
|
||||
write_cr3(level4p);
|
||||
write_cr3(top_level_pgt);
|
||||
}
|
||||
|
Reference in New Issue
Block a user