Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support, in particular it's possible
   now to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel CPUs.
   (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
This commit is contained in:
Linus Torvalds
2018-04-02 15:45:30 -07:00
68 changed files with 1658 additions and 1017 deletions

View File

@@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
*
* We initialize the Hardware-reduced ACPI model here:
*/
void __init acpi_generic_reduced_hw_init(void)
{
/*
* Override x86_init functions and bypass legacy PIC in
* hardware reduced ACPI mode.
*/
x86_init.timers.timer_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
legacy_pic = &null_legacy_pic;
}
static void __init acpi_reduced_hw_init(void)
{
if (acpi_gbl_reduced_hardware) {
/*
* Override x86_init functions and bypass legacy pic
* in Hardware-reduced ACPI mode
*/
x86_init.timers.timer_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
legacy_pic = &null_legacy_pic;
}
if (acpi_gbl_reduced_hardware)
x86_init.acpi.reduced_hw_early_init();
}
/*

View File

@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o

View File

@@ -509,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
}
}
#define MSR_IA32_TME_ACTIVATE 0x982
/* Helpers to access TME_ACTIVATE MSR */
#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
#define TME_ACTIVATE_POLICY_AES_XTS_128 0
#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
/* Values for mktme_status (SW only construct) */
#define MKTME_ENABLED 0
#define MKTME_DISABLED 1
#define MKTME_UNINITIALIZED 2
static int mktme_status = MKTME_UNINITIALIZED;
static void detect_tme(struct cpuinfo_x86 *c)
{
u64 tme_activate, tme_policy, tme_crypto_algs;
int keyid_bits = 0, nr_keyids = 0;
static u64 tme_activate_cpu0 = 0;
rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
if (mktme_status != MKTME_UNINITIALIZED) {
if (tme_activate != tme_activate_cpu0) {
/* Broken BIOS? */
pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
pr_err_once("x86/tme: MKTME is not usable\n");
mktme_status = MKTME_DISABLED;
/* Proceed. We may need to exclude bits from x86_phys_bits. */
}
} else {
tme_activate_cpu0 = tme_activate;
}
if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
pr_info_once("x86/tme: not enabled by BIOS\n");
mktme_status = MKTME_DISABLED;
return;
}
if (mktme_status != MKTME_UNINITIALIZED)
goto detect_keyid_bits;
pr_info("x86/tme: enabled by BIOS\n");
tme_policy = TME_ACTIVATE_POLICY(tme_activate);
if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
tme_crypto_algs);
mktme_status = MKTME_DISABLED;
}
detect_keyid_bits:
keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
nr_keyids = (1UL << keyid_bits) - 1;
if (nr_keyids) {
pr_info_once("x86/mktme: enabled by BIOS\n");
pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
} else {
pr_info_once("x86/mktme: disabled by BIOS\n");
}
if (mktme_status == MKTME_UNINITIALIZED) {
/* MKTME is usable */
mktme_status = MKTME_ENABLED;
}
/*
* KeyID bits effectively lower the number of physical address
* bits. Update cpuinfo_x86::x86_phys_bits accordingly.
*/
c->x86_phys_bits -= keyid_bits;
}
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
{
u64 epb;
@@ -679,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
if (cpu_has(c, X86_FEATURE_TME))
detect_tme(c);
init_intel_energy_perf(c);
init_intel_misc_features(c);

View File

@@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Intel PCONFIG instruction support.
*
* Copyright (C) 2017 Intel Corporation
*
* Author:
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
*/
#include <asm/cpufeature.h>
#include <asm/intel_pconfig.h>
#define PCONFIG_CPUID 0x1b
#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)
/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */
enum {
PCONFIG_CPUID_SUBLEAF_INVALID = 0,
PCONFIG_CPUID_SUBLEAF_TARGETID = 1,
};
/* Bitmask of supported targets */
static u64 targets_supported __read_mostly;
int pconfig_target_supported(enum pconfig_target target)
{
/*
* We would need to re-think the implementation once we get > 64
* PCONFIG targets. Spec allows up to 2^32 targets.
*/
BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64);
if (WARN_ON_ONCE(target >= 64))
return 0;
return targets_supported & (1ULL << target);
}
static int __init intel_pconfig_init(void)
{
int subleaf;
if (!boot_cpu_has(X86_FEATURE_PCONFIG))
return 0;
/*
* Scan subleafs of PCONFIG CPUID leaf.
*
* Subleafs of the same type need not to be consecutive.
*
* Stop on the first invalid subleaf type. All subleafs after the first
* invalid are invalid too.
*/
for (subleaf = 0; subleaf < INT_MAX; subleaf++) {
struct cpuid_regs regs;
cpuid_count(PCONFIG_CPUID, subleaf,
&regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) {
case PCONFIG_CPUID_SUBLEAF_INVALID:
/* Stop on the first invalid subleaf */
goto out;
case PCONFIG_CPUID_SUBLEAF_TARGETID:
/* Mark supported PCONFIG targets */
if (regs.ebx < 64)
targets_supported |= (1ULL << regs.ebx);
if (regs.ecx < 64)
targets_supported |= (1ULL << regs.ecx);
if (regs.edx < 64)
targets_supported |= (1ULL << regs.edx);
break;
default:
/* Unknown CPUID.PCONFIG subleaf: ignore */
break;
}
}
out:
return 0;
}
arch_initcall(intel_pconfig_init);

View File

@@ -1095,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn)
* a legal address.
*/
/*
* Build time check to see if we have a spare virtual bit. Don't want
* to leave this until run time because most developers don't have a
* system that can exercise this code path. This will only become a
* problem if/when we move beyond 5-level page tables.
*
* Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
*/
#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
#else
#error "no unused virtual bit available"
#endif
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
@@ -2357,6 +2345,12 @@ static __init int mcheck_init_device(void)
{
int err;
/*
* Check if we have a spare virtual bit. This will only become
* a problem if/when we move beyond 5-level page tables.
*/
MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
goto err_out;

View File

@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '!') {
start_at = memparse(p+1, &p);
e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
} else if (*p == '%') {
enum e820_type from = 0, to = 0;
start_at = memparse(p + 1, &p);
if (*p == '-')
from = simple_strtoull(p + 1, &p, 0);
if (*p == '+')
to = simple_strtoull(p + 1, &p, 0);
if (*p != '\0')
return -EINVAL;
if (from && to)
e820__range_update(start_at, mem_size, from, to);
else if (to)
e820__range_add(start_at, mem_size, to);
else if (from)
e820__range_remove(start_at, mem_size, from, 1);
else
e820__range_remove(start_at, mem_size, 0, 0);
} else {
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
}

View File

@@ -32,6 +32,11 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
#ifdef CONFIG_X86_5LEVEL
#undef pgtable_l5_enabled
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
/*
* Manage page tables very early on.
*/
@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled __ro_after_init;
EXPORT_SYMBOL(__pgtable_l5_enabled);
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
#define __head __section(.head.text)
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr;
}
static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
{
return fixup_pointer(ptr, physaddr);
}
#ifdef CONFIG_X86_5LEVEL
static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
{
return fixup_pointer(ptr, physaddr);
}
static bool __head check_la57_support(unsigned long physaddr)
{
if (native_cpuid_eax(0) < 7)
return false;
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
return false;
*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&pgdir_shift, physaddr) = 48;
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
*fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
*fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
return true;
}
#else
static bool __head check_la57_support(unsigned long physaddr)
{
return false;
}
#endif
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
bool la57;
int i;
unsigned int *next_pgt_ptr;
la57 = check_la57_support(physaddr);
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
for (;;);
@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
pgd[pgd_index(__START_KERNEL_map)] += load_delta;
p = pgd + pgd_index(__START_KERNEL_map);
if (la57)
*p = (unsigned long)level4_kernel_pgt;
else
*p = (unsigned long)level3_kernel_pgt;
*p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
if (la57) {
p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
p4d[511] += load_delta;
}
@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
if (la57) {
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Fixup phys_base - remove the memory encryption mask to obtain
* the true physical address.
*/
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();
*fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
@@ -206,7 +271,7 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
if (!pgtable_l5_enabled)
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

View File

@@ -39,12 +39,12 @@
*
*/
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
#endif
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -125,7 +125,10 @@ ENTRY(secondary_startup_64)
/* Enable PAE mode, PGE and LA57 */
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
testl $1, __pgtable_l5_enabled(%rip)
jz 1f
orl $X86_CR4_LA57, %ecx
1:
#endif
movq %rcx, %cr4
@@ -374,12 +377,7 @@ GLOBAL(name)
__INITDATA
NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0
#ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
@@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts)
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0
.org init_top_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0

View File

@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
VMCOREINFO_NUMBER(pgtable_l5_enabled);
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);

View File

@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif
#else
struct cpuinfo_x86 boot_cpu_data __read_mostly = {
.x86_phys_bits = MAX_PHYSMEM_BITS,
};
struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
#endif
@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif
/*

View File

@@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/pci.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
#include <asm/pci_x86.h>
@@ -26,10 +27,11 @@
void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { }
static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
static bool __init bool_x86_init_noop(void) { return false; }
static void x86_op_int_noop(int cpu) { }
static u64 u64_x86_init_noop(void) { return 0; }
/*
* The platform setup functions are preset with the default functions
@@ -91,6 +93,11 @@ struct x86_init_ops x86_init __initdata = {
.x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop,
},
.acpi = {
.get_root_pointer = u64_x86_init_noop,
.reduced_hw_early_init = acpi_generic_reduced_hw_init,
},
};
struct x86_cpuinit_ops x86_cpuinit = {