Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:
 "The main changes:

   - add initial commits to randomize kernel memory section virtual
     addresses, enabled via a new kernel option: RANDOMIZE_MEMORY
     (Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu)

   - enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees
     Cook)

   - EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar)

   - misc cleanups/fixes"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot: Simplify EBDA-vs-BIOS reservation logic
  x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does
  x86/boot: Reorganize and clean up the BIOS area reservation code
  x86/mm: Do not reference phys addr beyond kernel
  x86/mm: Add memory hotplug support for KASLR memory randomization
  x86/mm: Enable KASLR for vmalloc memory regions
  x86/mm: Enable KASLR for physical mapping memory regions
  x86/mm: Implement ASLR for kernel memory regions
  x86/mm: Separate variable for trampoline PGD
  x86/mm: Add PUD VA support for physical mapping
  x86/mm: Update physical mapping variable names
  x86/mm: Refactor KASLR entropy functions
  x86/KASLR: Fix boot crash with certain memory configurations
  x86/boot/64: Add forgotten end of function marker
  x86/KASLR: Allow randomization below the load address
  x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G
  x86/KASLR: Randomize virtual address separately
  x86/KASLR: Clarify identity map interface
  x86/boot: Refuse to build with data relocations
  x86/KASLR, x86/power: Remove x86 hibernation restrictions
此提交包含在:
Linus Torvalds
2016-07-25 17:32:28 -07:00
當前提交 77cd3d0c43
共有 30 個檔案被更改,包括 778 行新增336 行删除

查看文件

@@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o

查看文件

@@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = {
{ 0, "User Space" },
#ifdef CONFIG_X86_64
{ 0x8000000000000000UL, "Kernel Space" },
{ PAGE_OFFSET, "Low Kernel Mapping" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMEMMAP_START, "Vmemmap" },
{ 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
{ 0/* VMALLOC_START */, "vmalloc() Area" },
{ 0/* VMEMMAP_START */, "Vmemmap" },
# ifdef CONFIG_X86_ESPFIX64
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
# endif
@@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void)
static int __init pt_dump_init(void)
{
/*
* Various markers are not compile-time constants, so assign them
* here.
*/
#ifdef CONFIG_X86_64
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
#endif
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM

查看文件

@@ -17,6 +17,7 @@
#include <asm/proto.h>
#include <asm/dma.h> /* for MAX_DMA_PFN */
#include <asm/microcode.h>
#include <asm/kaslr.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -590,6 +591,9 @@ void __init init_mem_mapping(void)
/* the ISA range is always mapped regardless of memory holes */
init_memory_mapping(0, ISA_END_ADDRESS);
/* Init the trampoline, possibly with KASLR memory offset */
init_trampoline();
/*
* If the allocation is in bottom-up direction, we setup direct mapping
* in bottom-up, otherwise we setup direct mapping in top-down.

查看文件

@@ -328,22 +328,30 @@ void __init cleanup_highmap(void)
}
}
/*
* Create PTE level page table mapping for physical addresses.
* It returns the last physical address mapped.
*/
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pgprot_t prot)
{
unsigned long pages = 0, next;
unsigned long last_map_addr = end;
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
pte_t *pte;
int i;
pte_t *pte = pte_page + pte_index(addr);
pte = pte_page + pte_index(paddr);
i = pte_index(paddr);
for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
next = (addr & PAGE_MASK) + PAGE_SIZE;
if (addr >= end) {
for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
if (paddr >= paddr_end) {
if (!after_bootmem &&
!e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
!e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
!e820_any_mapped(paddr & PAGE_MASK, paddr_next,
E820_RAM) &&
!e820_any_mapped(paddr & PAGE_MASK, paddr_next,
E820_RESERVED_KERN))
set_pte(pte, __pte(0));
continue;
}
@@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
}
if (0)
printk(" pte=%p addr=%lx pte=%016lx\n",
pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
update_page_count(PG_LEVEL_4K, pages);
return last_map_addr;
return paddr_last;
}
/*
* Create PMD level page table mapping for physical addresses. The virtual
* and physical address have to be aligned at this level.
* It returns the last physical address mapped.
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot)
{
unsigned long pages = 0, next;
unsigned long last_map_addr = end;
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
int i = pmd_index(address);
int i = pmd_index(paddr);
for (; i < PTRS_PER_PMD; i++, address = next) {
pmd_t *pmd = pmd_page + pmd_index(address);
for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
pmd_t *pmd = pmd_page + pmd_index(paddr);
pte_t *pte;
pgprot_t new_prot = prot;
next = (address & PMD_MASK) + PMD_SIZE;
if (address >= end) {
paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
if (paddr >= paddr_end) {
if (!after_bootmem &&
!e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
!e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
!e820_any_mapped(paddr & PMD_MASK, paddr_next,
E820_RAM) &&
!e820_any_mapped(paddr & PMD_MASK, paddr_next,
E820_RESERVED_KERN))
set_pmd(pmd, __pmd(0));
continue;
}
@@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
last_map_addr = phys_pte_init(pte, address,
end, prot);
paddr_last = phys_pte_init(pte, paddr,
paddr_end, prot);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (page_size_mask & (1 << PG_LEVEL_2M)) {
if (!after_bootmem)
pages++;
last_map_addr = next;
paddr_last = paddr_next;
continue;
}
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -430,42 +445,54 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pmd,
pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
spin_unlock(&init_mm.page_table_lock);
last_map_addr = next;
paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
last_map_addr = phys_pte_init(pte, address, end, new_prot);
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel(&init_mm, pmd, pte);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
return last_map_addr;
return paddr_last;
}
/*
* Create PUD level page table mapping for physical addresses. The virtual
* and physical address do not have to be aligned at this level. KASLR can
* randomize virtual addresses up to this level.
* It returns the last physical address mapped.
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask)
{
unsigned long pages = 0, next;
unsigned long last_map_addr = end;
int i = pud_index(addr);
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = pud_index(vaddr);
for (; i < PTRS_PER_PUD; i++, addr = next) {
pud_t *pud = pud_page + pud_index(addr);
for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
pud_t *pud;
pmd_t *pmd;
pgprot_t prot = PAGE_KERNEL;
next = (addr & PUD_MASK) + PUD_SIZE;
if (addr >= end) {
vaddr = (unsigned long)__va(paddr);
pud = pud_page + pud_index(vaddr);
paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
if (paddr >= paddr_end) {
if (!after_bootmem &&
!e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
!e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
!e820_any_mapped(paddr & PUD_MASK, paddr_next,
E820_RAM) &&
!e820_any_mapped(paddr & PUD_MASK, paddr_next,
E820_RESERVED_KERN))
set_pud(pud, __pud(0));
continue;
}
@@ -473,8 +500,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (!pud_none(*pud)) {
if (!pud_large(*pud)) {
pmd = pmd_offset(pud, 0);
last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
prot);
__flush_tlb_all();
continue;
}
@@ -493,7 +522,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (page_size_mask & (1 << PG_LEVEL_1G)) {
if (!after_bootmem)
pages++;
last_map_addr = next;
paddr_last = paddr_next;
continue;
}
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -503,16 +532,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
PAGE_KERNEL_LARGE));
spin_unlock(&init_mm.page_table_lock);
last_map_addr = next;
paddr_last = paddr_next;
continue;
}
pmd = alloc_low_page();
last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
prot);
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
page_size_mask, prot);
spin_lock(&init_mm.page_table_lock);
pud_populate(&init_mm, pud, pmd);
@@ -522,38 +551,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
update_page_count(PG_LEVEL_1G, pages);
return last_map_addr;
return paddr_last;
}
/*
* Create page table mapping for the physical memory for specific physical
* addresses. The virtual and physical addresses have to be aligned on PMD level
* down. It returns the last physical address mapped.
*/
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
{
bool pgd_changed = false;
unsigned long next, last_map_addr = end;
unsigned long addr;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
addr = start;
paddr_last = paddr_end;
vaddr = (unsigned long)__va(paddr_start);
vaddr_end = (unsigned long)__va(paddr_end);
vaddr_start = vaddr;
for (; start < end; start = next) {
pgd_t *pgd = pgd_offset_k(start);
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
pud_t *pud;
next = (start & PGDIR_MASK) + PGDIR_SIZE;
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
if (pgd_val(*pgd)) {
pud = (pud_t *)pgd_page_vaddr(*pgd);
last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
paddr_last = phys_pud_init(pud, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
continue;
}
pud = alloc_low_page();
last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
page_size_mask);
paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
page_size_mask);
spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, pud);
@@ -562,11 +597,11 @@ kernel_physical_mapping_init(unsigned long start,
}
if (pgd_changed)
sync_global_pgds(addr, end - 1, 0);
sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
__flush_tlb_all();
return last_map_addr;
return paddr_last;
}
#ifndef CONFIG_NUMA

172
arch/x86/mm/kaslr.c 一般檔案
查看文件

@@ -0,0 +1,172 @@
/*
* This file implements KASLR memory randomization for x86_64. It randomizes
* the virtual address space of kernel memory regions (physical memory
* mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates
* exploits relying on predictable kernel addresses.
*
* Entropy is generated using the KASLR early boot functions now shared in
* the lib directory (originally written by Kees Cook). Randomization is
* done on PGD & PUD page table levels to increase possible addresses. The
* physical memory mapping code was adapted to support PUD level virtual
* addresses. This implementation on the best configuration provides 30,000
* possible virtual addresses in average for each memory region. An additional
* low memory page is used to ensure each CPU can start with a PGD aligned
* virtual address (for realmode).
*
* The order of each memory region is not changed. The feature looks at
* the available space for the regions based on different configuration
* options and randomizes the base and space between each. The size of the
* physical memory mapping is the available physical memory.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/random.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/kaslr.h>
#include "mm_internal.h"
#define TB_SHIFT 40
/*
* Virtual address start and end range for randomization. The end changes base
* on configuration to have the highest amount of space for randomization.
* It increases the possible random position for each randomized region.
*
* You need to add an if/def entry if you introduce a new memory region
* compatible with KASLR. Your entry must be in logical order with memory
* layout. For example, ESPFIX is before EFI because its virtual address is
* before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
* ensure that this order is correct and won't be changed.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = VMEMMAP_START;
/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base = __VMALLOC_BASE;
EXPORT_SYMBOL(vmalloc_base);
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This
* order is kept after randomization.
*/
static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
{ &page_offset_base, 64/* Maximum */ },
{ &vmalloc_base, VMALLOC_SIZE_TB },
};
/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
{
return (region->size_tb << TB_SHIFT);
}
/*
* Apply no randomization if KASLR was disabled at boot or if KASAN
* is enabled. KASAN shadow mappings rely on regions being PGD aligned.
*/
static inline bool kaslr_memory_enabled(void)
{
return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
}
/* Initialize base and padding for each memory region randomized with KASLR */
void __init kernel_randomize_memory(void)
{
size_t i;
unsigned long vaddr = vaddr_start;
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;
if (!kaslr_memory_enabled())
return;
/*
* Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support).
*/
BUG_ON(kaslr_regions[0].base != &page_offset_base);
memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
/* Adapt phyiscal memory region size based on available memory */
if (memory_tb < kaslr_regions[0].size_tb)
kaslr_regions[0].size_tb = memory_tb;
/* Calculate entropy available between regions */
remain_entropy = vaddr_end - vaddr_start;
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
remain_entropy -= get_padding(&kaslr_regions[i]);
prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));
for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
unsigned long entropy;
/*
* Select a random virtual address using the extra entropy
* available.
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
/*
* Jump the region and add a minimum padding based on
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}
/*
* Create PGD aligned trampoline table to allow real mode initialization
* of additional CPUs. Consume only 1 low memory page.
*/
void __meminit init_trampoline(void)
{
unsigned long paddr, paddr_next;
pgd_t *pgd;
pud_t *pud_page, *pud_page_tramp;
int i;
if (!kaslr_memory_enabled()) {
init_trampoline_default();
return;
}
pud_page_tramp = alloc_low_page();
paddr = 0;
pgd = pgd_offset_k((unsigned long)__va(paddr));
pud_page = (pud_t *) pgd_page_vaddr(*pgd);
for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
pud_t *pud, *pud_tramp;
unsigned long vaddr = (unsigned long)__va(paddr);
pud_tramp = pud_page_tramp + pud_index(paddr);
pud = pud_page + pud_index(vaddr);
paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
*pud_tramp = *pud;
}
set_pgd(&trampoline_pgd_entry,
__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
}

查看文件

@@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void)
static inline unsigned long highmap_end_pfn(void)
{
return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
/* Do not reference physical address outside the kernel. */
return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
}
#endif
@@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end)
return addr >= start && addr < end;
}
static inline int
within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
{
return addr >= start && addr <= end;
}
/*
* Flushing functions
*/
@@ -1299,7 +1306,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
* to touch the high mapped kernel as well:
*/
if (!within(vaddr, (unsigned long)_text, _brk_end) &&
within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
within_inclusive(cpa->pfn, highmap_start_pfn(),
highmap_end_pfn())) {
unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
__START_KERNEL_map - phys_base;
alias_cpa = *cpa;