Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:
 "The main changes:

   - add initial commits to randomize kernel memory section virtual
     addresses, enabled via a new kernel option: RANDOMIZE_MEMORY
     (Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu)

   - enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees
     Cook)

   - EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar)

   - misc cleanups/fixes"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot: Simplify EBDA-vs-BIOS reservation logic
  x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does
  x86/boot: Reorganize and clean up the BIOS area reservation code
  x86/mm: Do not reference phys addr beyond kernel
  x86/mm: Add memory hotplug support for KASLR memory randomization
  x86/mm: Enable KASLR for vmalloc memory regions
  x86/mm: Enable KASLR for physical mapping memory regions
  x86/mm: Implement ASLR for kernel memory regions
  x86/mm: Separate variable for trampoline PGD
  x86/mm: Add PUD VA support for physical mapping
  x86/mm: Update physical mapping variable names
  x86/mm: Refactor KASLR entropy functions
  x86/KASLR: Fix boot crash with certain memory configurations
  x86/boot/64: Add forgotten end of function marker
  x86/KASLR: Allow randomization below the load address
  x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G
  x86/KASLR: Randomize virtual address separately
  x86/KASLR: Clarify identity map interface
  x86/boot: Refuse to build with data relocations
  x86/KASLR, x86/power: Remove x86 hibernation restrictions
This commit is contained in:
Linus Torvalds
2016-07-25 17:32:28 -07:00
30 changed files with 778 additions and 336 deletions

View File

@@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
$(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
# can place it anywhere in memory and it will still run. However, since
# it is executed as-is without any ELF relocation processing performed
# (and has already had all relocation sections stripped from the binary),
# none of the code can use data relocations (e.g. static assignments of
# pointer values), since they will be meaningless at runtime. This check
# will refuse to link the vmlinux if any of these relocations are found.
quiet_cmd_check_data_rel = DATAREL $@
define cmd_check_data_rel
for obj in $(filter %.o,$^); do \
readelf -S $$obj | grep -qF .rel.local && { \
echo "error: $$obj has data relocations!" >&2; \
exit 1; \
} || true; \
done
endef
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,check_data_rel)
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S

View File

@@ -12,10 +12,6 @@
#include "misc.h"
#include "error.h"
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
@@ -26,26 +22,6 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
#define I8254_SELECT_COUNTER0 0x02
#define I8254_STATUS_NOTREADY 0x40
static inline u16 i8254(void)
{
u16 status, timer;
do {
outb(I8254_PORT_CONTROL,
I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
status = inb(I8254_PORT_COUNTER0);
timer = inb(I8254_PORT_COUNTER0);
timer |= inb(I8254_PORT_COUNTER0) << 8;
} while (status & I8254_STATUS_NOTREADY);
return timer;
}
static unsigned long rotate_xor(unsigned long hash, const void *area,
size_t size)
{
@@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area,
}
/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_random_boot(void)
static unsigned long get_boot_seed(void)
{
unsigned long hash = 0;
@@ -72,50 +48,8 @@ static unsigned long get_random_boot(void)
return hash;
}
static unsigned long get_random_long(const char *purpose)
{
#ifdef CONFIG_X86_64
const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
const unsigned long mix_const = 0x3f39e593UL;
#endif
unsigned long raw, random = get_random_boot();
bool use_i8254 = true;
debug_putstr(purpose);
debug_putstr(" KASLR using");
if (has_cpuflag(X86_FEATURE_RDRAND)) {
debug_putstr(" RDRAND");
if (rdrand_long(&raw)) {
random ^= raw;
use_i8254 = false;
}
}
if (has_cpuflag(X86_FEATURE_TSC)) {
debug_putstr(" RDTSC");
raw = rdtsc();
random ^= raw;
use_i8254 = false;
}
if (use_i8254) {
debug_putstr(" i8254");
random ^= i8254();
}
/* Circular multiply for better bit diffusion */
asm("mul %3"
: "=a" (random), "=d" (raw)
: "a" (random), "rm" (mix_const));
random += raw;
debug_putstr("...\n");
return random;
}
#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
struct mem_vector {
unsigned long start;
@@ -132,17 +66,6 @@ enum mem_avoid_index {
static struct mem_vector mem_avoid[MEM_AVOID_MAX];
static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
{
/* Item at least partially before region. */
if (item->start < region->start)
return false;
/* Item at least partially after region. */
if (item->start + item->size > region->start + region->size)
return false;
return true;
}
static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
/* Item one is entirely before item two. */
@@ -296,6 +219,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
if (mem_overlaps(img, &mem_avoid[i]) &&
mem_avoid[i].start < earliest) {
*overlap = mem_avoid[i];
earliest = overlap->start;
is_overlapping = true;
}
}
@@ -310,6 +234,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
*overlap = avoid;
earliest = overlap->start;
is_overlapping = true;
}
@@ -319,8 +244,6 @@ static bool mem_avoid_overlap(struct mem_vector *img,
return is_overlapping;
}
static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];
struct slot_area {
unsigned long addr;
int num;
@@ -351,36 +274,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
}
}
static void slots_append(unsigned long addr)
{
/* Overflowing the slots list should be impossible. */
if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
return;
slots[slot_max++] = addr;
}
static unsigned long slots_fetch_random(void)
{
unsigned long slot;
int i;
/* Handle case of no slots stored. */
if (slot_max == 0)
return 0;
return slots[get_random_long("Physical") % slot_max];
slot = kaslr_get_random_long("Physical") % slot_max;
for (i = 0; i < slot_area_index; i++) {
if (slot >= slot_areas[i].num) {
slot -= slot_areas[i].num;
continue;
}
return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
}
if (i == slot_area_index)
debug_putstr("slots_fetch_random() failed!?\n");
return 0;
}
static void process_e820_entry(struct e820entry *entry,
unsigned long minimum,
unsigned long image_size)
{
struct mem_vector region, img, overlap;
struct mem_vector region, overlap;
struct slot_area slot_area;
unsigned long start_orig;
/* Skip non-RAM entries. */
if (entry->type != E820_RAM)
return;
/* Ignore entries entirely above our maximum. */
if (entry->addr >= KERNEL_IMAGE_SIZE)
/* On 32-bit, ignore entries entirely above our maximum. */
if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
return;
/* Ignore entries entirely below our minimum. */
@@ -390,31 +321,55 @@ static void process_e820_entry(struct e820entry *entry,
region.start = entry->addr;
region.size = entry->size;
/* Potentially raise address to minimum location. */
if (region.start < minimum)
region.start = minimum;
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
start_orig = region.start;
/* Potentially raise address to meet alignment requirements. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Potentially raise address to minimum location. */
if (region.start < minimum)
region.start = minimum;
/* Did we raise the address above the bounds of this e820 region? */
if (region.start > entry->addr + entry->size)
return;
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Reduce size by any delta from the original address. */
region.size -= region.start - entry->addr;
/* Did we raise the address above this e820 region? */
if (region.start > entry->addr + entry->size)
return;
/* Reduce maximum size to fit end of image within maximum limit. */
if (region.start + region.size > KERNEL_IMAGE_SIZE)
region.size = KERNEL_IMAGE_SIZE - region.start;
/* Reduce size by any delta from the original address. */
region.size -= region.start - start_orig;
/* Walk each aligned slot and check for avoided areas. */
for (img.start = region.start, img.size = image_size ;
mem_contains(&region, &img) ;
img.start += CONFIG_PHYSICAL_ALIGN) {
if (mem_avoid_overlap(&img, &overlap))
continue;
slots_append(img.start);
/* On 32-bit, reduce region size to fit within max size. */
if (IS_ENABLED(CONFIG_X86_32) &&
region.start + region.size > KERNEL_IMAGE_SIZE)
region.size = KERNEL_IMAGE_SIZE - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
return;
/* If nothing overlaps, store the region and return. */
if (!mem_avoid_overlap(&region, &overlap)) {
store_slot_info(&region, image_size);
return;
}
/* Store beginning of region if holds at least image_size. */
if (overlap.start > region.start + image_size) {
struct mem_vector beginning;
beginning.start = region.start;
beginning.size = overlap.start - region.start;
store_slot_info(&beginning, image_size);
}
/* Return if overlap extends to or past end of region. */
if (overlap.start + overlap.size >= region.start + region.size)
return;
/* Clip off the overlapping region and start over. */
region.size -= overlap.start - region.start + overlap.size;
region.start = overlap.start + overlap.size;
}
}
@@ -431,6 +386,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
for (i = 0; i < boot_params->e820_entries; i++) {
process_e820_entry(&boot_params->e820_map[i], minimum,
image_size);
if (slot_area_index == MAX_SLOT_AREA) {
debug_putstr("Aborted e820 scan (slot_areas full)!\n");
break;
}
}
return slots_fetch_random();
@@ -454,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
CONFIG_PHYSICAL_ALIGN + 1;
random_addr = get_random_long("Virtual") % slots;
random_addr = kaslr_get_random_long("Virtual") % slots;
return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
@@ -463,48 +422,54 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
* Since this function examines addresses much more numerically,
* it takes the input and output pointers as 'unsigned long'.
*/
unsigned char *choose_random_location(unsigned long input,
unsigned long input_size,
unsigned long output,
unsigned long output_size)
void choose_random_location(unsigned long input,
unsigned long input_size,
unsigned long *output,
unsigned long output_size,
unsigned long *virt_addr)
{
unsigned long choice = output;
unsigned long random_addr;
unsigned long random_addr, min_addr;
/* By default, keep output position unchanged. */
*virt_addr = *output;
#ifdef CONFIG_HIBERNATION
if (!cmdline_find_option_bool("kaslr")) {
warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
goto out;
}
#else
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
goto out;
return;
}
#endif
boot_params->hdr.loadflags |= KASLR_FLAG;
/* Prepare to add new identity pagetables on demand. */
initialize_identity_maps();
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, output);
mem_avoid_init(input, input_size, *output);
/*
* Low end of the randomization range should be the
* smaller of 512M or the initial kernel image
* location:
*/
min_addr = min(*output, 512UL << 20);
/* Walk e820 and find a random address. */
random_addr = find_random_phys_addr(output, output_size);
random_addr = find_random_phys_addr(min_addr, output_size);
if (!random_addr) {
warn("KASLR disabled: could not find suitable E820 region!");
goto out;
} else {
/* Update the new physical address location. */
if (*output != random_addr) {
add_identity_map(random_addr, output_size);
*output = random_addr;
}
}
/* Always enforce the minimum. */
if (random_addr < choice)
goto out;
choice = random_addr;
add_identity_map(choice, output_size);
/* This actually loads the identity pagetable on x86_64. */
finalize_identity_maps();
out:
return (unsigned char *)choice;
/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
if (IS_ENABLED(CONFIG_X86_64))
random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
*virt_addr = random_addr;
}

View File

@@ -170,7 +170,8 @@ void __puthex(unsigned long value)
}
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len)
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
{
int *reloc;
unsigned long delta, map, ptr;
@@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len)
* and where it was actually loaded.
*/
delta = min_addr - LOAD_PHYSICAL_ADDR;
if (!delta) {
debug_putstr("No relocation needed... ");
return;
}
debug_putstr("Performing relocations... ");
/*
* The kernel contains a table of relocation addresses. Those
@@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len)
*/
map = delta - __START_KERNEL_map;
/*
* 32-bit always performs relocations. 64-bit relocations are only
* needed if KASLR has chosen a different starting address offset
* from __START_KERNEL_map.
*/
if (IS_ENABLED(CONFIG_X86_64))
delta = virt_addr - LOAD_PHYSICAL_ADDR;
if (!delta) {
debug_putstr("No relocation needed... ");
return;
}
debug_putstr("Performing relocations... ");
/*
* Process relocations: 32 bit relocations first then 64 bit after.
* Three sets of binary relocations are added to the end of the kernel
@@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len)
#endif
}
#else
static inline void handle_relocations(void *output, unsigned long output_len)
static inline void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
{ }
#endif
@@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned char *output_orig = output;
unsigned long virt_addr = (unsigned long)output;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
output = choose_random_location((unsigned long)input_data, input_len,
(unsigned long)output,
max(output_len, kernel_total_size));
choose_random_location((unsigned long)input_data, input_len,
(unsigned long *)&output,
max(output_len, kernel_total_size),
&virt_addr);
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
error("Destination physical address inappropriately aligned");
if (virt_addr & (MIN_KERNEL_ALIGN - 1))
error("Destination virtual address inappropriately aligned");
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
@@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Wrong destination address");
error("Destination address does not match LOAD_PHYSICAL_ADDR");
if ((unsigned long)output != virt_addr)
error("Destination virtual address changed when not relocatable");
#endif
debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
parse_elf(output);
/*
* 32-bit always performs relocations. 64-bit relocations are only
* needed if kASLR has chosen a different load address.
*/
if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
handle_relocations(output, output_len);
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
return output;
}

View File

@@ -67,28 +67,33 @@ int cmdline_find_option_bool(const char *option);
#if CONFIG_RANDOMIZE_BASE
/* kaslr.c */
unsigned char *choose_random_location(unsigned long input_ptr,
unsigned long input_size,
unsigned long output_ptr,
unsigned long output_size);
void choose_random_location(unsigned long input,
unsigned long input_size,
unsigned long *output,
unsigned long output_size,
unsigned long *virt_addr);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_random_location(unsigned long input_ptr,
unsigned long input_size,
unsigned long output_ptr,
unsigned long output_size)
static inline void choose_random_location(unsigned long input,
unsigned long input_size,
unsigned long *output,
unsigned long output_size,
unsigned long *virt_addr)
{
return (unsigned char *)output_ptr;
/* No change from existing output location. */
*virt_addr = *output;
}
#endif
#ifdef CONFIG_X86_64
void initialize_identity_maps(void);
void add_identity_map(unsigned long start, unsigned long size);
void finalize_identity_maps(void);
extern unsigned char _pgtable[];
#else
static inline void initialize_identity_maps(void)
{ }
static inline void add_identity_map(unsigned long start, unsigned long size)
{ }
static inline void finalize_identity_maps(void)

View File

@@ -2,6 +2,9 @@
* This code is used on x86_64 to create page table identity mappings on
* demand by building up a new set of page tables (or appending to the
* existing ones), and then switching over to them when ready.
*
* Copyright (C) 2015-2016 Yinghai Lu
* Copyright (C) 2016 Kees Cook
*/
/*
@@ -17,6 +20,9 @@
/* These actually do the work of building the kernel identity maps. */
#include <asm/init.h>
#include <asm/pgtable.h>
/* Use the static base for this part of the boot process */
#undef __PAGE_OFFSET
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"
/* Used by pgtable.h asm code to force instruction serialization. */
@@ -59,9 +65,21 @@ static struct alloc_pgt_data pgt_data;
/* The top level page table entry pointer. */
static unsigned long level4p;
/*
* Mapping information structure passed to kernel_ident_mapping_init().
* Due to relocation, pointers must be assigned at run time not build time.
*/
static struct x86_mapping_info mapping_info = {
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
};
/* Locates and clears a region for a new top level page table. */
static void prepare_level4(void)
void initialize_identity_maps(void)
{
/* Init mapping_info with run-time function/buffer pointers. */
mapping_info.alloc_pgt_page = alloc_pgt_page;
mapping_info.context = &pgt_data;
/*
* It should be impossible for this not to already be true,
* but since calling this a second time would rewind the other
@@ -96,17 +114,8 @@ static void prepare_level4(void)
*/
void add_identity_map(unsigned long start, unsigned long size)
{
struct x86_mapping_info mapping_info = {
.alloc_pgt_page = alloc_pgt_page,
.context = &pgt_data,
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
};
unsigned long end = start + size;
/* Make sure we have a top level page table ready to use. */
if (!level4p)
prepare_level4();
/* Align boundary to 2M. */
start = round_down(start, PMD_SIZE);
end = round_up(end, PMD_SIZE);