Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics' that
  accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
     x86 - preceded by an array of changes. v4.8 saw preparatory changes in
     this area already - this is the rest of the work. Includes the thread
     stack caching performance optimization. (Andy Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding -
     but all these current bits are standalone enhancements in a frame
     pointer based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now.
]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
This commit is contained in:
@@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
/*
|
||||
* Stack overflow? During boot, we can fault near the initial
|
||||
* stack in the direct map, but that's not an overflow -- check
|
||||
* that we're in vmalloc space to avoid this.
|
||||
*/
|
||||
if (is_vmalloc_addr((void *)address) &&
|
||||
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
|
||||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
||||
register void *__sp asm("rsp");
|
||||
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
|
||||
/*
|
||||
* We're likely to be running with very little stack space
|
||||
* left. It's plausible that we'd hit this condition but
|
||||
* double-fault even before we get this far, in which case
|
||||
* we're fine: the double-fault handler will deal with it.
|
||||
*
|
||||
* We don't want to make it all the way into the oops code
|
||||
* and then double-fault, though, because we're likely to
|
||||
* break the console driver and lose most of the stack dump.
|
||||
*/
|
||||
asm volatile ("movq %[stack], %%rsp\n\t"
|
||||
"call handle_stack_overflow\n\t"
|
||||
"1: jmp 1b"
|
||||
: "+r" (__sp)
|
||||
: "D" ("kernel stack overflow (page fault)"),
|
||||
"S" (regs), "d" (address),
|
||||
[stack] "rm" (stack));
|
||||
unreachable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 32-bit:
|
||||
*
|
||||
|
@@ -40,17 +40,26 @@
|
||||
* You need to add an if/def entry if you introduce a new memory region
|
||||
* compatible with KASLR. Your entry must be in logical order with memory
|
||||
* layout. For example, ESPFIX is before EFI because its virtual address is
|
||||
* before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
|
||||
* before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
|
||||
* ensure that this order is correct and won't be changed.
|
||||
*/
|
||||
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
|
||||
static const unsigned long vaddr_end = VMEMMAP_START;
|
||||
|
||||
#if defined(CONFIG_X86_ESPFIX64)
|
||||
static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
|
||||
#elif defined(CONFIG_EFI)
|
||||
static const unsigned long vaddr_end = EFI_VA_START;
|
||||
#else
|
||||
static const unsigned long vaddr_end = __START_KERNEL_map;
|
||||
#endif
|
||||
|
||||
/* Default values */
|
||||
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
|
||||
EXPORT_SYMBOL(page_offset_base);
|
||||
unsigned long vmalloc_base = __VMALLOC_BASE;
|
||||
EXPORT_SYMBOL(vmalloc_base);
|
||||
unsigned long vmemmap_base = __VMEMMAP_BASE;
|
||||
EXPORT_SYMBOL(vmemmap_base);
|
||||
|
||||
/*
|
||||
* Memory regions randomized by KASLR (except modules that use a separate logic
|
||||
@@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region {
|
||||
} kaslr_regions[] = {
|
||||
{ &page_offset_base, 64/* Maximum */ },
|
||||
{ &vmalloc_base, VMALLOC_SIZE_TB },
|
||||
{ &vmemmap_base, 1 },
|
||||
};
|
||||
|
||||
/* Get size in bytes used by the memory region */
|
||||
@@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void)
|
||||
struct rnd_state rand_state;
|
||||
unsigned long remain_entropy;
|
||||
|
||||
/*
|
||||
* All these BUILD_BUG_ON checks ensures the memory layout is
|
||||
* consistent with the vaddr_start/vaddr_end variables.
|
||||
*/
|
||||
BUILD_BUG_ON(vaddr_start >= vaddr_end);
|
||||
BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
|
||||
vaddr_end >= EFI_VA_START);
|
||||
BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
|
||||
config_enabled(CONFIG_EFI)) &&
|
||||
vaddr_end >= __START_KERNEL_map);
|
||||
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
|
||||
|
||||
if (!kaslr_memory_enabled())
|
||||
return;
|
||||
|
||||
|
@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
unsigned cpu = smp_processor_id();
|
||||
|
||||
if (likely(prev != next)) {
|
||||
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
|
||||
/*
|
||||
* If our current stack is in vmalloc space and isn't
|
||||
* mapped in the new pgd, we'll double-fault. Forcibly
|
||||
* map it.
|
||||
*/
|
||||
unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
|
||||
|
||||
pgd_t *pgd = next->pgd + stack_pgd_index;
|
||||
|
||||
if (unlikely(pgd_none(*pgd)))
|
||||
set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
this_cpu_write(cpu_tlbstate.active_mm, next);
|
||||
#endif
|
||||
|
||||
cpumask_set_cpu(cpu, mm_cpumask(next));
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user