Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 pti updates from Ingo Molnar:
 "The main changes:

   - Make the IBPB barrier more strict and add STIBP support (Jiri
     Kosina)

   - Micro-optimize and clean up the entry code (Andy Lutomirski)

   - ... plus misc other fixes"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/speculation: Propagate information about RSB filling mitigation to sysfs
  x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
  x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
  x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
  x86/CPU: Fix unused variable warning when !CONFIG_IA32_EMULATION
  x86/pti/64: Remove the SYSCALL64 entry trampoline
  x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space
  x86/entry/64: Document idtentry
This commit is contained in:
Linus Torvalds
2018-10-23 18:43:04 +01:00
19 changed files with 222 additions and 176 deletions

View File

@@ -142,67 +142,6 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
.pushsection .entry_trampoline, "ax"
/*
* The code in here gets remapped into cpu_entry_area's trampoline. This means
* that the assembler and linker have the wrong idea as to where this code
* lives (and, in fact, it's mapped more than once, so it's not even at a
* fixed address). So we can't reference any symbols outside the entry
* trampoline and expect it to work.
*
* Instead, we carefully abuse %rip-relative addressing.
* _entry_trampoline(%rip) refers to the start of the remapped) entry
* trampoline. We can thus find cpu_entry_area with this macro:
*/
#define CPU_ENTRY_AREA \
_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
ENTRY(entry_SYSCALL_64_trampoline)
UNWIND_HINT_EMPTY
swapgs
/* Stash the user RSP. */
movq %rsp, RSP_SCRATCH
/* Note: using %rsp as a scratch reg. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Load the top of the task stack into RSP */
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
/* Start building the simulated IRET frame. */
pushq $__USER_DS /* pt_regs->ss */
pushq RSP_SCRATCH /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
/*
* x86 lacks a near absolute jump, and we can't jump to the real
* entry text with a relative jump. We could push the target
* address and then use retq, but this destroys the pipeline on
* many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
* spill RDI and restore it in a second-stage trampoline.
*/
pushq %rdi
movq $entry_SYSCALL_64_stage2, %rdi
JMP_NOSPEC %rdi
END(entry_SYSCALL_64_trampoline)
.popsection
ENTRY(entry_SYSCALL_64_stage2)
UNWIND_HINT_EMPTY
popq %rdi
jmp entry_SYSCALL_64_after_hwframe
END(entry_SYSCALL_64_stage2)
ENTRY(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
@@ -212,21 +151,19 @@ ENTRY(entry_SYSCALL_64)
*/
swapgs
/*
* This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
/* tss.sp2 is scratch space. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rax /* pt_regs->orig_ax */
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
@@ -900,6 +837,42 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
/**
* idtentry - Generate an IDT entry stub
* @sym: Name of the generated entry point
* @do_sym: C function to be called
* @has_error_code: True if this IDT vector has an error code on the stack
* @paranoid: non-zero means that this vector may be invoked from
* kernel mode with user GSBASE and/or user CR3.
* 2 is special -- see below.
* @shift_ist: Set to an IST index if entries from kernel mode should
* decrement the IST stack so that nested entries get a
* fresh stack. (This is for #DB, which has a nasty habit
* of recursing.)
*
* idtentry generates an IDT stub that sets up a usable kernel context,
* creates struct pt_regs, and calls @do_sym. The stub has the following
* special behaviors:
*
* On an entry from user mode, the stub switches from the trampoline or
* IST stack to the normal thread stack. On an exit to user mode, the
* normal exit-to-usermode path is invoked.
*
* On an exit to kernel mode, if @paranoid == 0, we check for preemption,
* whereas we omit the preemption check if @paranoid != 0. This is purely
* because the implementation is simpler this way. The kernel only needs
* to check for asynchronous kernel preemption when IRQ handlers return.
*
* If @paranoid == 0, then the stub will handle IRET faults by pretending
* that the fault came from user mode. It will handle gs_change faults by
* pretending that the fault happened with kernel GSBASE. Since this handling
* is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
* @paranoid == 0. This special handling will do the wrong thing for
* espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
*
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8