Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Two families of fixes:

   - Fix an FPU context related boot crash on newer x86 hardware with
     larger context sizes than what most people test.  To fix this
     without ugly kludges or extensive reverts we had to touch core task
     allocator, to allow x86 to determine the task size dynamically, at
     boot time.

     I've tested it on a number of x86 platforms, and I cross-built it
     to a handful of architectures:

                                        (warns)               (warns)
       testing     x86-64:  -git:  pass (    0),  -tip:  pass (    0)
       testing     x86-32:  -git:  pass (    0),  -tip:  pass (    0)
       testing        arm:  -git:  pass ( 1359),  -tip:  pass ( 1359)
       testing       cris:  -git:  pass ( 1031),  -tip:  pass ( 1031)
       testing       m32r:  -git:  pass ( 1135),  -tip:  pass ( 1135)
       testing       m68k:  -git:  pass ( 1471),  -tip:  pass ( 1471)
       testing       mips:  -git:  pass ( 1162),  -tip:  pass ( 1162)
       testing    mn10300:  -git:  pass ( 1058),  -tip:  pass ( 1058)
       testing     parisc:  -git:  pass ( 1846),  -tip:  pass ( 1846)
       testing      sparc:  -git:  pass ( 1185),  -tip:  pass ( 1185)

     ... so I hope the cross-arch impact 'none', as intended.

     (by Dave Hansen)

   - Fix various NMI handling related bugs unearthed by the big asm code
     rewrite and generally make the NMI code more robust and more
     maintainable while at it.  These changes are a bit late in the
     cycle, I hope they are still acceptable.

     (by Andy Lutomirski)"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86
  x86/fpu, sched: Dynamically allocate 'struct fpu'
  x86/entry/64, x86/nmi/64: Add CONFIG_DEBUG_ENTRY NMI testing code
  x86/nmi/64: Make the "NMI executing" variable more consistent
  x86/nmi/64: Minor asm simplification
  x86/nmi/64: Use DF to avoid userspace RSP confusing nested NMI detection
  x86/nmi/64: Reorder nested NMI checks
  x86/nmi/64: Improve nested NMI comments
  x86/nmi/64: Switch stacks on userspace NMI entry
  x86/nmi/64: Remove asm code that saves CR2
  x86/nmi: Enable nested do_nmi() handling for 64-bit kernels
This commit is contained in:
Linus Torvalds
2015-07-18 10:49:57 -07:00
12 changed files with 379 additions and 217 deletions

View File

@@ -4,6 +4,8 @@
#include <asm/fpu/internal.h>
#include <asm/tlbflush.h>
#include <linux/sched.h>
/*
* Initialize the TS bit in CR0 according to the style of context-switches
* we are using:
@@ -136,6 +138,43 @@ static void __init fpu__init_system_generic(void)
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
/* Enforce that 'MEMBER' is the last field of 'TYPE': */
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
/*
* We append the 'struct fpu' to the task_struct:
*/
static void __init fpu__init_task_struct_size(void)
{
int task_size = sizeof(struct task_struct);
/*
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
/*
* Add back the dynamically-calculated register state
* size.
*/
task_size += xstate_size;
/*
* We dynamically size 'struct fpu', so we require that
* it be at the end of 'thread_struct' and that
* 'thread_struct' be at the end of 'task_struct'. If
* you hit a compile error here, check the structure to
* see if something got added to the end.
*/
CHECK_MEMBER_AT_END_OF(struct fpu, state);
CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
arch_task_struct_size = task_size;
}
/*
* Set up the xstate_size based on the legacy FPU context size.
*
@@ -287,6 +326,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
fpu__init_system_generic();
fpu__init_system_xstate_size_legacy();
fpu__init_system_xstate();
fpu__init_task_struct_size();
fpu__init_system_ctx_switch();
}

View File

@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
NOKPROBE_SYMBOL(default_do_nmi);
/*
* NMIs can hit breakpoints which will cause it to lose its
* NMI context with the CPU when the breakpoint does an iret.
*/
#ifdef CONFIG_X86_32
/*
* For i386, NMIs use the same stack as the kernel, and we can
* add a workaround to the iret problem in C (preventing nested
* NMIs if an NMI takes a trap). Simply have 3 states the NMI
* can be in:
* NMIs can page fault or hit breakpoints which will cause it to lose
* its NMI context with the CPU when the breakpoint or page fault does an IRET.
*
* As a result, NMIs can nest if NMIs get unmasked due an IRET during
* NMI processing. On x86_64, the asm glue protects us from nested NMIs
* if the outer NMI came from kernel mode, but we can still nest if the
* outer NMI came from user mode.
*
* To handle these nested NMIs, we have three states:
*
* 1) not running
* 2) executing
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
* (Note, the latch is binary, thus multiple NMIs triggering,
* when one is running, are ignored. Only one NMI is restarted.)
*
* If an NMI hits a breakpoint that executes an iret, another
* NMI can preempt it. We do not want to allow this new NMI
* to run, but we want to execute it when the first one finishes.
* We set the state to "latched", and the exit of the first NMI will
* perform a dec_return, if the result is zero (NOT_RUNNING), then
* it will simply exit the NMI handler. If not, the dec_return
* would have set the state to NMI_EXECUTING (what we want it to
* be when we are running). In this case, we simply jump back
* to rerun the NMI handler again, and restart the 'latched' NMI.
* If an NMI executes an iret, another NMI can preempt it. We do not
* want to allow this new NMI to run, but we want to execute it when the
* first one finishes. We set the state to "latched", and the exit of
* the first NMI will perform a dec_return, if the result is zero
* (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
* dec_return would have set the state to NMI_EXECUTING (what we want it
* to be when we are running). In this case, we simply jump back to
* rerun the NMI handler again, and restart the 'latched' NMI.
*
* No trap (breakpoint or page fault) should be hit before nmi_restart,
* thus there is no race between the first check of state for NOT_RUNNING
@@ -461,49 +460,36 @@ enum nmi_states {
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
#define nmi_nesting_preprocess(regs) \
do { \
if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
this_cpu_write(nmi_state, NMI_LATCHED); \
return; \
} \
this_cpu_write(nmi_state, NMI_EXECUTING); \
this_cpu_write(nmi_cr2, read_cr2()); \
} while (0); \
nmi_restart:
#define nmi_nesting_postprocess() \
do { \
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \
write_cr2(this_cpu_read(nmi_cr2)); \
if (this_cpu_dec_return(nmi_state)) \
goto nmi_restart; \
} while (0)
#else /* x86_64 */
#ifdef CONFIG_X86_64
/*
* In x86_64 things are a bit more difficult. This has the same problem
* where an NMI hitting a breakpoint that calls iret will remove the
* NMI context, allowing a nested NMI to enter. What makes this more
* difficult is that both NMIs and breakpoints have their own stack.
* When a new NMI or breakpoint is executed, the stack is set to a fixed
* point. If an NMI is nested, it will have its stack set at that same
* fixed address that the first NMI had, and will start corrupting the
* stack. This is handled in entry_64.S, but the same problem exists with
* the breakpoint stack.
* In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
* some care, the inner breakpoint will clobber the outer breakpoint's
* stack.
*
* If a breakpoint is being processed, and the debug stack is being used,
* if an NMI comes in and also hits a breakpoint, the stack pointer
* will be set to the same fixed address as the breakpoint that was
* interrupted, causing that stack to be corrupted. To handle this case,
* check if the stack that was interrupted is the debug stack, and if
* so, change the IDT so that new breakpoints will use the current stack
* and not switch to the fixed address. On return of the NMI, switch back
* to the original IDT.
* If a breakpoint is being processed, and the debug stack is being
* used, if an NMI comes in and also hits a breakpoint, the stack
* pointer will be set to the same fixed address as the breakpoint that
* was interrupted, causing that stack to be corrupted. To handle this
* case, check if the stack that was interrupted is the debug stack, and
* if so, change the IDT so that new breakpoints will use the current
* stack and not switch to the fixed address. On return of the NMI,
* switch back to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
#endif
static inline void nmi_nesting_preprocess(struct pt_regs *regs)
dotraplinkage notrace void
do_nmi(struct pt_regs *regs, long error_code)
{
if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
this_cpu_write(nmi_state, NMI_LATCHED);
return;
}
this_cpu_write(nmi_state, NMI_EXECUTING);
this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:
#ifdef CONFIG_X86_64
/*
* If we interrupted a breakpoint, it is possible that
* the nmi handler will have breakpoints too. We need to
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
debug_stack_set_zero();
this_cpu_write(update_debug_stack, 1);
}
}
static inline void nmi_nesting_postprocess(void)
{
if (unlikely(this_cpu_read(update_debug_stack))) {
debug_stack_reset();
this_cpu_write(update_debug_stack, 0);
}
}
#endif
dotraplinkage notrace void
do_nmi(struct pt_regs *regs, long error_code)
{
nmi_nesting_preprocess(regs);
nmi_enter();
inc_irq_stat(__nmi_count);
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
nmi_exit();
/* On i386, may loop back to preprocess */
nmi_nesting_postprocess();
#ifdef CONFIG_X86_64
if (unlikely(this_cpu_read(update_debug_stack))) {
debug_stack_reset();
this_cpu_write(update_debug_stack, 0);
}
#endif
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
}
NOKPROBE_SYMBOL(do_nmi);

View File

@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
memcpy(dst, src, arch_task_struct_size);
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}