Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 irq updates from Ingo Molnar:
 "Here are the main changes in this tree:

   - Introduce x86-64 IRQ/exception/debug stack guard pages to detect
     stack overflows immediately and deterministically.

   - Clean up over a decade's worth of accumulated cruft.

  The outcome of this should be more clear-cut faults/crashes when any
  of the low level x86 CPU stacks overflow, instead of silent memory
  corruption and sporadic failures much later on"
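
The guard pages turn an overflow into an immediate page fault. A minimal
user-space sketch of the same idea (illustrative only, not code from this
series; page and stack sizes are made up):

	/* Guard-page demo: an inaccessible page below a stack turns an
	 * overflow into an immediate SIGSEGV instead of silent corruption
	 * of whatever happens to sit below the stack.
	 */
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	#define PAGE_SZ		4096UL
	#define STACK_SZ	(4 * PAGE_SZ)

	int main(void)
	{
		/* Reserve one guard page plus the usable stack area. */
		char *area = mmap(NULL, PAGE_SZ + STACK_SZ,
				  PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (area == MAP_FAILED)
			return 1;

		/* Revoke all access to the lowest page: the guard. */
		mprotect(area, PAGE_SZ, PROT_NONE);

		char *stack = area + PAGE_SZ;
		memset(stack, 0, STACK_SZ);	/* in bounds: fine */
		puts("in-bounds write ok");

		stack[-1] = 0;	/* one byte below the stack: faults here */
		return 0;
	}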

* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/irq: Fix outdated comments
  x86/irq/64: Remove stack overflow debug code
  x86/irq/64: Remap the IRQ stack with guard pages
  x86/irq/64: Split the IRQ stack into its own pages
  x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
  x86/irq/32: Handle irq stack allocation failure proper
  x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
  x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
  x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
  x86/irq/32: Make irq stack a character array
  x86/irq/32: Define IRQ_STACK_SIZE
  x86/dumpstack/64: Speedup in_exception_stack()
  x86/exceptions: Split debug IST stack
  x86/exceptions: Enable IST guard pages
  x86/exceptions: Disconnect IST index and stack order
  x86/cpu: Remove orig_ist array
  x86/cpu: Prepare TSS.IST setup for guard pages
  x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
  x86/irq/64: Use cpu entry area instead of orig_ist
  x86/traps: Use cpu_entry_area instead of orig_ist
  ...
Author: Linus Torvalds
Date:   2019-05-06 15:56:41 -07:00
33 changed files with 382 additions and 322 deletions

arch/x86/include/asm/cpu_entry_area.h

@@ -7,6 +7,64 @@
 #include <asm/processor.h>
 #include <asm/intel_ds.h>
 
+#ifdef CONFIG_X86_64
+
+/* Macro to enforce the same ordering and stack sizes */
+#define ESTACKS_MEMBERS(guardsize, db2_holesize)	\
+	char	DF_stack_guard[guardsize];		\
+	char	DF_stack[EXCEPTION_STKSZ];		\
+	char	NMI_stack_guard[guardsize];		\
+	char	NMI_stack[EXCEPTION_STKSZ];		\
+	char	DB2_stack_guard[guardsize];		\
+	char	DB2_stack[db2_holesize];		\
+	char	DB1_stack_guard[guardsize];		\
+	char	DB1_stack[EXCEPTION_STKSZ];		\
+	char	DB_stack_guard[guardsize];		\
+	char	DB_stack[EXCEPTION_STKSZ];		\
+	char	MCE_stack_guard[guardsize];		\
+	char	MCE_stack[EXCEPTION_STKSZ];		\
+	char	IST_top_guard[guardsize];		\
+
+/* The exception stacks' physical storage. No guard pages required */
+struct exception_stacks {
+	ESTACKS_MEMBERS(0, 0)
+};
+
+/* The effective cpu entry area mapping with guard pages. */
+struct cea_exception_stacks {
+	ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
+};
+
+/*
+ * The exception stack ordering in [cea_]exception_stacks
+ */
+enum exception_stack_ordering {
+	ESTACK_DF,
+	ESTACK_NMI,
+	ESTACK_DB2,
+	ESTACK_DB1,
+	ESTACK_DB,
+	ESTACK_MCE,
+	N_EXCEPTION_STACKS
+};
+
+#define CEA_ESTACK_SIZE(st)					\
+	sizeof(((struct cea_exception_stacks *)0)->st## _stack)
+
+#define CEA_ESTACK_BOT(ceastp, st)				\
+	((unsigned long)&(ceastp)->st## _stack)
+
+#define CEA_ESTACK_TOP(ceastp, st)				\
+	(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
+
+#define CEA_ESTACK_OFFS(st)					\
+	offsetof(struct cea_exception_stacks, st## _stack)
+
+#define CEA_ESTACK_PAGES					\
+	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
+
+#endif
+
 /*
  * cpu_entry_area is a percpu region that contains things needed by the CPU
  * and early entry/exit code. Real types aren't used for all fields here
@@ -32,12 +90,9 @@ struct cpu_entry_area {
 
 #ifdef CONFIG_X86_64
 	/*
-	 * Exception stacks used for IST entries.
-	 *
-	 * In the future, this should have a separate slot for each stack
-	 * with guard pages between them.
+	 * Exception stacks used for IST entries with guard pages.
 	 */
-	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+	struct cea_exception_stacks estacks;
 #endif
 #ifdef CONFIG_CPU_SUP_INTEL
 	/*
@@ -57,6 +112,7 @@ struct cpu_entry_area {
 #define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
 
 extern void setup_cpu_entry_areas(void);
 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
 	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
 }
 
+#define __this_cpu_ist_top_va(name)				\
+	CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
+
 #endif

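What the ESTACKS_MEMBERS() trick above buys: stacks and guards are plain
struct members, so every size and offset is a compile-time constant and the
member order is identical in both layouts. A simplified user-space model
(illustrative only; fewer members, made-up sizes; zero-length arrays are a
GNU extension, as in the kernel):

	/* Simplified model of the exception-stack layout trick: one macro
	 * fixes the member order; guardsize 0 yields the compact backing
	 * store, a page-sized guard yields the mapped layout.
	 */
	#include <stdio.h>
	#include <stddef.h>

	#define PAGE_SIZE	4096
	#define EXCEPTION_STKSZ	PAGE_SIZE

	#define ESTACKS_MEMBERS(guardsize)		\
		char DF_stack_guard[guardsize];		\
		char DF_stack[EXCEPTION_STKSZ];		\
		char NMI_stack_guard[guardsize];	\
		char NMI_stack[EXCEPTION_STKSZ];	\
		char IST_top_guard[guardsize];

	struct exception_stacks     { ESTACKS_MEMBERS(0) };
	struct cea_exception_stacks { ESTACKS_MEMBERS(PAGE_SIZE) };

	#define CEA_ESTACK_OFFS(st)	\
		offsetof(struct cea_exception_stacks, st##_stack)

	int main(void)
	{
		/* The guard pages shift each stack in the mapped layout. */
		printf("DF:  storage %zu, mapped %zu\n",
		       offsetof(struct exception_stacks, DF_stack),
		       CEA_ESTACK_OFFS(DF));
		printf("NMI: storage %zu, mapped %zu\n",
		       offsetof(struct exception_stacks, NMI_stack),
		       CEA_ESTACK_OFFS(NMI));
		return 0;
	}
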
arch/x86/include/asm/debugreg.h

@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
 {
 	__this_cpu_dec(debug_stack_usage);
 }
 
-int is_debug_stack(unsigned long addr);
 void debug_stack_set_zero(void);
 void debug_stack_reset(void);
 #else /* !X86_64 */
-static inline int is_debug_stack(unsigned long addr) { return 0; }
 static inline void debug_stack_set_zero(void) { }
 static inline void debug_stack_reset(void) { }
 static inline void debug_stack_usage_inc(void) { }

arch/x86/include/asm/irq.h

@@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
 	return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_32
-extern void irq_ctx_init(int cpu);
-#else
-# define irq_ctx_init(cpu) do { } while (0)
-#endif
+extern int irq_init_percpu_irqstack(unsigned int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ

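On 32-bit, the new irq_init_percpu_irqstack() lets CPU bringup fail cleanly
when the IRQ stacks cannot be allocated. A sketch of the
arch/x86/kernel/irq_32.c side of this series (reconstructed, details may
differ):

	/* Allocate the hard and soft IRQ stacks for a CPU and report
	 * failure to the hotplug path instead of limping on without them.
	 */
	int irq_init_percpu_irqstack(unsigned int cpu)
	{
		int node = cpu_to_node(cpu);
		struct page *ph, *ps;

		if (per_cpu(hardirq_stack_ptr, cpu))
			return 0;	/* already set up (CPU re-plug) */

		ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
		if (!ph)
			return -ENOMEM;
		ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
		if (!ps) {
			__free_pages(ph, THREAD_SIZE_ORDER);
			return -ENOMEM;
		}

		per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
		per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
		return 0;
	}
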
arch/x86/include/asm/irq_vectors.h

@@ -18,8 +18,8 @@
  * Vectors   0 ...  31 : system traps and exceptions - hardcoded events
  * Vectors  32 ... 127 : device interrupts
  * Vector  128         : legacy int80 syscall interface
- * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
- * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
+ * Vectors 129 ... LOCAL_TIMER_VECTOR-1
+ * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
  *
  * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
  *

arch/x86/include/asm/page_32_types.h

@@ -22,11 +22,9 @@
 #define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 0
-#define DEBUG_STACK 0
-#define MCE_STACK 0
-#define N_EXCEPTION_STACKS 1
+#define IRQ_STACK_SIZE		THREAD_SIZE
+
+#define N_EXCEPTION_STACKS	1
 
 #ifdef CONFIG_X86_PAE
 /*

arch/x86/include/asm/page_64_types.h

@@ -14,22 +14,20 @@
 #define THREAD_SIZE_ORDER	(2 + KASAN_STACK_ORDER)
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 #define CURRENT_MASK		(~(THREAD_SIZE - 1))
 
 #define EXCEPTION_STACK_ORDER	(0 + KASAN_STACK_ORDER)
 #define EXCEPTION_STKSZ		(PAGE_SIZE << EXCEPTION_STACK_ORDER)
 
-#define DEBUG_STACK_ORDER	(EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ		(PAGE_SIZE << DEBUG_STACK_ORDER)
-
 #define IRQ_STACK_ORDER		(2 + KASAN_STACK_ORDER)
 #define IRQ_STACK_SIZE		(PAGE_SIZE << IRQ_STACK_ORDER)
 
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 2
-#define DEBUG_STACK 3
-#define MCE_STACK 4
-#define N_EXCEPTION_STACKS 4	/* hw limit: 7 */
+/*
+ * The index for the tss.ist[] array. The hardware limit is 7 entries.
+ */
+#define IST_INDEX_DF		0
+#define IST_INDEX_NMI		1
+#define IST_INDEX_DB		2
+#define IST_INDEX_MCE		3
 
 /*
  * Set __PAGE_OFFSET to the most negative possible address +

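The IST_INDEX_* values are consumed when cpu_init() fills in the per-CPU
TSS; with this series each ist[] slot points at the top of the matching
guard-paged cpu_entry_area stack, roughly (a sketch based on the
corresponding arch/x86/kernel/cpu/common.c change, which is not shown here):

	/* Each tss.ist[] slot gets the top of the matching guard-paged
	 * exception stack, via the __this_cpu_ist_top_va() helper that
	 * cpu_entry_area.h now provides.
	 */
	tss->x86_tss.ist[IST_INDEX_DF]	= __this_cpu_ist_top_va(DF);
	tss->x86_tss.ist[IST_INDEX_NMI]	= __this_cpu_ist_top_va(NMI);
	tss->x86_tss.ist[IST_INDEX_DB]	= __this_cpu_ist_top_va(DB);
	tss->x86_tss.ist[IST_INDEX_MCE]	= __this_cpu_ist_top_va(MCE);
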
arch/x86/include/asm/processor.h

@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 #define __KERNEL_TSS_LIMIT	\
 	(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
 
+/* Per CPU interrupt stacks */
+struct irq_stack {
+	char		stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
+
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #else
@@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
-	unsigned long		ist[7];
-};
-
 #ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
-	char irq_stack[IRQ_STACK_SIZE];
+struct fixed_percpu_data {
 	/*
 	 * GCC hardcodes the stack canary as %gs:40.  Since the
 	 * irq_stack is the object at %gs:0, we reserve the bottom
 	 * 48 bytes of the irq stack for the canary.
 	 */
-	struct {
-		char gs_base[40];
-		unsigned long stack_canary;
-	};
+	char		gs_base[40];
+	unsigned long	stack_canary;
 };
 
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
+DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
+DECLARE_INIT_PER_CPU(fixed_percpu_data);
 
 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
-	return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 }
 
-DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
 
 extern asmlinkage void ignore_sysret(void);
@@ -427,15 +421,8 @@ struct stack_canary {
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
-	u32		stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* X86_64 */
 
 extern unsigned int fpu_kernel_xstate_size;

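On 64-bit, hardirq_stack_ptr now points into a vmap()ed view of the IRQ
stack's backing store, so the vmalloc area's implicit guard page sits
directly below the stack. A sketch of the corresponding
arch/x86/kernel/irq_64.c change (reconstructed from the series, details may
differ):

	/* Remap the per-CPU IRQ stack backing store through vmap() so the
	 * vmalloc guard page below it catches any overflow immediately.
	 */
	static int map_irq_stack(unsigned int cpu)
	{
		char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
		struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
		void *va;
		int i;

		for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
			phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));

			pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
		}

		va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
		if (!va)
			return -ENOMEM;

		per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
		return 0;
	}
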
arch/x86/include/asm/smp.h

@@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
-void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
 int common_cpu_die(unsigned int cpu);

arch/x86/include/asm/stackprotector.h

@@ -13,7 +13,7 @@
  * On x86_64, %gs is shared by percpu area and stack canary.  All
  * percpu symbols are zero based and %gs points to the base of percpu
  * area.  The first occupant of the percpu area is always
- * irq_stack_union which contains stack_canary at offset 40.  Userland
+ * fixed_percpu_data which contains stack_canary at offset 40.  Userland
  * %gs is always saved and restored on kernel entry and exit using
  * swapgs, so stack protector doesn't add any complexity there.
  *
@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
 	u64 tsc;
 
 #ifdef CONFIG_X86_64
-	BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+	BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
 #endif
 	/*
 	 * We both use the random pool and the current TSC as a source
@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
 	current->stack_canary = canary;
 #ifdef CONFIG_X86_64
-	this_cpu_write(irq_stack_union.stack_canary, canary);
+	this_cpu_write(fixed_percpu_data.stack_canary, canary);
 #else
 	this_cpu_write(stack_canary.canary, canary);
 #endif

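The offset-40 contract is easy to replicate outside the kernel; a
stand-alone illustration of the same compile-time check (user-space C, not
kernel code):

	/* User-space illustration of the constraint BUILD_BUG_ON enforces
	 * above: gcc emits the canary load as %gs:40, so stack_canary must
	 * stay exactly 40 bytes into the percpu area's first object.
	 */
	#include <stddef.h>

	struct fixed_percpu_data {
		char		gs_base[40];
		unsigned long	stack_canary;
	};

	_Static_assert(offsetof(struct fixed_percpu_data, stack_canary) == 40,
		       "stack canary must sit at %gs:40");
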
arch/x86/include/asm/stacktrace.h

@@ -9,6 +9,8 @@
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+
+#include <asm/cpu_entry_area.h>
 #include <asm/switch_to.h>
 
 enum stack_type {