Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 irq updates from Ingo Molnar:
 "Here are the main changes in this tree:

   - Introduce x86-64 IRQ/exception/debug stack guard pages to detect
     stack overflows immediately and deterministically.

   - Clean up over a decade worth of cruft accumulated.

  The outcome of this should be more clear-cut faults/crashes when any
  of the low level x86 CPU stacks overflow, instead of silent memory
  corruption and sporadic failures much later on"

* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/irq: Fix outdated comments
  x86/irq/64: Remove stack overflow debug code
  x86/irq/64: Remap the IRQ stack with guard pages
  x86/irq/64: Split the IRQ stack into its own pages
  x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
  x86/irq/32: Handle irq stack allocation failure proper
  x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
  x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
  x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
  x86/irq/32: Make irq stack a character array
  x86/irq/32: Define IRQ_STACK_SIZE
  x86/dumpstack/64: Speedup in_exception_stack()
  x86/exceptions: Split debug IST stack
  x86/exceptions: Enable IST guard pages
  x86/exceptions: Disconnect IST index and stack order
  x86/cpu: Remove orig_ist array
  x86/cpu: Prepare TSS.IST setup for guard pages
  x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
  x86/irq/64: Use cpu entry area instead of orig_ist
  x86/traps: Use cpu_entry_area instead of orig_ist
  ...
This commit is contained in:
@@ -7,6 +7,64 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel_ds.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/* Macro to enforce the same ordering and stack sizes */
|
||||
#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
|
||||
char DF_stack_guard[guardsize]; \
|
||||
char DF_stack[EXCEPTION_STKSZ]; \
|
||||
char NMI_stack_guard[guardsize]; \
|
||||
char NMI_stack[EXCEPTION_STKSZ]; \
|
||||
char DB2_stack_guard[guardsize]; \
|
||||
char DB2_stack[db2_holesize]; \
|
||||
char DB1_stack_guard[guardsize]; \
|
||||
char DB1_stack[EXCEPTION_STKSZ]; \
|
||||
char DB_stack_guard[guardsize]; \
|
||||
char DB_stack[EXCEPTION_STKSZ]; \
|
||||
char MCE_stack_guard[guardsize]; \
|
||||
char MCE_stack[EXCEPTION_STKSZ]; \
|
||||
char IST_top_guard[guardsize]; \
|
||||
|
||||
/* The exception stacks' physical storage. No guard pages required */
|
||||
struct exception_stacks {
|
||||
ESTACKS_MEMBERS(0, 0)
|
||||
};
|
||||
|
||||
/* The effective cpu entry area mapping with guard pages. */
|
||||
struct cea_exception_stacks {
|
||||
ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
|
||||
};
|
||||
|
||||
/*
|
||||
* The exception stack ordering in [cea_]exception_stacks
|
||||
*/
|
||||
enum exception_stack_ordering {
|
||||
ESTACK_DF,
|
||||
ESTACK_NMI,
|
||||
ESTACK_DB2,
|
||||
ESTACK_DB1,
|
||||
ESTACK_DB,
|
||||
ESTACK_MCE,
|
||||
N_EXCEPTION_STACKS
|
||||
};
|
||||
|
||||
#define CEA_ESTACK_SIZE(st) \
|
||||
sizeof(((struct cea_exception_stacks *)0)->st## _stack)
|
||||
|
||||
#define CEA_ESTACK_BOT(ceastp, st) \
|
||||
((unsigned long)&(ceastp)->st## _stack)
|
||||
|
||||
#define CEA_ESTACK_TOP(ceastp, st) \
|
||||
(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
|
||||
|
||||
#define CEA_ESTACK_OFFS(st) \
|
||||
offsetof(struct cea_exception_stacks, st## _stack)
|
||||
|
||||
#define CEA_ESTACK_PAGES \
|
||||
(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region that contains things needed by the CPU
|
||||
* and early entry/exit code. Real types aren't used for all fields here
|
||||
@@ -32,12 +90,9 @@ struct cpu_entry_area {
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
* Exception stacks used for IST entries with guard pages.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
struct cea_exception_stacks estacks;
|
||||
#endif
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
/*
|
||||
@@ -57,6 +112,7 @@ struct cpu_entry_area {
|
||||
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
||||
@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
|
||||
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
|
||||
}
|
||||
|
||||
#define __this_cpu_ist_top_va(name) \
|
||||
CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
|
||||
|
||||
#endif
|
||||
|
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
|
||||
{
|
||||
__this_cpu_dec(debug_stack_usage);
|
||||
}
|
||||
int is_debug_stack(unsigned long addr);
|
||||
void debug_stack_set_zero(void);
|
||||
void debug_stack_reset(void);
|
||||
#else /* !X86_64 */
|
||||
static inline int is_debug_stack(unsigned long addr) { return 0; }
|
||||
static inline void debug_stack_set_zero(void) { }
|
||||
static inline void debug_stack_reset(void) { }
|
||||
static inline void debug_stack_usage_inc(void) { }
|
||||
|
@@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
|
||||
return ((irq == 2) ? 9 : irq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
extern void irq_ctx_init(int cpu);
|
||||
#else
|
||||
# define irq_ctx_init(cpu) do { } while (0)
|
||||
#endif
|
||||
extern int irq_init_percpu_irqstack(unsigned int cpu);
|
||||
|
||||
#define __ARCH_HAS_DO_SOFTIRQ
|
||||
|
||||
|
@@ -18,8 +18,8 @@
|
||||
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
||||
* Vectors 32 ... 127 : device interrupts
|
||||
* Vector 128 : legacy int80 syscall interface
|
||||
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
|
||||
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
||||
* Vectors 129 ... LOCAL_TIMER_VECTOR-1 : device interrupts
|
||||
* Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
|
||||
*
|
||||
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
|
||||
*
|
||||
|
@@ -22,11 +22,9 @@
|
||||
#define THREAD_SIZE_ORDER 1
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
|
||||
#define DOUBLEFAULT_STACK 1
|
||||
#define NMI_STACK 0
|
||||
#define DEBUG_STACK 0
|
||||
#define MCE_STACK 0
|
||||
#define N_EXCEPTION_STACKS 1
|
||||
#define IRQ_STACK_SIZE THREAD_SIZE
|
||||
|
||||
#define N_EXCEPTION_STACKS 1
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/*
|
||||
|
@@ -14,22 +14,20 @@
|
||||
|
||||
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
#define CURRENT_MASK (~(THREAD_SIZE - 1))
|
||||
|
||||
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
|
||||
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
|
||||
|
||||
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
|
||||
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
|
||||
|
||||
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
|
||||
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
|
||||
|
||||
#define DOUBLEFAULT_STACK 1
|
||||
#define NMI_STACK 2
|
||||
#define DEBUG_STACK 3
|
||||
#define MCE_STACK 4
|
||||
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
|
||||
/*
|
||||
* The index for the tss.ist[] array. The hardware limit is 7 entries.
|
||||
*/
|
||||
#define IST_INDEX_DF 0
|
||||
#define IST_INDEX_NMI 1
|
||||
#define IST_INDEX_DB 2
|
||||
#define IST_INDEX_MCE 3
|
||||
|
||||
/*
|
||||
* Set __PAGE_OFFSET to the most negative possible address +
|
||||
|
@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
|
||||
#define __KERNEL_TSS_LIMIT \
|
||||
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
|
||||
|
||||
/* Per CPU interrupt stacks */
|
||||
struct irq_stack {
|
||||
char stack[IRQ_STACK_SIZE];
|
||||
} __aligned(IRQ_STACK_SIZE);
|
||||
|
||||
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#else
|
||||
@@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save the original ist values for checking stack pointers during debugging
|
||||
*/
|
||||
struct orig_ist {
|
||||
unsigned long ist[7];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
union irq_stack_union {
|
||||
char irq_stack[IRQ_STACK_SIZE];
|
||||
struct fixed_percpu_data {
|
||||
/*
|
||||
* GCC hardcodes the stack canary as %gs:40. Since the
|
||||
* irq_stack is the object at %gs:0, we reserve the bottom
|
||||
* 48 bytes of the irq stack for the canary.
|
||||
*/
|
||||
struct {
|
||||
char gs_base[40];
|
||||
unsigned long stack_canary;
|
||||
};
|
||||
char gs_base[40];
|
||||
unsigned long stack_canary;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
|
||||
DECLARE_INIT_PER_CPU(irq_stack_union);
|
||||
DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
|
||||
DECLARE_INIT_PER_CPU(fixed_percpu_data);
|
||||
|
||||
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
|
||||
{
|
||||
return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
|
||||
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
||||
DECLARE_PER_CPU(unsigned int, irq_count);
|
||||
extern asmlinkage void ignore_sysret(void);
|
||||
|
||||
@@ -427,15 +421,8 @@ struct stack_canary {
|
||||
};
|
||||
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
|
||||
#endif
|
||||
/*
|
||||
* per-CPU IRQ handling stacks
|
||||
*/
|
||||
struct irq_stack {
|
||||
u32 stack[THREAD_SIZE/sizeof(u32)];
|
||||
} __aligned(THREAD_SIZE);
|
||||
|
||||
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
|
||||
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
|
||||
/* Per CPU softirq stack pointer */
|
||||
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
|
||||
#endif /* X86_64 */
|
||||
|
||||
extern unsigned int fpu_kernel_xstate_size;
|
||||
|
@@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
|
||||
void native_smp_prepare_cpus(unsigned int max_cpus);
|
||||
void calculate_max_logical_packages(void);
|
||||
void native_smp_cpus_done(unsigned int max_cpus);
|
||||
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_disable(void);
|
||||
int common_cpu_die(unsigned int cpu);
|
||||
|
@@ -13,7 +13,7 @@
|
||||
* On x86_64, %gs is shared by percpu area and stack canary. All
|
||||
* percpu symbols are zero based and %gs points to the base of percpu
|
||||
* area. The first occupant of the percpu area is always
|
||||
* irq_stack_union which contains stack_canary at offset 40. Userland
|
||||
* fixed_percpu_data which contains stack_canary at offset 40. Userland
|
||||
* %gs is always saved and restored on kernel entry and exit using
|
||||
* swapgs, so stack protector doesn't add any complexity there.
|
||||
*
|
||||
@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||
u64 tsc;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
|
||||
BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
|
||||
#endif
|
||||
/*
|
||||
* We both use the random pool and the current TSC as a source
|
||||
@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||
|
||||
current->stack_canary = canary;
|
||||
#ifdef CONFIG_X86_64
|
||||
this_cpu_write(irq_stack_union.stack_canary, canary);
|
||||
this_cpu_write(fixed_percpu_data.stack_canary, canary);
|
||||
#else
|
||||
this_cpu_write(stack_canary.canary, canary);
|
||||
#endif
|
||||
|
@@ -9,6 +9,8 @@
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ptrace.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
enum stack_type {
|
||||
|
Reference in New Issue
Block a user