Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics'
  that accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
     x86 - preceded by an array of changes. v4.8 saw preparatory changes
     in this area already - this is the rest of the work. Includes the
     thread stack caching performance optimization. (Andy Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding -
     but all these current bits are standalone enhancements in a frame
     pointer based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now. ]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
This commit is contained in:
Linus Torvalds
2016-10-03 16:13:28 -07:00
114 changed files with 1751 additions and 1137 deletions

View File

@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o
else
obj-y += unwind_guess.o
endif
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)

View File

@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_table(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());

View File

@@ -25,7 +25,7 @@
static struct apic apic_physflat;
static struct apic apic_flat;
struct apic __read_mostly *apic = &apic_flat;
struct apic *apic __ro_after_init = &apic_flat;
EXPORT_SYMBOL_GPL(apic);
static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
return 1;
}
static struct apic apic_flat = {
static struct apic apic_flat __ro_after_init = {
.name = "flat",
.probe = flat_probe,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
return 0;
}
static struct apic apic_physflat = {
static struct apic apic_physflat __ro_after_init = {
.name = "physical flat",
.probe = physflat_probe,

View File

@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
struct apic apic_noop = {
struct apic apic_noop __ro_after_init = {
.name = "noop",
.probe = noop_probe,
.acpi_madt_oem_check = NULL,

View File

@@ -142,7 +142,7 @@ static int probe_bigsmp(void)
return dmi_bigsmp;
}
static struct apic apic_bigsmp = {
static struct apic apic_bigsmp __ro_after_init = {
.name = "bigsmp",
.probe = probe_bigsmp,

View File

@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
}
static struct irq_chip hpet_msi_controller = {
static struct irq_chip hpet_msi_controller __ro_after_init = {
.name = "HPET-MSI",
.irq_unmask = hpet_msi_unmask,
.irq_mask = hpet_msi_mask,

View File

@@ -72,7 +72,7 @@ static int probe_default(void)
return 1;
}
static struct apic apic_default = {
static struct apic apic_default __ro_after_init = {
.name = "default",
.probe = probe_default,
@@ -126,7 +126,7 @@ static struct apic apic_default = {
apic_driver(apic_default);
struct apic *apic = &apic_default;
struct apic *apic __ro_after_init = &apic_default;
EXPORT_SYMBOL_GPL(apic);
static int cmdline_apic __initdata;

View File

@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
}
static struct apic apic_x2apic_cluster = {
static struct apic apic_x2apic_cluster __ro_after_init = {
.name = "cluster x2apic",
.probe = x2apic_cluster_probe,

View File

@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
return apic == &apic_x2apic_phys;
}
static struct apic apic_x2apic_phys = {
static struct apic apic_x2apic_phys __ro_after_init = {
.name = "physical x2apic",
.probe = x2apic_phys_probe,

View File

@@ -560,7 +560,7 @@ static int uv_probe(void)
return apic == &apic_x2apic_uv_x;
}
static struct apic __refdata apic_x2apic_uv_x = {
static struct apic apic_x2apic_uv_x __ro_after_init = {
.name = "UV large system",
.probe = uv_probe,

View File

@@ -29,10 +29,13 @@
void common(void) {
BLANK();
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
OFFSET(TASK_threadsp, task_struct, thread.sp);
#ifdef CONFIG_CC_STACKPROTECTOR
OFFSET(TASK_stack_canary, task_struct, stack_canary);
#endif
BLANK();
OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();

View File

@@ -57,6 +57,11 @@ void foo(void)
/* Size of SYSENTER_stack */
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
#ifdef CONFIG_CC_STACKPROTECTOR
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
#endif
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);

View File

@@ -56,6 +56,11 @@ int main(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK();
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
BLANK();
#endif
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));

View File

@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) debug_idt_table };
struct desc_ptr idt_descr __ro_after_init = {
.size = NR_VECTORS * 16 - 1,
.address = (unsigned long) idt_table,
};
const struct desc_ptr debug_idt_descr = {
.size = NR_VECTORS * 16 - 1,
.address = (unsigned long) debug_idt_table,
};
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
/*
* LSTAR and STAR live in a bit strange symbiosis.
* They both write to the same internal register. STAR allows to
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);

View File

@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
const struct mtrr_ops *mtrr_if;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void set_mtrr_ops(const struct mtrr_ops *ops)
void __init set_mtrr_ops(const struct mtrr_ops *ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
mtrr_ops[ops->vendor] = ops;

View File

@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
bool get_mtrr_state(void);
void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
extern u64 size_or_mask, size_and_mask;
extern const struct mtrr_ops *mtrr_if;

View File

@@ -17,7 +17,7 @@
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
static void printk_stack_address(unsigned long address, int reliable,
void *data)
bool in_task_stack(unsigned long *stack, struct task_struct *task,
struct stack_info *info)
{
unsigned long *begin = task_stack_page(task);
unsigned long *end = task_stack_page(task) + THREAD_SIZE;
if (stack < begin || stack >= end)
return false;
info->type = STACK_TYPE_TASK;
info->begin = begin;
info->end = end;
info->next_sp = NULL;
return true;
}
static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl)
{
touch_nmi_watchdog();
printk("%s [<%p>] %s%pB\n",
(char *)data, (void *)address, reliable ? "" : "? ",
log_lvl, (void *)address, reliable ? "" : "? ",
(void *)address);
}
@@ -38,176 +56,120 @@ void printk_address(unsigned long address)
pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static void
print_ftrace_graph_addr(unsigned long addr, void *data,
const struct stacktrace_ops *ops,
struct task_struct *task, int *graph)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
unsigned long ret_addr;
int index;
struct unwind_state state;
struct stack_info stack_info = {0};
unsigned long visit_mask = 0;
int graph_idx = 0;
if (addr != (unsigned long)return_to_handler)
return;
printk("%sCall Trace:\n", log_lvl);
index = task->curr_ret_stack;
if (!task->ret_stack || index < *graph)
return;
index -= *graph;
ret_addr = task->ret_stack[index].ret;
ops->address(data, ret_addr, 1);
(*graph)++;
}
#else
static inline void
print_ftrace_graph_addr(unsigned long addr, void *data,
const struct stacktrace_ops *ops,
struct task_struct *task, int *graph)
{ }
#endif
/*
* x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
static inline int valid_stack_ptr(struct task_struct *task,
void *p, unsigned int size, void *end)
{
void *t = task_stack_page(task);
if (end) {
if (p < end && p >= (end-THREAD_SIZE))
return 1;
else
return 0;
}
return p >= t && p < t + THREAD_SIZE - size;
}
unsigned long
print_context_stack(struct task_struct *task,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data,
unsigned long *end, int *graph)
{
struct stack_frame *frame = (struct stack_frame *)bp;
unwind_start(&state, task, regs, stack);
/*
* If we overflowed the stack into a guard page, jump back to the
* bottom of the usable stack.
* Iterate through the stacks, starting with the current stack pointer.
* Each stack has a pointer to the next one.
*
* x86-64 can have several stacks:
* - task stack
* - interrupt stack
* - HW exception stacks (double fault, nmi, debug, mce)
*
* x86-32 can have up to three stacks:
* - task stack
* - softirq stack
* - hardirq stack
*/
if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
PAGE_SIZE)
stack = (unsigned long *)task_stack_page(task);
for (; stack; stack = stack_info.next_sp) {
const char *str_begin, *str_end;
while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
unsigned long addr;
/*
* If we overflowed the task stack into a guard page, jump back
* to the bottom of the usable stack.
*/
if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
stack = task_stack_page(task);
addr = *stack;
if (__kernel_text_address(addr)) {
if ((unsigned long) stack == bp + sizeof(long)) {
ops->address(data, addr, 1);
frame = frame->next_frame;
bp = (unsigned long) frame;
} else {
ops->address(data, addr, 0);
}
print_ftrace_graph_addr(addr, data, ops, task, graph);
if (get_stack_info(stack, task, &stack_info, &visit_mask))
break;
stack_type_str(stack_info.type, &str_begin, &str_end);
if (str_begin)
printk("%s <%s> ", log_lvl, str_begin);
/*
* Scan the stack, printing any text addresses we find. At the
* same time, follow proper stack frames with the unwinder.
*
* Addresses found during the scan which are not reported by
* the unwinder are considered to be additional clues which are
* sometimes useful for debugging and are prefixed with '?'.
* This also serves as a failsafe option in case the unwinder
* goes off in the weeds.
*/
for (; stack < stack_info.end; stack++) {
unsigned long real_addr;
int reliable = 0;
unsigned long addr = *stack;
unsigned long *ret_addr_p =
unwind_get_return_address_ptr(&state);
if (!__kernel_text_address(addr))
continue;
if (stack == ret_addr_p)
reliable = 1;
/*
* When function graph tracing is enabled for a
* function, its return address on the stack is
* replaced with the address of an ftrace handler
* (return_to_handler). In that case, before printing
* the "real" address, we want to print the handler
* address as an "unreliable" hint that function graph
* tracing was involved.
*/
real_addr = ftrace_graph_ret_addr(task, &graph_idx,
addr, stack);
if (real_addr != addr)
printk_stack_address(addr, 0, log_lvl);
printk_stack_address(real_addr, reliable, log_lvl);
if (!reliable)
continue;
/*
* Get the next frame from the unwinder. No need to
* check for an error: if anything goes wrong, the rest
* of the addresses will just be printed as unreliable.
*/
unwind_next_frame(&state);
}
stack++;
if (str_end)
printk("%s <%s> ", log_lvl, str_end);
}
return bp;
}
EXPORT_SYMBOL_GPL(print_context_stack);
unsigned long
print_context_stack_bp(struct task_struct *task,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data,
unsigned long *end, int *graph)
{
struct stack_frame *frame = (struct stack_frame *)bp;
unsigned long *ret_addr = &frame->return_address;
while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
unsigned long addr = *ret_addr;
if (!__kernel_text_address(addr))
break;
if (ops->address(data, addr, 1))
break;
frame = frame->next_frame;
ret_addr = &frame->return_address;
print_ftrace_graph_addr(addr, data, ops, task, graph);
}
return (unsigned long)frame;
}
EXPORT_SYMBOL_GPL(print_context_stack_bp);
static int print_trace_stack(void *data, char *name)
{
printk("%s <%s> ", (char *)data, name);
return 0;
}
/*
* Print one address/symbol entries per line.
*/
static int print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk_stack_address(addr, reliable, data);
return 0;
}
static const struct stacktrace_ops print_trace_ops = {
.stack = print_trace_stack,
.address = print_trace_address,
.walk_stack = print_context_stack,
};
void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl)
{
printk("%sCall Trace:\n", log_lvl);
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp)
{
show_trace_log_lvl(task, regs, stack, bp, "");
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
unsigned long bp = 0;
unsigned long stack;
task = task ? : current;
/*
* Stack frames below this one aren't interesting. Don't show them
* if we're printing for %current.
*/
if (!sp && (!task || task == current)) {
sp = &stack;
bp = stack_frame(current, NULL);
}
if (!sp && task == current)
sp = get_stack_pointer(current, NULL);
show_stack_log_lvl(task, NULL, sp, bp, "");
show_stack_log_lvl(task, NULL, sp, "");
}
void show_stack_regs(struct pt_regs *regs)
{
show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
show_stack_log_lvl(current, regs, NULL, "");
}
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;

View File

@@ -16,93 +16,121 @@
#include <asm/stacktrace.h>
static void *is_irq_stack(void *p, void *irq)
void stack_type_str(enum stack_type type, const char **begin, const char **end)
{
if (p < irq || p >= (irq + THREAD_SIZE))
return NULL;
return irq + THREAD_SIZE;
switch (type) {
case STACK_TYPE_IRQ:
case STACK_TYPE_SOFTIRQ:
*begin = "IRQ";
*end = "EOI";
break;
default:
*begin = NULL;
*end = NULL;
}
}
static void *is_hardirq_stack(unsigned long *stack, int cpu)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
void *irq = per_cpu(hardirq_stack, cpu);
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
return is_irq_stack(stack, irq);
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
info->begin = begin;
info->end = end;
/*
* See irq_32.c -- the next stack pointer is stored at the beginning of
* the stack.
*/
info->next_sp = (unsigned long *)*begin;
return true;
}
static void *is_softirq_stack(unsigned long *stack, int cpu)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
void *irq = per_cpu(softirq_stack, cpu);
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
return is_irq_stack(stack, irq);
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
info->begin = begin;
info->end = end;
/*
* The next stack pointer is stored at the beginning of the stack.
* See irq_32.c.
*/
info->next_sp = (unsigned long *)*begin;
return true;
}
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask)
{
const unsigned cpu = get_cpu();
int graph = 0;
u32 *prev_esp;
if (!stack)
goto unknown;
if (!task)
task = current;
task = task ? : current;
if (!stack) {
unsigned long dummy;
if (in_task_stack(stack, task, info))
goto recursion_check;
stack = &dummy;
if (task != current)
stack = (unsigned long *)task->thread.sp;
if (task != current)
goto unknown;
if (in_hardirq_stack(stack, info))
goto recursion_check;
if (in_softirq_stack(stack, info))
goto recursion_check;
goto unknown;
recursion_check:
/*
* Make sure we don't iterate through any given stack more than once.
* If it comes up a second time then there's something wrong going on:
* just break out and report an unknown stack type.
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type))
goto unknown;
*visit_mask |= 1UL << info->type;
}
if (!bp)
bp = stack_frame(task, regs);
return 0;
for (;;) {
void *end_stack;
end_stack = is_hardirq_stack(stack, cpu);
if (!end_stack)
end_stack = is_softirq_stack(stack, cpu);
bp = ops->walk_stack(task, stack, bp, ops, data,
end_stack, &graph);
/* Stop if not on irq stack */
if (!end_stack)
break;
/* The previous esp is saved on the bottom of the stack */
prev_esp = (u32 *)(end_stack - THREAD_SIZE);
stack = (unsigned long *)*prev_esp;
if (!stack)
break;
if (ops->stack(data, "IRQ") < 0)
break;
touch_nmi_watchdog();
}
put_cpu();
unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *stack;
int i;
if (sp == NULL) {
if (regs)
sp = (unsigned long *)regs->sp;
else if (task)
sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
}
if (!try_get_task_stack(task))
return;
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
touch_nmi_watchdog();
}
pr_cont("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
show_trace_log_lvl(task, regs, sp, log_lvl);
put_task_stack(task);
}
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
u8 *ip;
pr_emerg("Stack:\n");
show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
pr_emerg("Code:");

View File

@@ -16,261 +16,145 @@
#include <asm/stacktrace.h>
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
static char x86_stack_ids[][8] = {
[ DEBUG_STACK-1 ] = "#DB",
[ NMI_STACK-1 ] = "NMI",
[ DOUBLEFAULT_STACK-1 ] = "#DF",
[ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[ N_EXCEPTION_STACKS ...
N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
static char *exception_stack_names[N_EXCEPTION_STACKS] = {
[ DOUBLEFAULT_STACK-1 ] = "#DF",
[ NMI_STACK-1 ] = "NMI",
[ DEBUG_STACK-1 ] = "#DB",
[ MCE_STACK-1 ] = "#MC",
};
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
void stack_type_str(enum stack_type type, const char **begin, const char **end)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
switch (type) {
case STACK_TYPE_IRQ:
*begin = "IRQ";
*end = "EOI";
break;
case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
*end = "EOE";
break;
default:
*begin = NULL;
*end = NULL;
}
}
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *begin, *end;
struct pt_regs *regs;
unsigned k;
/*
* Iterate over all exception stacks, and figure out whether
* 'stack' is in one of them:
*/
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
unsigned long end = per_cpu(orig_ist, cpu).ist[k];
/*
* Is 'stack' above this exception frame's end?
* If yes then skip to the next frame.
*/
if (stack >= end)
end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
begin = end - (exception_stack_sizes[k] / sizeof(long));
regs = (struct pt_regs *)end - 1;
if (stack < begin || stack >= end)
continue;
/*
* Is 'stack' above this exception frame's start address?
* If yes then we found the right frame.
*/
if (stack >= end - EXCEPTION_STKSZ) {
/*
* Make sure we only iterate through an exception
* stack once. If it comes up for the second time
* then there's something wrong going on - just
* break out and return NULL:
*/
if (*usedp & (1U << k))
break;
*usedp |= 1U << k;
*idp = x86_stack_ids[k];
return (unsigned long *)end;
}
/*
* If this is a debug stack, and if it has a larger size than
* the usual exception stacks, then 'stack' might still
* be within the lower portion of the debug stack:
*/
#if DEBUG_STKSZ > EXCEPTION_STKSZ
if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
unsigned j = N_EXCEPTION_STACKS - 1;
/*
* Black magic. A large debug stack is composed of
* multiple exception stack entries, which we
* iterate through now. Dont look:
*/
do {
++j;
end -= EXCEPTION_STKSZ;
x86_stack_ids[j][4] = '1' +
(j - N_EXCEPTION_STACKS);
} while (stack < end - EXCEPTION_STKSZ);
if (*usedp & (1U << j))
break;
*usedp |= 1U << j;
*idp = x86_stack_ids[j];
return (unsigned long *)end;
}
#endif
}
return NULL;
}
info->type = STACK_TYPE_EXCEPTION + k;
info->begin = begin;
info->end = end;
info->next_sp = (unsigned long *)regs->sp;
static inline int
in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
unsigned long *irq_stack_end)
{
return (stack >= irq_stack && stack < irq_stack_end);
}
static const unsigned long irq_stack_size =
(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
enum stack_type {
STACK_IS_UNKNOWN,
STACK_IS_NORMAL,
STACK_IS_EXCEPTION,
STACK_IS_IRQ,
};
static enum stack_type
analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
unsigned long **stack_end, unsigned long *irq_stack,
unsigned *used, char **id)
{
unsigned long addr;
addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
if ((unsigned long)task_stack_page(task) == addr)
return STACK_IS_NORMAL;
*stack_end = in_exception_stack(cpu, (unsigned long)stack,
used, id);
if (*stack_end)
return STACK_IS_EXCEPTION;
if (!irq_stack)
return STACK_IS_NORMAL;
*stack_end = irq_stack;
irq_stack = irq_stack - irq_stack_size;
if (in_irq_stack(stack, irq_stack, *stack_end))
return STACK_IS_IRQ;
return STACK_IS_UNKNOWN;
}
/*
* x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned long dummy;
unsigned used = 0;
int graph = 0;
int done = 0;
if (!task)
task = current;
if (!stack) {
if (regs)
stack = (unsigned long *)regs->sp;
else if (task != current)
stack = (unsigned long *)task->thread.sp;
else
stack = &dummy;
return true;
}
if (!bp)
bp = stack_frame(task, regs);
/*
* Print function call entries in all stacks, starting at the
* current stack address. If the stacks consist of nested
* exceptions
*/
while (!done) {
unsigned long *stack_end;
enum stack_type stype;
char *id;
return false;
}
stype = analyze_stack(cpu, task, stack, &stack_end,
irq_stack, &used, &id);
/* Default finish unless specified to continue */
done = 1;
switch (stype) {
/* Break out early if we are on the thread stack */
case STACK_IS_NORMAL:
break;
case STACK_IS_EXCEPTION:
if (ops->stack(data, id) < 0)
break;
bp = ops->walk_stack(task, stack, bp, ops,
data, stack_end, &graph);
ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
* second-to-last pointer (index -2 to end) in the
* exception stack:
*/
stack = (unsigned long *) stack_end[-2];
done = 0;
break;
case STACK_IS_IRQ:
if (ops->stack(data, "IRQ") < 0)
break;
bp = ops->walk_stack(task, stack, bp,
ops, data, stack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (stack_end[-1]);
irq_stack = NULL;
ops->stack(data, "EOI");
done = 0;
break;
case STACK_IS_UNKNOWN:
ops->stack(data, "UNK");
break;
}
}
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* This handles the process stack:
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
put_cpu();
}
EXPORT_SYMBOL(dump_trace);
if (stack < begin || stack > end)
return false;
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
info->type = STACK_TYPE_IRQ;
info->begin = begin;
info->end = end;
/*
* The next stack pointer is the first thing pushed by the entry code
* after switching to the irq stack.
*/
info->next_sp = (unsigned long *)*(end - 1);
return true;
}
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask)
{
if (!stack)
goto unknown;
task = task ? : current;
if (in_task_stack(stack, task, info))
goto recursion_check;
if (task != current)
goto unknown;
if (in_exception_stack(stack, info))
goto recursion_check;
if (in_irq_stack(stack, info))
goto recursion_check;
goto unknown;
recursion_check:
/*
* Make sure we don't iterate through any given stack more than once.
* If it comes up a second time then there's something wrong going on:
* just break out and report an unknown stack type.
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type))
goto unknown;
*visit_mask |= 1UL << info->type;
}
return 0;
unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
unsigned long *stack;
int cpu;
int i;
preempt_disable();
cpu = smp_processor_id();
if (!try_get_task_stack(task))
return;
irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
/*
* Debugging aid: "show_stack(NULL, NULL);" prints the
* back trace for this cpu:
*/
if (sp == NULL) {
if (regs)
sp = (unsigned long *)regs->sp;
else if (task)
sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
}
sp = sp ? : get_stack_pointer(task, regs);
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
stack++;
touch_nmi_watchdog();
}
preempt_enable();
pr_cont("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
show_trace_log_lvl(task, regs, sp, log_lvl);
put_task_stack(task);
}
void show_regs(struct pt_regs *regs)
{
int i;
unsigned long sp;
sp = regs->sp;
show_regs_print_info(KERN_DEFAULT);
__show_regs(regs, 1);
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
u8 *ip;
printk(KERN_DEFAULT "Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
0, KERN_DEFAULT);
show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
printk(KERN_DEFAULT "Code: ");

View File

@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
on_boot_cpu = 0;
WARN_ON_FPU(current->thread.fpu.fpstate_active);
current_thread_info()->status = 0;
if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
eagerfpu = ENABLE;

View File

@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
}
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer) == -EBUSY) {
frame_pointer, parent) == -EBUSY) {
*parent = old;
return;
}

View File

@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
*/
__HEAD
ENTRY(startup_32)
movl pa(stack_start),%ecx
movl pa(initial_stack),%ecx
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
* start_secondary().
*/
ENTRY(start_cpu0)
movl stack_start, %ecx
movl initial_stack, %ecx
movl %ecx, %esp
jmp *(initial_code)
ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
movl pa(stack_start),%ecx
movl pa(initial_stack),%ecx
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
.data
.balign 4
ENTRY(stack_start)
ENTRY(initial_stack)
.long init_thread_union+THREAD_SIZE
__INITRODATA

View File

@@ -66,7 +66,7 @@ startup_64:
*/
/*
* Setup stack for verify_cpu(). "-8" because stack_start is defined
* Setup stack for verify_cpu(). "-8" because initial_stack is defined
* this way, see below. Our best guess is a NULL ptr for stack
* termination heuristics and we don't want to break anything which
* might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
movq %rax, %cr0
/* Setup a boot time stack */
movq stack_start(%rip), %rsp
movq initial_stack(%rip), %rsp
/* zero EFLAGS after setting rsp */
pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
* start_secondary().
*/
ENTRY(start_cpu0)
movq stack_start(%rip),%rsp
movq initial_stack(%rip),%rsp
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
pushq $__KERNEL_CS # set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
ENDPROC(start_cpu0)
#endif
/* SMP bootup changes these two */
/* Both SMP bootup and ACPI suspend change these variables */
__REFDATA
.balign 8
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
GLOBAL(stack_start)
GLOBAL(initial_stack)
.quad init_thread_union+THREAD_SIZE-8
.word 0
__FINITDATA
bad_address:

View File

@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
if (user_mode(regs))
return;
if (regs->sp >= curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;

View File

@@ -50,6 +50,7 @@
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
gdb_regs[GDB_DX] = 0;
gdb_regs[GDB_SI] = 0;
gdb_regs[GDB_DI] = 0;
gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp;
gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp;
#ifdef CONFIG_X86_32
gdb_regs[GDB_DS] = __KERNEL_DS;
gdb_regs[GDB_ES] = __KERNEL_DS;
gdb_regs[GDB_PS] = 0;
gdb_regs[GDB_CS] = __KERNEL_CS;
gdb_regs[GDB_PC] = p->thread.ip;
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
#else
gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
gdb_regs32[GDB_PS] = 0;
gdb_regs32[GDB_CS] = __KERNEL_CS;
gdb_regs32[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_PC] = 0;
gdb_regs[GDB_R8] = 0;
gdb_regs[GDB_R9] = 0;
gdb_regs[GDB_R10] = 0;
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
gdb_regs[GDB_R14] = 0;
gdb_regs[GDB_R15] = 0;
#endif
gdb_regs[GDB_PC] = 0;
gdb_regs[GDB_SP] = p->thread.sp;
}

View File

@@ -184,7 +184,7 @@ out:
static struct kobj_attribute type_attr = __ATTR_RO(type);
static struct bin_attribute data_attr = {
static struct bin_attribute data_attr __ro_after_init = {
.attr = {
.name = "data",
.mode = S_IRUGO,

View File

@@ -29,7 +29,7 @@
#include <asm/x86_init.h>
#include <asm/reboot.h>
static int kvmclock = 1;
static int kvmclock __ro_after_init = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
static cycle_t kvm_sched_clock_offset;

View File

@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
.read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt);
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
struct pv_mmu_ops pv_mmu_ops = {
struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,

View File

@@ -32,6 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/switch_to.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -512,6 +513,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
/*
* Return saved PC of a blocked thread.
* What is this good for? it will be always the scheduler or ret_from_fork.
*/
unsigned long thread_saved_pc(struct task_struct *tsk)
{
struct inactive_task_frame *frame =
(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
return READ_ONCE_NOCHECK(frame->ret_addr);
}
/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
*/
unsigned long get_wchan(struct task_struct *p)
{
unsigned long start, bottom, top, sp, fp, ip;
unsigned long start, bottom, top, sp, fp, ip, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
if (!try_get_task_stack(p))
return 0;
start = (unsigned long)task_stack_page(p);
if (!start)
return 0;
goto out;
/*
* Layout of the stack page:
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p)
* PADDING
* ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
* stack
* ----------- bottom = start + sizeof(thread_info)
* thread_info
* ----------- start
* ----------- bottom = start
*
* The tasks stack pointer points at the location where the
* framepointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p)
*/
top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
top -= 2 * sizeof(unsigned long);
bottom = start + sizeof(struct thread_info);
bottom = start;
sp = READ_ONCE(p->thread.sp);
if (sp < bottom || sp > top)
return 0;
goto out;
fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
do {
if (fp < bottom || fp > top)
return 0;
goto out;
ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
if (!in_sched_functions(ip))
return ip;
if (!in_sched_functions(ip)) {
ret = ip;
goto out;
}
fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
out:
put_task_stack(p);
return ret;
}

View File

@@ -55,17 +55,6 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
*/
unsigned long thread_saved_pc(struct task_struct *tsk)
{
return ((unsigned long *)tsk->thread.sp)[3];
}
void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = __read_cr4_safe();
cr4 = __read_cr4();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
struct pt_regs *childregs = task_pt_regs(p);
struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
struct inactive_task_frame *frame = &fork_frame->frame;
struct task_struct *tsk;
int err;
p->thread.sp = (unsigned long) childregs;
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
p->thread.sp0 = (unsigned long) (childregs+1);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
task_user_gs(p) = __KERNEL_STACK_CANARY;
childregs->ds = __USER_DS;
childregs->es = __USER_DS;
childregs->fs = __KERNEL_PERCPU;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
frame->bx = sp; /* function */
frame->di = arg;
p->thread.io_bitmap_ptr = NULL;
return 0;
}
frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
if (sp)
childregs->sp = sp;
p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());
p->thread.io_bitmap_ptr = NULL;

View File

@@ -50,8 +50,6 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
asmlinkage extern void ret_from_fork(void);
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
{
int err;
struct pt_regs *childregs;
struct fork_frame *fork_frame;
struct inactive_task_frame *frame;
struct task_struct *me = current;
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
set_tsk_thread_flag(p, TIF_FORK);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
childregs->ss = __KERNEL_DS;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
frame->bx = sp; /* function */
frame->r12 = arg;
return 0;
}
frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32)
current->personality &= ~READ_IMPLIES_EXEC;
/* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
current_thread_info()->status &= ~TS_COMPAT;
current->thread.status &= ~TS_COMPAT;
} else {
set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32);
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32)
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
current_thread_info()->status |= TS_COMPAT;
current->thread.status |= TS_COMPAT;
}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

View File

@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
return sp;
prev_esp = (u32 *)(context);
if (prev_esp)
return (unsigned long)prev_esp;
if (*prev_esp)
return (unsigned long)*prev_esp;
return (unsigned long)regs;
}
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
task_thread_info(child)->status |= TS_I386_REGS_POKED;
child->thread.status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
#ifdef CONFIG_X86_64
static struct user_regset x86_64_regsets[] __read_mostly = {
static struct user_regset x86_64_regsets[] __ro_after_init = {
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = {
#endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static struct user_regset x86_32_regsets[] __read_mostly = {
static struct user_regset x86_32_regsets[] __ro_after_init = {
[REGSET_GENERAL] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = {
*/
u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);

View File

@@ -705,7 +705,7 @@ static void native_machine_power_off(void)
tboot_shutdown(TB_SHUTDOWN_HALT);
}
struct machine_ops machine_ops = {
struct machine_ops machine_ops __ro_after_init = {
.power_off = native_machine_power_off,
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,

View File

@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data);
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
__visible unsigned long mmu_cr4_features;
__visible unsigned long mmu_cr4_features __ro_after_init;
#else
__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
#endif
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
*/
mmu_cr4_features = __read_cr4_safe();
mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());

View File

@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =

View File

@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
* than the tracee.
*/
#ifdef CONFIG_IA32_EMULATION
if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI

View File

@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
per_cpu(cpu_current_top_of_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
}
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;
initial_stack = idle->thread.sp;
/*
* Enable the espfix hack for this CPU

View File

@@ -8,80 +8,69 @@
#include <linux/export.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
static int save_stack_stack(void *data, char *name)
static int save_stack_address(struct stack_trace *trace, unsigned long addr,
bool nosched)
{
return 0;
}
static int
__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
#ifdef CONFIG_FRAME_POINTER
if (!reliable)
return 0;
#endif
if (nosched && in_sched_functions(addr))
return 0;
if (trace->skip > 0) {
trace->skip--;
return 0;
}
if (trace->nr_entries < trace->max_entries) {
trace->entries[trace->nr_entries++] = addr;
return 0;
} else {
return -1; /* no more room, stop walking the stack */
if (trace->nr_entries >= trace->max_entries)
return -1;
trace->entries[trace->nr_entries++] = addr;
return 0;
}
static void __save_stack_trace(struct stack_trace *trace,
struct task_struct *task, struct pt_regs *regs,
bool nosched)
{
struct unwind_state state;
unsigned long addr;
if (regs)
save_stack_address(trace, regs->ip, nosched);
for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || save_stack_address(trace, addr, nosched))
break;
}
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
static int save_stack_address(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, false);
}
static int
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, true);
}
static const struct stacktrace_ops save_stack_ops = {
.stack = save_stack_stack,
.address = save_stack_address,
.walk_stack = print_context_stack,
};
static const struct stacktrace_ops save_stack_ops_nosched = {
.stack = save_stack_stack,
.address = save_stack_address_nosched,
.walk_stack = print_context_stack,
};
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
void save_stack_trace(struct stack_trace *trace)
{
dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
__save_stack_trace(trace, current, NULL, false);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
__save_stack_trace(trace, current, regs, false);
}
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
if (!try_get_task_stack(tsk))
return;
__save_stack_trace(trace, tsk, NULL, true);
put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);

View File

@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
#ifdef CONFIG_VMAP_STACK
__visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
unsigned long fault_address)
{
printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
(void *)fault_address, current->stack,
(char *)current->stack + THREAD_SIZE - 1);
die(message, regs, 0);
/* Be absolutely certain we don't return. */
panic(message);
}
#endif
#ifdef CONFIG_X86_64
/* Runs on IST stack */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
#ifdef CONFIG_VMAP_STACK
unsigned long cr2;
#endif
#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_DF;
#ifdef CONFIG_VMAP_STACK
/*
* If we overflow the stack into a guard page, the CPU will fail
* to deliver #PF and will send #DF instead. Similarly, if we
* take any non-IST exception while too close to the bottom of
* the stack, the processor will get a page fault while
* delivering the exception and will generate a double fault.
*
* According to the SDM (footnote in 6.15 under "Interrupt 14 -
* Page-Fault Exception (#PF):
*
* Processors update CR2 whenever a page fault is detected. If a
* second page fault occurs while an earlier page fault is being
* deliv- ered, the faulting linear address of the second fault will
* overwrite the contents of CR2 (replacing the previous
* address). These updates to CR2 occur even if the page fault
* results in a double fault or occurs during the delivery of a
* double fault.
*
* The logic below has a small possibility of incorrectly diagnosing
* some errors as stack overflows. For example, if the IDT or GDT
* gets corrupted such that #GP delivery fails due to a bad descriptor
* causing #GP and we hit this condition while CR2 coincidentally
* points to the stack guard page, we'll think we overflowed the
* stack. Given that we're going to panic one way or another
* if this happens, this isn't necessarily worth fixing.
*
* If necessary, we could improve the test by only diagnosing
* a stack overflow if the saved RSP points within 47 bytes of
* the bottom of the stack: if RSP == tsk_stack + 48 and we
* take an exception, the stack is already aligned and there
* will be enough room SS, RSP, RFLAGS, CS, RIP, and a
* possible error code, so a stack overflow would *not* double
* fault. With any less space left, exception delivery could
* fail, and, as a practical matter, we've overflowed the
* stack even if the actual trigger for the double fault was
* something else.
*/
cr2 = read_cr2();
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
#endif
#ifdef CONFIG_DOUBLEFAULT
df_debug(regs, error_code);
#endif

View File

@@ -0,0 +1,93 @@
#include <linux/sched.h>
#include <asm/ptrace.h>
#include <asm/bitops.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#define FRAME_HEADER_SIZE (sizeof(long) * 2)
unsigned long unwind_get_return_address(struct unwind_state *state)
{
unsigned long addr;
unsigned long *addr_p = unwind_get_return_address_ptr(state);
if (unwind_done(state))
return 0;
addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
addr_p);
return __kernel_text_address(addr) ? addr : 0;
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
static bool update_stack_state(struct unwind_state *state, void *addr,
size_t len)
{
struct stack_info *info = &state->stack_info;
/*
* If addr isn't on the current stack, switch to the next one.
*
* We may have to traverse multiple stacks to deal with the possibility
* that 'info->next_sp' could point to an empty stack and 'addr' could
* be on a subsequent stack.
*/
while (!on_stack(info, addr, len))
if (get_stack_info(info->next_sp, state->task, info,
&state->stack_mask))
return false;
return true;
}
bool unwind_next_frame(struct unwind_state *state)
{
unsigned long *next_bp;
if (unwind_done(state))
return false;
next_bp = (unsigned long *)*state->bp;
/* make sure the next frame's data is accessible */
if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
return false;
/* move to the next frame */
state->bp = next_bp;
return true;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
memset(state, 0, sizeof(*state));
state->task = task;
/* don't even attempt to start from user mode regs */
if (regs && user_mode(regs)) {
state->stack_info.type = STACK_TYPE_UNKNOWN;
return;
}
/* set up the starting stack frame */
state->bp = get_frame_pointer(task, regs);
/* initialize stack info and make sure the frame data is accessible */
get_stack_info(state->bp, state->task, &state->stack_info,
&state->stack_mask);
update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
/*
* The caller can provide the address of the first frame directly
* (first_frame) or indirectly (regs->sp) to indicate which stack frame
* to start unwinding at. Skip ahead until we reach it.
*/
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
state->bp < first_frame))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);

View File

@@ -0,0 +1,43 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
#include <asm/ptrace.h>
#include <asm/bitops.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
if (unwind_done(state))
return false;
do {
for (state->sp++; state->sp < info->end; state->sp++)
if (__kernel_text_address(*state->sp))
return true;
state->sp = info->next_sp;
} while (!get_stack_info(state->sp, state->task, info,
&state->stack_mask));
return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
memset(state, 0, sizeof(*state));
state->task = task;
state->sp = first_frame;
get_stack_info(first_frame, state->task, &state->stack_info,
&state->stack_mask);
if (!__kernel_text_address(*first_frame))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);

View File

@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
static int default_i8042_detect(void) { return 1; };
struct x86_platform_ops x86_platform = {
struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = {
EXPORT_SYMBOL_GPL(x86_platform);
#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
struct x86_msi_ops x86_msi __ro_after_init = {
.setup_msi_irqs = native_setup_msi_irqs,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
}
#endif
struct x86_io_apic_ops x86_io_apic_ops = {
struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
.read = native_io_apic_read,
.disable = native_disable_io_apic,
};