Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "The biggest changes in this cycle were:

   - Revamp, simplify (and in some cases fix) Time Stamp Counter (TSC)
     primitives.  (Andy Lutomirski)

   - Add new, comprehensible entry and exit handlers written in C.
     (Andy Lutomirski)

   - vm86 mode cleanups and fixes.  (Brian Gerst)

   - 32-bit compat code cleanups.  (Brian Gerst)

  The amount of simplification in low level assembly code is already
  palpable:

     arch/x86/entry/entry_32.S | 130 +----
     arch/x86/entry/entry_64.S | 197 ++-----

  but more simplifications are planned.

  There's also the usual laundry mix of low level changes - see the
  changelog for details"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (83 commits)
  x86/asm: Drop repeated macro of X86_EFLAGS_AC definition
  x86/asm/msr: Make wrmsrl() a function
  x86/asm/delay: Introduce an MWAITX-based delay with a configurable timer
  x86/asm: Add MONITORX/MWAITX instruction support
  x86/traps: Weaken context tracking entry assertions
  x86/asm/tsc: Add rdtscll() merge helper
  selftests/x86: Add syscall_nt selftest
  selftests/x86: Disable sigreturn_64
  x86/vdso: Emit a GNU hash
  x86/entry: Remove do_notify_resume(), syscall_trace_leave(), and their TIF masks
  x86/entry/32: Migrate to C exit path
  x86/entry/32: Remove 32-bit syscall audit optimizations
  x86/vm86: Rename vm86->v86flags and v86mask
  x86/vm86: Rename vm86->vm86_info to user_vm86
  x86/vm86: Clean up vm86.h includes
  x86/vm86: Move the vm86 IRQ definitions to vm86.h
  x86/vm86: Use the normal pt_regs area for vm86
  x86/vm86: Eliminate 'struct kernel_vm86_struct'
  x86/vm86: Move fields from 'struct kernel_vm86_struct' to 'struct vm86'
  x86/vm86: Move vm86 fields out of 'thread_struct'
  ...
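The conversions in the diff below repeatedly swap the old out-argument TSC macros for direct reads. As a minimal sketch (an assumed shape, not the kernel's exact definition), the rdtsc() that replaces rdtscll()/rdtscl()/__native_read_tsc() throughout this series behaves like:

    /*
     * Sketch only: the old rdtscll(x) macro stored the counter into its
     * argument; the rdtsc() used throughout this diff returns the 64-bit
     * value directly.  RDTSC leaves the low half in EAX, high half in EDX.
     */
    static inline unsigned long long rdtsc_sketch(void)
    {
        unsigned int lo, hi;

        asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((unsigned long long)hi << 32) | lo;
    }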
@@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
 CFLAGS_irq.o := -I$(src)/../include/asm/trace

 obj-y := process_$(BITS).o signal.o
+obj-$(CONFIG_COMPAT) += signal_compat.o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
+obj-y += time.o ioport.o dumpstack.o nmi.o
+obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
 obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y += probe_roms.o
@@ -263,7 +263,7 @@ static int apbt_clocksource_register(void)

     /* Verify whether apbt counter works */
     t1 = dw_apb_clocksource_read(clocksource_apbt);
-    rdtscll(start);
+    start = rdtsc();

     /*
      * We don't know the TSC frequency yet, but waiting for
@@ -273,7 +273,7 @@ static int apbt_clocksource_register(void)
      */
     do {
         rep_nop();
-        rdtscll(now);
+        now = rdtsc();
     } while ((now - start) < 200000UL);

     /* APBT is the only always on clocksource, it has to work! */
@@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void)
     old = dw_apb_clocksource_read(clocksource_apbt);
     old += loop;

-    t1 = __native_read_tsc();
+    t1 = rdtsc();

     do {
         new = dw_apb_clocksource_read(clocksource_apbt);
     } while (new < old);

-    t2 = __native_read_tsc();
+    t2 = rdtsc();

     shift = 5;
     if (unlikely(loop >> shift == 0)) {
@@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta,
 {
     u64 tsc;

-    rdtscll(tsc);
+    tsc = rdtsc();
     wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
     return 0;
 }
@@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
     unsigned long pm = acpi_pm_read_early();

     if (cpu_has_tsc)
-        rdtscll(tsc);
+        tsc = rdtsc();

     switch (lapic_cal_loops++) {
     case 0:
@@ -1209,7 +1209,7 @@ void setup_local_APIC(void)
     long long max_loops = cpu_khz ? cpu_khz : 1000000;

     if (cpu_has_tsc)
-        rdtscll(tsc);
+        tsc = rdtsc();

     if (disable_apic) {
         disable_ioapic_support();
@@ -1293,7 +1293,7 @@ void setup_local_APIC(void)
     }
     if (queued) {
         if (cpu_has_tsc && cpu_khz) {
-            rdtscll(ntsc);
+            ntsc = rdtsc();
             max_loops = (cpu_khz << 10) - (ntsc - tsc);
         } else
             max_loops--;
@@ -11,6 +11,7 @@
 #include <asm/cpu.h>
 #include <asm/smp.h>
 #include <asm/pci-direct.h>
+#include <asm/delay.h>

 #ifdef CONFIG_X86_64
 # include <asm/mmconfig.h>
@@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
     const int K6_BUG_LOOP = 1000000;
     int n;
     void (*f_vide)(void);
-    unsigned long d, d2;
+    u64 d, d2;

     printk(KERN_INFO "AMD K6 stepping B detected - ");

@@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c)

     n = K6_BUG_LOOP;
     f_vide = vide;
-    rdtscl(d);
+    d = rdtsc();
     while (n--)
         f_vide();
-    rdtscl(d2);
+    d2 = rdtsc();
     d = d2-d;

     if (d > 20*K6_BUG_LOOP)
@@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
         /* A random value per boot for bit slice [12:upper_bit) */
         va_align.bits = get_random_int() & va_align.mask;
     }
+
+    if (cpu_has(c, X86_FEATURE_MWAITX))
+        use_mwaitx_delay();
 }

 static void early_init_amd(struct cpuinfo_x86 *c)
@@ -1185,10 +1185,10 @@ void syscall_init(void)
      * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
      */
     wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
-    wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
+    wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);

 #ifdef CONFIG_IA32_EMULATION
-    wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
+    wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
     /*
      * This only works on Intel CPUs.
      * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
@@ -1199,7 +1199,7 @@ void syscall_init(void)
     wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
     wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
-    wrmsrl(MSR_CSTAR, ignore_sysret);
+    wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
     wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
     wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
     wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
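The casts added above track "x86/asm/msr: Make wrmsrl() a function" from the commit list: once wrmsrl() is a real function taking a u64 rather than a macro, an entry-point symbol no longer converts implicitly. A sketch of the assumed shape (not the kernel's exact definition):

    /*
     * Sketch only: wrmsrl() as a function taking a u64.  A function
     * pointer such as entry_SYSCALL_64 needs an explicit integer cast,
     * hence the (unsigned long) casts in the hunk above.  WRMSR takes
     * the value split across EDX:EAX with the MSR index in ECX.
     */
    static inline void wrmsrl_sketch(unsigned int msr, unsigned long long val)
    {
        asm volatile("wrmsr"
                     : /* no outputs */
                     : "c" (msr), "a" ((unsigned int)val),
                       "d" ((unsigned int)(val >> 32)));
    }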
@@ -127,7 +127,7 @@ void mce_setup(struct mce *m)
 {
     memset(m, 0, sizeof(struct mce));
     m->cpu = m->extcpu = smp_processor_id();
-    rdtscll(m->tsc);
+    m->tsc = rdtsc();
     /* We hope get_seconds stays lockless */
     m->time = get_seconds();
     m->cpuvendor = boot_cpu_data.x86_vendor;
@@ -974,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 {
     struct mca_config *cfg = &mca_cfg;
     struct mce m, *final;
-    enum ctx_state prev_state;
     int i;
     int worst = 0;
     int severity;
@@ -1000,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
     int flags = MF_ACTION_REQUIRED;
     int lmce = 0;

-    prev_state = ist_enter(regs);
+    ist_enter(regs);

     this_cpu_inc(mce_exception_count);

@@ -1166,7 +1165,7 @@ out:
     local_irq_disable();
     ist_end_non_atomic();
 done:
-    ist_exit(regs, prev_state);
+    ist_exit(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);

@@ -1754,7 +1753,7 @@ static void collect_tscs(void *data)
 {
     unsigned long *cpu_tsc = (unsigned long *)data;

-    rdtscll(cpu_tsc[smp_processor_id()]);
+    cpu_tsc[smp_processor_id()] = rdtsc();
 }

 static int mce_apei_read_done;
@@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly;
 /* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
     u32 loaddr, hi, lotype;

-    prev_state = ist_enter(regs);
+    ist_enter(regs);

     rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
     rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)

     add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

-    ist_exit(regs, prev_state);
+    ist_exit(regs);
 }

 /* Set up machine check reporting for processors with Intel style MCE: */
@@ -15,12 +15,12 @@
 /* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state = ist_enter(regs);
+    ist_enter(regs);

     printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
     add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

-    ist_exit(regs, prev_state);
+    ist_exit(regs);
 }

 /* Set up machine check reporting on the Winchip C6 series */
@@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment)
     int idx = segment >> 3;

     if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
         struct ldt_struct *ldt;

         if (idx > LDT_ENTRIES)
@@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment)
             return 0;

         desc = &ldt->entries[idx];
+#else
+        return 0;
+#endif
     } else {
         if (idx > GDT_ENTRIES)
             return 0;
@@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment)
     return get_desc_base(desc);
 }

-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_IA32_EMULATION

 #include <asm/compat.h>

@@ -110,7 +110,7 @@ static void init_espfix_random(void)
      */
     if (!arch_get_random_long(&rand)) {
         /* The constant is an arbitrary large prime */
-        rdtscll(rand);
+        rand = rdtsc();
         rand *= 0xc345c6b72fd16123UL;
     }

@@ -735,7 +735,7 @@ static int hpet_clocksource_register(void)

     /* Verify whether hpet counter works */
     t1 = hpet_readl(HPET_COUNTER);
-    rdtscll(start);
+    start = rdtsc();

     /*
      * We don't know the TSC frequency yet, but waiting for
@@ -745,7 +745,7 @@ static int hpet_clocksource_register(void)
      */
     do {
         rep_nop();
-        rdtscll(now);
+        now = rdtsc();
     } while ((now - start) < 200000UL);

     if (t1 == hpet_readl(HPET_COUNTER)) {
@@ -216,8 +216,23 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
     unsigned vector = ~regs->orig_ax;
     unsigned irq;

+    /*
+     * NB: Unlike exception entries, IRQ entries do not reliably
+     * handle context tracking in the low-level entry code.  This is
+     * because syscall entries execute briefly with IRQs on before
+     * updating context tracking state, so we can take an IRQ from
+     * kernel mode with CONTEXT_USER.  The low-level entry code only
+     * updates the context if we came from user mode, so we won't
+     * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
+     * code is cleaned up enough that we can cleanly defer enabling
+     * IRQs.
+     */
+
     entering_irq();

+    /* entering_irq() tells RCU that we're not quiescent.  Check it. */
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
+
     irq = __this_cpu_read(vector_irq[vector]);

     if (!handle_irq(irq, regs)) {
@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
         a->handler, whole_msecs, decimal_msecs);
 }

-static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int nmi_handle(unsigned int type, struct pt_regs *regs)
 {
     struct nmi_desc *desc = nmi_to_desc(type);
     struct nmiaction *a;
@@ -213,7 +213,7 @@ static void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
     /* check to see if anyone registered against these types of errors */
-    if (nmi_handle(NMI_SERR, regs, false))
+    if (nmi_handle(NMI_SERR, regs))
         return;

     pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
     unsigned long i;

     /* check to see if anyone registered against these types of errors */
-    if (nmi_handle(NMI_IO_CHECK, regs, false))
+    if (nmi_handle(NMI_IO_CHECK, regs))
         return;

     pr_emerg(
@@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
      * as only the first one is ever run (unless it can actually determine
      * if it caused the NMI)
      */
-    handled = nmi_handle(NMI_UNKNOWN, regs, false);
+    handled = nmi_handle(NMI_UNKNOWN, regs);
     if (handled) {
         __this_cpu_add(nmi_stats.unknown, handled);
         return;
@@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs)

     __this_cpu_write(last_nmi_rip, regs->ip);

-    handled = nmi_handle(NMI_LOCAL, regs, b2b);
+    handled = nmi_handle(NMI_LOCAL, regs);
     __this_cpu_add(nmi_stats.normal, handled);
     if (handled) {
         /*
@@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
     .wbinvd = native_wbinvd,
     .read_msr = native_read_msr_safe,
     .write_msr = native_write_msr_safe,
-    .read_tsc = native_read_tsc,
     .read_pmc = native_read_pmc,
-    .read_tscp = native_read_tscp,
     .load_tr_desc = native_load_tr_desc,
     .set_ldt = native_set_ldt,
     .load_gdt = native_load_gdt,
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
-DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");

 #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
@@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
         PATCH_SITE(pv_mmu_ops, read_cr3);
         PATCH_SITE(pv_mmu_ops, write_cr3);
         PATCH_SITE(pv_cpu_ops, clts);
-        PATCH_SITE(pv_cpu_ops, read_tsc);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
         case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
             if (pv_is_native_spin_unlock()) {
@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
+#include <asm/vm86.h>

 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -111,6 +112,8 @@ void exit_thread(void)
         kfree(bp);
     }

+    free_vm86(t);
+
     fpu__drop(fpu);
 }

@@ -53,6 +53,7 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
+#include <asm/vm86.h>

 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all)
 void release_thread(struct task_struct *dead_task)
 {
     if (dead_task->mm) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
         if (dead_task->mm->context.ldt) {
             pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
                 dead_task->comm,
@@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task)
                 dead_task->mm->context.ldt->size);
             BUG();
         }
+#endif
     }
 }

@@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
             __USER_CS, __USER_DS, 0);
 }

-#ifdef CONFIG_IA32_EMULATION
-void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+#ifdef CONFIG_COMPAT
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
 {
     start_thread_common(regs, new_ip, new_sp,
                 test_thread_flag(TIF_X32)
@@ -37,12 +37,10 @@
 #include <asm/proto.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/traps.h>
+#include <asm/syscall.h>

 #include "tls.h"

-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 enum x86_regset {
     REGSET_GENERAL,
     REGSET_FP,
@@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target,
     return ret;
 }

+static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request,
+                 compat_ulong_t caddr, compat_ulong_t cdata)
+{
+    unsigned long addr = caddr;
+    unsigned long data = cdata;
+    void __user *datap = compat_ptr(data);
+    int ret;
+    __u32 val;
+
+    switch (request) {
+    case PTRACE_PEEKUSR:
+        ret = getreg32(child, addr, &val);
+        if (ret == 0)
+            ret = put_user(val, (__u32 __user *)datap);
+        break;
+
+    case PTRACE_POKEUSR:
+        ret = putreg32(child, addr, data);
+        break;
+
+    case PTRACE_GETREGS:    /* Get all gp regs from the child. */
+        return copy_regset_to_user(child, &user_x86_32_view,
+                       REGSET_GENERAL,
+                       0, sizeof(struct user_regs_struct32),
+                       datap);
+
+    case PTRACE_SETREGS:    /* Set all gp regs in the child. */
+        return copy_regset_from_user(child, &user_x86_32_view,
+                         REGSET_GENERAL, 0,
+                         sizeof(struct user_regs_struct32),
+                         datap);
+
+    case PTRACE_GETFPREGS:    /* Get the child FPU state. */
+        return copy_regset_to_user(child, &user_x86_32_view,
+                       REGSET_FP, 0,
+                       sizeof(struct user_i387_ia32_struct),
+                       datap);
+
+    case PTRACE_SETFPREGS:    /* Set the child FPU state. */
+        return copy_regset_from_user(
+            child, &user_x86_32_view, REGSET_FP,
+            0, sizeof(struct user_i387_ia32_struct), datap);
+
+    case PTRACE_GETFPXREGS:    /* Get the child extended FPU state. */
+        return copy_regset_to_user(child, &user_x86_32_view,
+                       REGSET_XFP, 0,
+                       sizeof(struct user32_fxsr_struct),
+                       datap);
+
+    case PTRACE_SETFPXREGS:    /* Set the child extended FPU state. */
+        return copy_regset_from_user(child, &user_x86_32_view,
+                         REGSET_XFP, 0,
+                         sizeof(struct user32_fxsr_struct),
+                         datap);
+
+    case PTRACE_GET_THREAD_AREA:
+    case PTRACE_SET_THREAD_AREA:
+        return arch_ptrace(child, request, addr, data);
+
+    default:
+        return compat_ptrace_request(child, request, addr, data);
+    }
+
+    return ret;
+}
+#endif /* CONFIG_IA32_EMULATION */
+
 #ifdef CONFIG_X86_X32_ABI
 static long x32_arch_ptrace(struct task_struct *child,
                 compat_long_t request, compat_ulong_t caddr,
@@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child,
 }
 #endif

+#ifdef CONFIG_COMPAT
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
             compat_ulong_t caddr, compat_ulong_t cdata)
 {
-    unsigned long addr = caddr;
-    unsigned long data = cdata;
-    void __user *datap = compat_ptr(data);
-    int ret;
-    __u32 val;
-
 #ifdef CONFIG_X86_X32_ABI
     if (!is_ia32_task())
         return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
-
-    switch (request) {
-    case PTRACE_PEEKUSR:
-        ret = getreg32(child, addr, &val);
-        if (ret == 0)
-            ret = put_user(val, (__u32 __user *)datap);
-        break;
-
-    case PTRACE_POKEUSR:
-        ret = putreg32(child, addr, data);
-        break;
-
-    case PTRACE_GETREGS:    /* Get all gp regs from the child. */
-        return copy_regset_to_user(child, &user_x86_32_view,
-                       REGSET_GENERAL,
-                       0, sizeof(struct user_regs_struct32),
-                       datap);
-
-    case PTRACE_SETREGS:    /* Set all gp regs in the child. */
-        return copy_regset_from_user(child, &user_x86_32_view,
-                         REGSET_GENERAL, 0,
-                         sizeof(struct user_regs_struct32),
-                         datap);
-
-    case PTRACE_GETFPREGS:    /* Get the child FPU state. */
-        return copy_regset_to_user(child, &user_x86_32_view,
-                       REGSET_FP, 0,
-                       sizeof(struct user_i387_ia32_struct),
-                       datap);
-
-    case PTRACE_SETFPREGS:    /* Set the child FPU state. */
-        return copy_regset_from_user(
-            child, &user_x86_32_view, REGSET_FP,
-            0, sizeof(struct user_i387_ia32_struct), datap);
-
-    case PTRACE_GETFPXREGS:    /* Get the child extended FPU state. */
-        return copy_regset_to_user(child, &user_x86_32_view,
-                       REGSET_XFP, 0,
-                       sizeof(struct user32_fxsr_struct),
-                       datap);
-
-    case PTRACE_SETFPXREGS:    /* Set the child extended FPU state. */
-        return copy_regset_from_user(child, &user_x86_32_view,
-                         REGSET_XFP, 0,
-                         sizeof(struct user32_fxsr_struct),
-                         datap);
-
-    case PTRACE_GET_THREAD_AREA:
-    case PTRACE_SET_THREAD_AREA:
-        return arch_ptrace(child, request, addr, data);
-
-    default:
-        return compat_ptrace_request(child, request, addr, data);
-    }
-
-    return ret;
+#ifdef CONFIG_IA32_EMULATION
+    return ia32_arch_ptrace(child, request, caddr, cdata);
+#else
+    return 0;
+#endif
 }
-
-#endif    /* CONFIG_IA32_EMULATION */
+#endif    /* CONFIG_COMPAT */

 #ifdef CONFIG_X86_64
@@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
     /* Send us the fake SIGTRAP */
     force_sig_info(SIGTRAP, &info, tsk);
 }
-
-static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
-{
-#ifdef CONFIG_X86_64
-    if (arch == AUDIT_ARCH_X86_64) {
-        audit_syscall_entry(regs->orig_ax, regs->di,
-                    regs->si, regs->dx, regs->r10);
-    } else
-#endif
-    {
-        audit_syscall_entry(regs->orig_ax, regs->bx,
-                    regs->cx, regs->dx, regs->si);
-    }
-}
-
-/*
- * We can return 0 to resume the syscall or anything else to go to phase
- * 2.  If we resume the syscall, we need to put something appropriate in
- * regs->orig_ax.
- *
- * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
- * are fully functional.
- *
- * For phase 2's benefit, our return value is:
- * 0:            resume the syscall
- * 1:            go to phase 2; no seccomp phase 2 needed
- * anything else:    go to phase 2; pass return value to seccomp
- */
-unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
-{
-    unsigned long ret = 0;
-    u32 work;
-
-    BUG_ON(regs != task_pt_regs(current));
-
-    work = ACCESS_ONCE(current_thread_info()->flags) &
-        _TIF_WORK_SYSCALL_ENTRY;
-
-    /*
-     * If TIF_NOHZ is set, we are required to call user_exit() before
-     * doing anything that could touch RCU.
-     */
-    if (work & _TIF_NOHZ) {
-        user_exit();
-        work &= ~_TIF_NOHZ;
-    }
-
-#ifdef CONFIG_SECCOMP
-    /*
-     * Do seccomp first -- it should minimize exposure of other
-     * code, and keeping seccomp fast is probably more valuable
-     * than the rest of this.
-     */
-    if (work & _TIF_SECCOMP) {
-        struct seccomp_data sd;
-
-        sd.arch = arch;
-        sd.nr = regs->orig_ax;
-        sd.instruction_pointer = regs->ip;
-#ifdef CONFIG_X86_64
-        if (arch == AUDIT_ARCH_X86_64) {
-            sd.args[0] = regs->di;
-            sd.args[1] = regs->si;
-            sd.args[2] = regs->dx;
-            sd.args[3] = regs->r10;
-            sd.args[4] = regs->r8;
-            sd.args[5] = regs->r9;
-        } else
-#endif
-        {
-            sd.args[0] = regs->bx;
-            sd.args[1] = regs->cx;
-            sd.args[2] = regs->dx;
-            sd.args[3] = regs->si;
-            sd.args[4] = regs->di;
-            sd.args[5] = regs->bp;
-        }
-
-        BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
-        BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
-
-        ret = seccomp_phase1(&sd);
-        if (ret == SECCOMP_PHASE1_SKIP) {
-            regs->orig_ax = -1;
-            ret = 0;
-        } else if (ret != SECCOMP_PHASE1_OK) {
-            return ret;  /* Go directly to phase 2 */
-        }
-
-        work &= ~_TIF_SECCOMP;
-    }
-#endif
-
-    /* Do our best to finish without phase 2. */
-    if (work == 0)
-        return ret;  /* seccomp and/or nohz only (ret == 0 here) */
-
-#ifdef CONFIG_AUDITSYSCALL
-    if (work == _TIF_SYSCALL_AUDIT) {
-        /*
-         * If there is no more work to be done except auditing,
-         * then audit in phase 1.  Phase 2 always audits, so, if
-         * we audit here, then we can't go on to phase 2.
-         */
-        do_audit_syscall_entry(regs, arch);
-        return 0;
-    }
-#endif
-
-    return 1;  /* Something is enabled that we can't handle in phase 1 */
-}
-
-/* Returns the syscall nr to run (which should match regs->orig_ax). */
-long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
-                unsigned long phase1_result)
-{
-    long ret = 0;
-    u32 work = ACCESS_ONCE(current_thread_info()->flags) &
-        _TIF_WORK_SYSCALL_ENTRY;
-
-    BUG_ON(regs != task_pt_regs(current));
-
-    /*
-     * If we stepped into a sysenter/syscall insn, it trapped in
-     * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
-     * If user-mode had set TF itself, then it's still clear from
-     * do_debug() and we need to set it again to restore the user
-     * state.  If we entered on the slow path, TF was already set.
-     */
-    if (work & _TIF_SINGLESTEP)
-        regs->flags |= X86_EFLAGS_TF;
-
-#ifdef CONFIG_SECCOMP
-    /*
-     * Call seccomp_phase2 before running the other hooks so that
-     * they can see any changes made by a seccomp tracer.
-     */
-    if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
-        /* seccomp failures shouldn't expose any additional code. */
-        return -1;
-    }
-#endif
-
-    if (unlikely(work & _TIF_SYSCALL_EMU))
-        ret = -1L;
-
-    if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
-        tracehook_report_syscall_entry(regs))
-        ret = -1L;
-
-    if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-        trace_sys_enter(regs, regs->orig_ax);
-
-    do_audit_syscall_entry(regs, arch);
-
-    return ret ?: regs->orig_ax;
-}
-
-long syscall_trace_enter(struct pt_regs *regs)
-{
-    u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
-    unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
-
-    if (phase1_result == 0)
-        return regs->orig_ax;
-    else
-        return syscall_trace_enter_phase2(regs, arch, phase1_result);
-}
-
-void syscall_trace_leave(struct pt_regs *regs)
-{
-    bool step;
-
-    /*
-     * We may come here right after calling schedule_user()
-     * or do_notify_resume(), in which case we can be in RCU
-     * user mode.
-     */
-    user_exit();
-
-    audit_syscall_exit(regs);
-
-    if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-        trace_sys_exit(regs, regs->ax);
-
-    /*
-     * If TIF_SYSCALL_EMU is set, we only get here because of
-     * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
-     * We already reported this syscall instruction in
-     * syscall_trace_enter().
-     */
-    step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
-           !test_thread_flag(TIF_SYSCALL_EMU);
-    if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-        tracehook_report_syscall_exit(regs, step);
-
-    user_enter();
-}
@@ -31,11 +31,11 @@
 #include <asm/vdso.h>
 #include <asm/mce.h>
 #include <asm/sighandling.h>
+#include <asm/vm86.h>

 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
-#include <asm/sys_ia32.h>
 #endif /* CONFIG_X86_64 */

 #include <asm/syscall.h>
@@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
     bool stepping, failed;
     struct fpu *fpu = &current->thread.fpu;

+    if (v8086_mode(regs))
+        save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
+
     /* Are we from a system call? */
     if (syscall_get_nr(current, regs) >= 0) {
         /* If so, check system call restarting.. */
@@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 * want to handle. Thus you cannot kill init even with a SIGKILL even by
 * mistake.
 */
-static void do_signal(struct pt_regs *regs)
+void do_signal(struct pt_regs *regs)
 {
     struct ksignal ksig;

@@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs)
     restore_saved_sigmask();
 }

-/*
- * notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-__visible void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-    user_exit();
-
-    if (thread_info_flags & _TIF_UPROBE)
-        uprobe_notify_resume(regs);
-
-    /* deal with pending signal delivery */
-    if (thread_info_flags & _TIF_SIGPENDING)
-        do_signal(regs);
-
-    if (thread_info_flags & _TIF_NOTIFY_RESUME) {
-        clear_thread_flag(TIF_NOTIFY_RESUME);
-        tracehook_notify_resume(regs);
-    }
-    if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
-        fire_user_return_notifiers();
-
-    user_enter();
-}
-
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 {
     struct task_struct *me = current;
arch/x86/kernel/signal_compat.c (new file, 95 lines)
@@ -0,0 +1,95 @@
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+    int err = 0;
+    bool ia32 = test_thread_flag(TIF_IA32);
+
+    if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+        return -EFAULT;
+
+    put_user_try {
+        /* If you change siginfo_t structure, please make sure that
+           this code is fixed accordingly.
+           It should never copy any pad contained in the structure
+           to avoid security leaks, but must copy the generic
+           3 ints plus the relevant union member.  */
+        put_user_ex(from->si_signo, &to->si_signo);
+        put_user_ex(from->si_errno, &to->si_errno);
+        put_user_ex((short)from->si_code, &to->si_code);
+
+        if (from->si_code < 0) {
+            put_user_ex(from->si_pid, &to->si_pid);
+            put_user_ex(from->si_uid, &to->si_uid);
+            put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
+        } else {
+            /*
+             * First 32bits of unions are always present:
+             * si_pid === si_band === si_tid === si_addr(LS half)
+             */
+            put_user_ex(from->_sifields._pad[0],
+                    &to->_sifields._pad[0]);
+            switch (from->si_code >> 16) {
+            case __SI_FAULT >> 16:
+                break;
+            case __SI_SYS >> 16:
+                put_user_ex(from->si_syscall, &to->si_syscall);
+                put_user_ex(from->si_arch, &to->si_arch);
+                break;
+            case __SI_CHLD >> 16:
+                if (ia32) {
+                    put_user_ex(from->si_utime, &to->si_utime);
+                    put_user_ex(from->si_stime, &to->si_stime);
+                } else {
+                    put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
+                    put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
+                }
+                put_user_ex(from->si_status, &to->si_status);
+                /* FALL THROUGH */
+            default:
+            case __SI_KILL >> 16:
+                put_user_ex(from->si_uid, &to->si_uid);
+                break;
+            case __SI_POLL >> 16:
+                put_user_ex(from->si_fd, &to->si_fd);
+                break;
+            case __SI_TIMER >> 16:
+                put_user_ex(from->si_overrun, &to->si_overrun);
+                put_user_ex(ptr_to_compat(from->si_ptr),
+                        &to->si_ptr);
+                break;
+             /* This is not generated by the kernel as of now.  */
+            case __SI_RT >> 16:
+            case __SI_MESGQ >> 16:
+                put_user_ex(from->si_uid, &to->si_uid);
+                put_user_ex(from->si_int, &to->si_int);
+                break;
+            }
+        }
+    } put_user_catch(err);
+
+    return err;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+    int err = 0;
+    u32 ptr32;
+
+    if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
+        return -EFAULT;
+
+    get_user_try {
+        get_user_ex(to->si_signo, &from->si_signo);
+        get_user_ex(to->si_errno, &from->si_errno);
+        get_user_ex(to->si_code, &from->si_code);
+
+        get_user_ex(to->si_pid, &from->si_pid);
+        get_user_ex(to->si_uid, &from->si_uid);
+        get_user_ex(ptr32, &from->si_ptr);
+        to->si_ptr = compat_ptr(ptr32);
+    } get_user_catch(err);
+
+    return err;
+}
@@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
         return addr;
     }

+#ifdef CONFIG_MODIFY_LDT_SYSCALL
     /*
      * We'll assume that the code segments in the GDT
      * are all zero-based. That is largely true: the
@@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
         }
         mutex_unlock(&child->mm->context.lock);
     }
+#endif

     return addr;
 }
@@ -12,10 +12,5 @@
 */
 u64 notrace trace_clock_x86_tsc(void)
 {
-    u64 ret;
-
-    rdtsc_barrier();
-    rdtscll(ret);
-
-    return ret;
+    return rdtsc_ordered();
 }
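The rdtsc_ordered() that replaces the open-coded barrier sequence above can be pictured as the same two steps fused. A sketch under the assumption of an MFENCE-style barrier (the real kernel selects the fence via alternatives per CPU):

    /*
     * Sketch only: pair the TSC read with a fence so it cannot be
     * speculated ahead of earlier loads -- the same effect as the
     * removed rdtsc_barrier(); rdtscll(ret); sequence.
     */
    static inline unsigned long long rdtsc_ordered_sketch(void)
    {
        unsigned int lo, hi;

        asm volatile("mfence" ::: "memory");    /* order vs. earlier ops */
        asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((unsigned long long)hi << 32) | lo;
    }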
@@ -62,6 +62,7 @@
 #include <asm/fpu/xstate.h>
 #include <asm/trace/mpx.h>
 #include <asm/mpx.h>
+#include <asm/vm86.h>

 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
     preempt_count_dec();
 }

-enum ctx_state ist_enter(struct pt_regs *regs)
+void ist_enter(struct pt_regs *regs)
 {
-    enum ctx_state prev_state;
-
     if (user_mode(regs)) {
-        /* Other than that, we're just an exception. */
-        prev_state = exception_enter();
+        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     } else {
         /*
          * We might have interrupted pretty much anything.  In
@@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs)
          * but we need to notify RCU.
          */
         rcu_nmi_enter();
-        prev_state = CONTEXT_KERNEL;  /* the value is irrelevant. */
     }

     /*
-     * We are atomic because we're on the IST stack (or we're on x86_32,
-     * in which case we still shouldn't schedule).
-     *
-     * This must be after exception_enter(), because exception_enter()
-     * won't do anything if in_interrupt() returns true.
+     * We are atomic because we're on the IST stack; or we're on
+     * x86_32, in which case we still shouldn't schedule; or we're
+     * on x86_64 and entered from user mode, in which case we're
+     * still atomic unless ist_begin_non_atomic is called.
      */
     preempt_count_add(HARDIRQ_OFFSET);

     /* This code is a bit fragile.  Test it. */
     RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
-
-    return prev_state;
 }

-void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
+void ist_exit(struct pt_regs *regs)
 {
-    /* Must be before exception_exit. */
     preempt_count_sub(HARDIRQ_OFFSET);

-    if (user_mode(regs))
-        return exception_exit(prev_state);
-    else
+    if (!user_mode(regs))
         rcu_nmi_exit();
 }

@@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
 * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
 * begins a non-atomic section within an ist_enter()/ist_exit() region.
 * Callers are responsible for enabling interrupts themselves inside
- * the non-atomic section, and callers must call is_end_non_atomic()
+ * the non-atomic section, and callers must call ist_end_non_atomic()
 * before ist_exit().
 */
 void ist_begin_non_atomic(struct pt_regs *regs)
@@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap);
 static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
               unsigned long trapnr, int signr)
 {
-    enum ctx_state prev_state = exception_enter();
     siginfo_t info;

+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+
     if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
             NOTIFY_STOP) {
         conditional_sti(regs);
         do_trap(trapnr, signr, str, regs, error_code,
             fill_trap_info(regs, signr, trapnr, &info));
     }
-
-    exception_exit(prev_state);
 }

 #define DO_ERROR(trapnr, signr, str, name) \
@@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
     }
 #endif

-    ist_enter(regs);  /* Discard prev_state because we won't return. */
+    ist_enter(regs);
     notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

     tsk->thread.error_code = error_code;
@@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)

 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
     const struct bndcsr *bndcsr;
     siginfo_t *info;

-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     if (notify_die(DIE_TRAP, "bounds", regs, error_code,
             X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
-        goto exit;
+        return;
     conditional_sti(regs);

     if (!user_mode(regs))
@@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
         die("bounds", regs, error_code);
     }

-exit:
-    exception_exit(prev_state);
     return;
+
 exit_trap:
     /*
      * This path out is for all the cases where we could not
@@ -447,35 +435,33 @@ exit_trap:
      * time..
      */
     do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
-    exception_exit(prev_state);
 }

 dotraplinkage void
 do_general_protection(struct pt_regs *regs, long error_code)
 {
     struct task_struct *tsk;
-    enum ctx_state prev_state;

-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     conditional_sti(regs);

     if (v8086_mode(regs)) {
         local_irq_enable();
         handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-        goto exit;
+        return;
     }

     tsk = current;
     if (!user_mode(regs)) {
         if (fixup_exception(regs))
-            goto exit;
+            return;

         tsk->thread.error_code = error_code;
         tsk->thread.trap_nr = X86_TRAP_GP;
         if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
                 X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
             die("general protection fault", regs, error_code);
-        goto exit;
+        return;
     }

     tsk->thread.error_code = error_code;
@@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
     }

     force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
-exit:
-    exception_exit(prev_state);
 }
 NOKPROBE_SYMBOL(do_general_protection);

 /* May run on IST stack. */
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
-
 #ifdef CONFIG_DYNAMIC_FTRACE
     /*
      * ftrace must be first, everything else may cause a recursive crash.
@@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
     if (poke_int3_handler(regs))
         return;

-    prev_state = ist_enter(regs);
+    ist_enter(regs);
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
     if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                 SIGTRAP) == NOTIFY_STOP)
@@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
     preempt_conditional_cli(regs);
     debug_stack_usage_dec();
 exit:
-    ist_exit(regs, prev_state);
+    ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_int3);

@@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
 dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 {
     struct task_struct *tsk = current;
-    enum ctx_state prev_state;
     int user_icebp = 0;
     unsigned long dr6;
     int si_code;

-    prev_state = ist_enter(regs);
+    ist_enter(regs);

     get_debugreg(dr6, 6);

@@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
     debug_stack_usage_dec();

 exit:
-    ist_exit(regs, prev_state);
+    ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);

@@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)

 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
-
-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     math_error(regs, error_code, X86_TRAP_MF);
-    exception_exit(prev_state);
 }

 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
-
-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     math_error(regs, error_code, X86_TRAP_XF);
-    exception_exit(prev_state);
 }

 dotraplinkage void
@@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
-    enum ctx_state prev_state;
-
-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     BUG_ON(use_eager_fpu());

 #ifdef CONFIG_MATH_EMULATION
@@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)

         info.regs = regs;
         math_emulate(&info);
-        exception_exit(prev_state);
         return;
     }
 #endif
@@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
     conditional_sti(regs);
 #endif
-    exception_exit(prev_state);
 }
 NOKPROBE_SYMBOL(do_device_not_available);

@@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available);
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
     siginfo_t info;
-    enum ctx_state prev_state;

-    prev_state = exception_enter();
+    RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
     local_irq_enable();

     info.si_signo = SIGILL;
@@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
         do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
             &info);
     }
-    exception_exit(prev_state);
 }
 #endif

@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)

     data = cyc2ns_write_begin(cpu);

-    rdtscll(tsc_now);
+    tsc_now = rdtsc();
     ns_now = cycles_2_ns(tsc_now);

     /*
@@ -290,7 +290,7 @@ u64 native_sched_clock(void)
     }

     /* read the Time Stamp Counter: */
-    rdtscll(tsc_now);
+    tsc_now = rdtsc();

     /* return the value in ns */
     return cycles_2_ns(tsc_now);
@@ -316,12 +316,6 @@ unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 #endif

-unsigned long long native_read_tsc(void)
-{
-    return __native_read_tsc();
-}
-EXPORT_SYMBOL(native_read_tsc);
-
 int check_tsc_unstable(void)
 {
     return tsc_unstable;
@@ -984,7 +978,7 @@ static struct clocksource clocksource_tsc;
 */
 static cycle_t read_tsc(struct clocksource *cs)
 {
-    return (cycle_t)get_cycles();
+    return (cycle_t)rdtsc_ordered();
 }

 /*
@@ -39,16 +39,15 @@ static cycles_t max_warp;
 static int nr_warps;

 /*
- * TSC-warp measurement loop running on both CPUs:
+ * TSC-warp measurement loop running on both CPUs.  This is not called
+ * if there is no TSC.
 */
 static void check_tsc_warp(unsigned int timeout)
 {
     cycles_t start, now, prev, end;
     int i;

-    rdtsc_barrier();
-    start = get_cycles();
-    rdtsc_barrier();
+    start = rdtsc_ordered();
     /*
      * The measurement runs for 'timeout' msecs:
      */
@@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout)
          */
         arch_spin_lock(&sync_lock);
         prev = last_tsc;
-        rdtsc_barrier();
-        now = get_cycles();
-        rdtsc_barrier();
+        now = rdtsc_ordered();
         last_tsc = now;
         arch_spin_unlock(&sync_lock);

@@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu)

     /*
      * No need to check if we already know that the TSC is not
-     * synchronized:
+     * synchronized or if we have no TSC.
      */
     if (unsynchronized_tsc())
         return;
@@ -190,6 +187,7 @@ void check_tsc_sync_target(void)
 {
     int cpus = 2;

+    /* Also aborts if there is no TSC. */
     if (unsynchronized_tsc() || tsc_clocksource_reliable)
         return;

@@ -44,11 +44,14 @@
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/vm86.h>
|
||||
|
||||
/*
|
||||
* Known problems:
|
||||
@@ -66,10 +69,6 @@
|
||||
*/
|
||||
|
||||
|
||||
#define KVM86 ((struct kernel_vm86_struct *)regs)
|
||||
#define VMPI KVM86->vm86plus
|
||||
|
||||
|
||||
/*
|
||||
* 8- and 16-bit register defines..
|
||||
*/
|
||||
@@ -81,8 +80,8 @@
|
||||
/*
|
||||
* virtual flags (16 and 32-bit versions)
|
||||
*/
|
||||
#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
|
||||
#define VEFLAGS (current->thread.v86flags)
|
||||
#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags))
|
||||
#define VEFLAGS (current->thread.vm86->veflags)
|
||||
|
||||
#define set_flags(X, new, mask) \
|
||||
((X) = ((X) & ~(mask)) | ((new) & (mask)))
|
||||
@@ -90,46 +89,13 @@
|
||||
#define SAFE_MASK (0xDD5)
|
||||
#define RETURN_MASK (0xDFF)
|
||||
|
||||
/* convert kernel_vm86_regs to vm86_regs */
|
||||
static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
|
||||
const struct kernel_vm86_regs *regs)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* kernel_vm86_regs is missing gs, so copy everything up to
|
||||
* (but not including) orig_eax, and then rest including orig_eax.
|
||||
*/
|
||||
ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax,
|
||||
sizeof(struct kernel_vm86_regs) -
|
||||
offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* convert vm86_regs to kernel_vm86_regs */
|
||||
static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
|
||||
const struct vm86_regs __user *user,
|
||||
unsigned extra)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* copy ax-fs inclusive */
|
||||
ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
|
||||
/* copy orig_ax-__gsh+extra */
|
||||
ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax,
|
||||
sizeof(struct kernel_vm86_regs) -
|
||||
offsetof(struct kernel_vm86_regs, pt.orig_ax) +
|
||||
extra);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
|
||||
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
||||
{
|
||||
struct tss_struct *tss;
|
||||
struct pt_regs *ret;
|
||||
unsigned long tmp;
|
||||
struct task_struct *tsk = current;
|
||||
struct vm86plus_struct __user *user;
|
||||
struct vm86 *vm86 = current->thread.vm86;
|
||||
long err = 0;
|
||||
|
||||
/*
|
||||
* This gets called from entry.S with interrupts disabled, but
|
||||
@@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
if (!current->thread.vm86_info) {
|
||||
pr_alert("no vm86_info: BAD\n");
|
||||
if (!vm86 || !vm86->user_vm86) {
|
||||
pr_alert("no user_vm86: BAD\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
|
||||
tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs);
|
||||
tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap);
|
||||
if (tmp) {
|
||||
pr_alert("could not access userspace vm86_info\n");
|
||||
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
|
||||
user = vm86->user_vm86;
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
|
||||
sizeof(struct vm86plus_struct) :
|
||||
sizeof(struct vm86_struct))) {
|
||||
pr_alert("could not access userspace vm86 info\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
put_user_try {
|
||||
put_user_ex(regs->pt.bx, &user->regs.ebx);
|
||||
put_user_ex(regs->pt.cx, &user->regs.ecx);
|
||||
put_user_ex(regs->pt.dx, &user->regs.edx);
|
||||
put_user_ex(regs->pt.si, &user->regs.esi);
|
||||
put_user_ex(regs->pt.di, &user->regs.edi);
|
||||
put_user_ex(regs->pt.bp, &user->regs.ebp);
|
||||
put_user_ex(regs->pt.ax, &user->regs.eax);
|
||||
put_user_ex(regs->pt.ip, &user->regs.eip);
|
||||
put_user_ex(regs->pt.cs, &user->regs.cs);
|
||||
put_user_ex(regs->pt.flags, &user->regs.eflags);
|
||||
put_user_ex(regs->pt.sp, &user->regs.esp);
|
||||
put_user_ex(regs->pt.ss, &user->regs.ss);
|
||||
put_user_ex(regs->es, &user->regs.es);
|
||||
put_user_ex(regs->ds, &user->regs.ds);
|
||||
put_user_ex(regs->fs, &user->regs.fs);
|
||||
put_user_ex(regs->gs, &user->regs.gs);
|
||||
|
||||
put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
|
||||
} put_user_catch(err);
|
||||
if (err) {
|
||||
pr_alert("could not access userspace vm86 info\n");
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
current->thread.sp0 = current->thread.saved_sp0;
|
||||
current->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, ¤t->thread);
|
||||
current->thread.saved_sp0 = 0;
|
||||
tsk->thread.sp0 = vm86->saved_sp0;
|
||||
tsk->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, &tsk->thread);
|
||||
vm86->saved_sp0 = 0;
|
||||
put_cpu();
|
||||
|
||||
ret = KVM86->regs32;
|
||||
memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs));
|
||||
|
||||
ret->fs = current->thread.saved_fs;
|
||||
set_user_gs(ret, current->thread.saved_gs);
|
||||
lazy_load_gs(vm86->regs32.gs);
|
||||
|
||||
return ret;
|
||||
regs->pt.ax = retval;
|
||||
}
|
||||
|
||||
static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
@@ -200,45 +192,16 @@ out:
|
||||
|
||||
|
||||
static int do_vm86_irq_handling(int subfunction, int irqnumber);
|
||||
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
|
||||
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
|
||||
|
||||
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86)
|
||||
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
|
||||
{
|
||||
struct kernel_vm86_struct info; /* declare this _on top_,
|
||||
* this avoids wasting of stack space.
|
||||
* This remains on the stack until we
|
||||
* return to 32 bit user space.
|
||||
*/
|
||||
struct task_struct *tsk = current;
|
||||
int tmp;
|
||||
|
||||
if (tsk->thread.saved_sp0)
|
||||
return -EPERM;
|
||||
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
|
||||
offsetof(struct kernel_vm86_struct, vm86plus) -
|
||||
sizeof(info.regs));
|
||||
if (tmp)
|
||||
return -EFAULT;
|
||||
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
|
||||
info.regs32 = current_pt_regs();
|
||||
tsk->thread.vm86_info = v86;
|
||||
do_sys_vm86(&info, tsk);
|
||||
return 0; /* we never return here */
|
||||
return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
|
||||
}
|
||||
|
||||
|
||||
SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
|
||||
{
|
||||
struct kernel_vm86_struct info; /* declare this _on top_,
|
||||
* this avoids wasting of stack space.
|
||||
* This remains on the stack until we
|
||||
* return to 32 bit user space.
|
||||
*/
|
||||
struct task_struct *tsk;
|
||||
int tmp;
|
||||
struct vm86plus_struct __user *v86;
|
||||
|
||||
tsk = current;
|
||||
switch (cmd) {
|
||||
case VM86_REQUEST_IRQ:
|
||||
case VM86_FREE_IRQ:
|
||||
@@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
|
||||
}
|
||||
|
||||
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
|
||||
if (tsk->thread.saved_sp0)
|
||||
return -EPERM;
|
||||
v86 = (struct vm86plus_struct __user *)arg;
|
||||
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
|
||||
offsetof(struct kernel_vm86_struct, regs32) -
|
||||
sizeof(info.regs));
|
||||
if (tmp)
|
||||
return -EFAULT;
|
||||
info.regs32 = current_pt_regs();
|
||||
info.vm86plus.is_vm86pus = 1;
|
||||
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
|
||||
do_sys_vm86(&info, tsk);
|
||||
return 0; /* we never return here */
|
||||
return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
|
||||
}
|
||||
|
||||
|
||||
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
        struct tss_struct *tss;
        /*
         * make sure the vm86() system call doesn't try to do anything silly
         */
        info->regs.pt.ds = 0;
        info->regs.pt.es = 0;
        info->regs.pt.fs = 0;
#ifndef CONFIG_X86_32_LAZY_GS
        info->regs.pt.gs = 0;
#endif
        struct task_struct *tsk = current;
        struct vm86 *vm86 = tsk->thread.vm86;
        struct kernel_vm86_regs vm86regs;
        struct pt_regs *regs = current_pt_regs();
        unsigned long err = 0;

        if (!vm86) {
                if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
                        return -ENOMEM;
                tsk->thread.vm86 = vm86;
        }
        if (vm86->saved_sp0)
                return -EPERM;

        if (!access_ok(VERIFY_READ, user_vm86, plus ?
                       sizeof(struct vm86_struct) :
                       sizeof(struct vm86plus_struct)))
                return -EFAULT;

        memset(&vm86regs, 0, sizeof(vm86regs));
        get_user_try {
                unsigned short seg;
                get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
                get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
                get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
                get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
                get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
                get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
                get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
                get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
                get_user_ex(seg, &user_vm86->regs.cs);
                vm86regs.pt.cs = seg;
                get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
                get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
                get_user_ex(seg, &user_vm86->regs.ss);
                vm86regs.pt.ss = seg;
                get_user_ex(vm86regs.es, &user_vm86->regs.es);
                get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
                get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
                get_user_ex(vm86regs.gs, &user_vm86->regs.gs);

                get_user_ex(vm86->flags, &user_vm86->flags);
                get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
                get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
        } get_user_catch(err);
        if (err)
                return err;

        if (copy_from_user(&vm86->int_revectored,
                           &user_vm86->int_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (copy_from_user(&vm86->int21_revectored,
                           &user_vm86->int21_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (plus) {
                if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
                                   sizeof(struct vm86plus_info_struct)))
                        return -EFAULT;
                vm86->vm86plus.is_vm86pus = 1;
        } else
                memset(&vm86->vm86plus, 0,
                       sizeof(struct vm86plus_info_struct));

        memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
        vm86->user_vm86 = user_vm86;

        /*
         * The flags register is also special: we cannot trust that the user
         * has set it up safely, so this makes sure interrupt etc flags are
         * inherited from protected mode.
         */
        VEFLAGS = info->regs.pt.flags;
        info->regs.pt.flags &= SAFE_MASK;
        info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
        info->regs.pt.flags |= X86_VM_MASK;
        VEFLAGS = vm86regs.pt.flags;
        vm86regs.pt.flags &= SAFE_MASK;
        vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
        vm86regs.pt.flags |= X86_VM_MASK;

        switch (info->cpu_type) {
        vm86regs.pt.orig_ax = regs->orig_ax;

        switch (vm86->cpu_type) {
        case CPU_286:
                tsk->thread.v86mask = 0;
                vm86->veflags_mask = 0;
                break;
        case CPU_386:
                tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        case CPU_486:
                tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        default:
                tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        }

        /*
         * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
         * Save old state
         */
        info->regs32->ax = VM86_SIGNAL;
        tsk->thread.saved_sp0 = tsk->thread.sp0;
        tsk->thread.saved_fs = info->regs32->fs;
        tsk->thread.saved_gs = get_user_gs(info->regs32);
        vm86->saved_sp0 = tsk->thread.sp0;
        lazy_save_gs(vm86->regs32.gs);

        tss = &per_cpu(cpu_tss, get_cpu());
        tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
        /* make room for real-mode segments */
        tsk->thread.sp0 += 16;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_sp0(tss, &tsk->thread);
        put_cpu();

        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
        if (vm86->flags & VM86_SCREEN_BITMAP)
                mark_screen_rdonly(tsk->mm);

        /*call __audit_syscall_exit since we do not exit via the normal paths */
#ifdef CONFIG_AUDITSYSCALL
        if (unlikely(current->audit_context))
                __audit_syscall_exit(1, 0);
#endif

        __asm__ __volatile__(
                "movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
#ifdef CONFIG_X86_32_LAZY_GS
                "mov %2, %%gs\n\t"
#endif
                "jmp resume_userspace"
                : /* no outputs */
                :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
        /* we never return here */
}

static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
{
        struct pt_regs *regs32;

        regs32 = save_v86_state(regs16);
        regs32->ax = retval;
        __asm__ __volatile__("movl %0,%%esp\n\t"
                             "movl %1,%%ebp\n\t"
                             "jmp resume_userspace"
                             : : "r" (regs32), "r" (current_thread_info()));
        memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
        force_iret();
        return regs->ax;
}

static inline void set_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS |= X86_EFLAGS_VIF;
        if (VEFLAGS & X86_EFLAGS_VIP)
                return_to_32bit(regs, VM86_STI);
}

static inline void clear_IF(struct kernel_vm86_regs *regs)
@@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs)

static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
        set_flags(VEFLAGS, flags, current->thread.v86mask);
        set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
@@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs

static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
        set_flags(VFLAGS, flags, current->thread.v86mask);
        set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
@@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
        if (VEFLAGS & X86_EFLAGS_VIF)
                flags |= X86_EFLAGS_IF;
        flags |= X86_EFLAGS_IOPL;
        return flags | (VEFLAGS & current->thread.v86mask);
        return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
}

static inline int is_revectored(int nr, struct revectored_struct *bitmap)
@@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
{
        unsigned long __user *intr_ptr;
        unsigned long segoffs;
        struct vm86 *vm86 = current->thread.vm86;

        if (regs->pt.cs == BIOSSEG)
                goto cannot_handle;
        if (is_revectored(i, &KVM86->int_revectored))
        if (is_revectored(i, &vm86->int_revectored))
                goto cannot_handle;
        if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
        if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
                goto cannot_handle;
        intr_ptr = (unsigned long __user *) (i << 2);
        if (get_user(segoffs, intr_ptr))
@@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
        return;

cannot_handle:
        return_to_32bit(regs, VM86_INTx + (i << 8));
        save_v86_state(regs, VM86_INTx + (i << 8));
}

int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
        if (VMPI.is_vm86pus) {
        struct vm86 *vm86 = current->thread.vm86;

        if (vm86->vm86plus.is_vm86pus) {
                if ((trapno == 3) || (trapno == 1)) {
                        KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
                        /* setting this flag forces the code in entry_32.S to
                           the path where we call save_v86_state() and change
                           the stack pointer to KVM86->regs32 */
                        set_thread_flag(TIF_NOTIFY_RESUME);
                        save_v86_state(regs, VM86_TRAP + (trapno << 8));
                        return 0;
                }
                do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
@@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
        unsigned char __user *ssp;
        unsigned short ip, sp, orig_flags;
        int data32, pref_done;
        struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;

#define CHECK_IF_IN_TRAP \
        if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
        if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
                newflags |= X86_EFLAGS_TF
#define VM86_FAULT_RETURN do { \
        if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
                return_to_32bit(regs, VM86_PICRETURN); \
        if (orig_flags & X86_EFLAGS_TF) \
                handle_vm86_trap(regs, 0, 1); \
        return; } while (0)

        orig_flags = *(unsigned short *)&regs->pt.flags;

@@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
                        SP(regs) -= 2;
                }
                IP(regs) = ip;
                VM86_FAULT_RETURN;
                goto vm86_fault_return;

        /* popf */
        case 0x9d:
@@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
                else
                        set_vflags_short(newflags, regs);

                VM86_FAULT_RETURN;
                goto check_vip;
        }

        /* int xx */
        case 0xcd: {
                int intno = popb(csp, ip, simulate_sigsegv);
                IP(regs) = ip;
                if (VMPI.vm86dbg_active) {
                        if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
                                return_to_32bit(regs, VM86_INTx + (intno << 8));
                if (vmpi->vm86dbg_active) {
                        if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
                                save_v86_state(regs, VM86_INTx + (intno << 8));
                                return;
                        }
                }
                do_int(regs, intno, ssp, sp);
                return;
@@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
                } else {
                        set_vflags_short(newflags, regs);
                }
                VM86_FAULT_RETURN;
                goto check_vip;
        }

        /* cli */
        case 0xfa:
                IP(regs) = ip;
                clear_IF(regs);
                VM86_FAULT_RETURN;
                goto vm86_fault_return;

        /* sti */
        /*
@@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
        case 0xfb:
                IP(regs) = ip;
                set_IF(regs);
                VM86_FAULT_RETURN;
                goto check_vip;

        default:
                return_to_32bit(regs, VM86_UNKNOWN);
                save_v86_state(regs, VM86_UNKNOWN);
        }

        return;

check_vip:
        if (VEFLAGS & X86_EFLAGS_VIP) {
                save_v86_state(regs, VM86_STI);
                return;
        }

vm86_fault_return:
        if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
                save_v86_state(regs, VM86_PICRETURN);
                return;
        }
        if (orig_flags & X86_EFLAGS_TF)
                handle_vm86_trap(regs, 0, X86_TRAP_DB);
        return;

simulate_sigsegv:
        /* FIXME: After a long discussion with Stas we finally
         * agreed, that this is wrong. Here we should
@@ -720,7 +713,7 @@ simulate_sigsegv:
         * should be a mixture of the two, but how do we
         * get the information? [KD]
         */
        return_to_32bit(regs, VM86_UNKNOWN);
        save_v86_state(regs, VM86_UNKNOWN);
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */