Merge branch 'x86/process' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into HEAD
Required for KVM support of the CPUID faulting feature. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
|
||||
return;
|
||||
}
|
||||
|
||||
if (ring3mwait_disabled) {
|
||||
msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
|
||||
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
|
||||
if (ring3mwait_disabled)
|
||||
return;
|
||||
}
|
||||
|
||||
msr_set_bit(MSR_MISC_FEATURE_ENABLES,
|
||||
MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
|
||||
this_cpu_or(msr_misc_features_shadow,
|
||||
1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
|
||||
|
||||
if (c == &boot_cpu_data)
|
||||
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
|
||||
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
|
||||
init_intel_energy_perf(c);
|
||||
}
|
||||
|
||||
static void init_cpuid_fault(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
||||
if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
|
||||
if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
|
||||
set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
|
||||
}
|
||||
}
|
||||
|
||||
static void init_intel_misc_features(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
||||
if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
|
||||
return;
|
||||
|
||||
/* Clear all MISC features */
|
||||
this_cpu_write(msr_misc_features_shadow, 0);
|
||||
|
||||
/* Check features and update capabilities and shadow control bits */
|
||||
init_cpuid_fault(c);
|
||||
probe_xeon_phi_r3mwait(c);
|
||||
|
||||
msr = this_cpu_read(msr_misc_features_shadow);
|
||||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
|
||||
}
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int l2 = 0;
|
||||
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
|
||||
|
||||
init_intel_energy_perf(c);
|
||||
|
||||
probe_xeon_phi_r3mwait(c);
|
||||
init_intel_misc_features(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/prctl.h>
|
||||
|
||||
/*
|
||||
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
||||
@@ -124,11 +125,6 @@ void flush_thread(void)
|
||||
fpu__clear(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
static void hard_disable_TSC(void)
|
||||
{
|
||||
cr4_set_bits(X86_CR4_TSD);
|
||||
}
|
||||
|
||||
void disable_TSC(void)
|
||||
{
|
||||
preempt_disable();
|
||||
@@ -137,15 +133,10 @@ void disable_TSC(void)
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOTSC in the current running context.
|
||||
*/
|
||||
hard_disable_TSC();
|
||||
cr4_set_bits(X86_CR4_TSD);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void hard_enable_TSC(void)
|
||||
{
|
||||
cr4_clear_bits(X86_CR4_TSD);
|
||||
}
|
||||
|
||||
static void enable_TSC(void)
|
||||
{
|
||||
preempt_disable();
|
||||
@@ -154,7 +145,7 @@ static void enable_TSC(void)
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOTSC in the current running context.
|
||||
*/
|
||||
hard_enable_TSC();
|
||||
cr4_clear_bits(X86_CR4_TSD);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
||||
struct tss_struct *tss)
|
||||
DEFINE_PER_CPU(u64, msr_misc_features_shadow);
|
||||
|
||||
static void set_cpuid_faulting(bool on)
|
||||
{
|
||||
struct thread_struct *prev, *next;
|
||||
u64 msrval;
|
||||
|
||||
prev = &prev_p->thread;
|
||||
next = &next_p->thread;
|
||||
msrval = this_cpu_read(msr_misc_features_shadow);
|
||||
msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
|
||||
msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
|
||||
this_cpu_write(msr_misc_features_shadow, msrval);
|
||||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
|
||||
test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
|
||||
unsigned long debugctl = get_debugctlmsr();
|
||||
|
||||
debugctl &= ~DEBUGCTLMSR_BTF;
|
||||
if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
|
||||
debugctl |= DEBUGCTLMSR_BTF;
|
||||
|
||||
update_debugctlmsr(debugctl);
|
||||
static void disable_cpuid(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (!test_and_set_thread_flag(TIF_NOCPUID)) {
|
||||
/*
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOCPUID in the current running context.
|
||||
*/
|
||||
set_cpuid_faulting(true);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
||||
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
||||
/* prev and next are different */
|
||||
if (test_tsk_thread_flag(next_p, TIF_NOTSC))
|
||||
hard_disable_TSC();
|
||||
else
|
||||
hard_enable_TSC();
|
||||
static void enable_cpuid(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (test_and_clear_thread_flag(TIF_NOCPUID)) {
|
||||
/*
|
||||
* Must flip the CPU state synchronously with
|
||||
* TIF_NOCPUID in the current running context.
|
||||
*/
|
||||
set_cpuid_faulting(false);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
|
||||
static int get_cpuid_mode(void)
|
||||
{
|
||||
return !test_thread_flag(TIF_NOCPUID);
|
||||
}
|
||||
|
||||
static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
|
||||
{
|
||||
if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
|
||||
return -ENODEV;
|
||||
|
||||
if (cpuid_enabled)
|
||||
enable_cpuid();
|
||||
else
|
||||
disable_cpuid();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called immediately after a successful exec.
|
||||
*/
|
||||
void arch_setup_new_exec(void)
|
||||
{
|
||||
/* If cpuid was previously disabled for this task, re-enable it. */
|
||||
if (test_thread_flag(TIF_NOCPUID))
|
||||
enable_cpuid();
|
||||
}
|
||||
|
||||
static inline void switch_to_bitmap(struct tss_struct *tss,
|
||||
struct thread_struct *prev,
|
||||
struct thread_struct *next,
|
||||
unsigned long tifp, unsigned long tifn)
|
||||
{
|
||||
if (tifn & _TIF_IO_BITMAP) {
|
||||
/*
|
||||
* Copy the relevant range of the IO bitmap.
|
||||
* Normally this is 128 bytes or less:
|
||||
*/
|
||||
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
|
||||
max(prev->io_bitmap_max, next->io_bitmap_max));
|
||||
|
||||
/*
|
||||
* Make sure that the TSS limit is correct for the CPU
|
||||
* to notice the IO bitmap.
|
||||
*/
|
||||
refresh_tss_limit();
|
||||
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
|
||||
} else if (tifp & _TIF_IO_BITMAP) {
|
||||
/*
|
||||
* Clear any possible leftover bits:
|
||||
*/
|
||||
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
|
||||
}
|
||||
}
|
||||
|
||||
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
||||
struct tss_struct *tss)
|
||||
{
|
||||
struct thread_struct *prev, *next;
|
||||
unsigned long tifp, tifn;
|
||||
|
||||
prev = &prev_p->thread;
|
||||
next = &next_p->thread;
|
||||
|
||||
tifn = READ_ONCE(task_thread_info(next_p)->flags);
|
||||
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
|
||||
switch_to_bitmap(tss, prev, next, tifp, tifn);
|
||||
|
||||
propagate_user_return_notify(prev_p, next_p);
|
||||
|
||||
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
|
||||
arch_has_block_step()) {
|
||||
unsigned long debugctl, msk;
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
debugctl &= ~DEBUGCTLMSR_BTF;
|
||||
msk = tifn & _TIF_BLOCKSTEP;
|
||||
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
||||
}
|
||||
|
||||
if ((tifp ^ tifn) & _TIF_NOTSC)
|
||||
cr4_toggle_bits(X86_CR4_TSD);
|
||||
|
||||
if ((tifp ^ tifn) & _TIF_NOCPUID)
|
||||
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -550,3 +616,16 @@ out:
|
||||
put_task_stack(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long do_arch_prctl_common(struct task_struct *task, int option,
|
||||
unsigned long cpuid_enabled)
|
||||
{
|
||||
switch (option) {
|
||||
case ARCH_GET_CPUID:
|
||||
return get_cpuid_mode();
|
||||
case ARCH_SET_CPUID:
|
||||
return set_cpuid_mode(task, cpuid_enabled);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/ldt.h>
|
||||
@@ -56,6 +57,7 @@
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/intel_rdt.h>
|
||||
#include <asm/proto.h>
|
||||
|
||||
void __show_regs(struct pt_regs *regs, int all)
|
||||
{
|
||||
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
|
||||
return prev_p;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl_common(current, option, arg2);
|
||||
}
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||
(struct user_desc __user *)tls, 0);
|
||||
else
|
||||
#endif
|
||||
err = do_arch_prctl(p, ARCH_SET_FS, tls);
|
||||
err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
|
||||
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
|
||||
{
|
||||
int ret = 0;
|
||||
int doit = task == current;
|
||||
int cpu;
|
||||
|
||||
switch (code) {
|
||||
switch (option) {
|
||||
case ARCH_SET_GS:
|
||||
if (addr >= TASK_SIZE_MAX)
|
||||
if (arg2 >= TASK_SIZE_MAX)
|
||||
return -EPERM;
|
||||
cpu = get_cpu();
|
||||
task->thread.gsindex = 0;
|
||||
task->thread.gsbase = addr;
|
||||
task->thread.gsbase = arg2;
|
||||
if (doit) {
|
||||
load_gs_index(0);
|
||||
ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
|
||||
ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
|
||||
}
|
||||
put_cpu();
|
||||
break;
|
||||
case ARCH_SET_FS:
|
||||
/* Not strictly needed for fs, but do it for symmetry
|
||||
with gs */
|
||||
if (addr >= TASK_SIZE_MAX)
|
||||
if (arg2 >= TASK_SIZE_MAX)
|
||||
return -EPERM;
|
||||
cpu = get_cpu();
|
||||
task->thread.fsindex = 0;
|
||||
task->thread.fsbase = addr;
|
||||
task->thread.fsbase = arg2;
|
||||
if (doit) {
|
||||
/* set the selector to 0 to not confuse __switch_to */
|
||||
loadsegment(fs, 0);
|
||||
ret = wrmsrl_safe(MSR_FS_BASE, addr);
|
||||
ret = wrmsrl_safe(MSR_FS_BASE, arg2);
|
||||
}
|
||||
put_cpu();
|
||||
break;
|
||||
case ARCH_GET_FS: {
|
||||
unsigned long base;
|
||||
|
||||
if (doit)
|
||||
rdmsrl(MSR_FS_BASE, base);
|
||||
else
|
||||
base = task->thread.fsbase;
|
||||
ret = put_user(base, (unsigned long __user *)addr);
|
||||
ret = put_user(base, (unsigned long __user *)arg2);
|
||||
break;
|
||||
}
|
||||
case ARCH_GET_GS: {
|
||||
unsigned long base;
|
||||
|
||||
if (doit)
|
||||
rdmsrl(MSR_KERNEL_GS_BASE, base);
|
||||
else
|
||||
base = task->thread.gsbase;
|
||||
ret = put_user(base, (unsigned long __user *)addr);
|
||||
ret = put_user(base, (unsigned long __user *)arg2);
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CHECKPOINT_RESTORE
|
||||
# ifdef CONFIG_X86_X32_ABI
|
||||
case ARCH_MAP_VDSO_X32:
|
||||
return prctl_map_vdso(&vdso_image_x32, addr);
|
||||
return prctl_map_vdso(&vdso_image_x32, arg2);
|
||||
# endif
|
||||
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
case ARCH_MAP_VDSO_32:
|
||||
return prctl_map_vdso(&vdso_image_32, addr);
|
||||
return prctl_map_vdso(&vdso_image_32, arg2);
|
||||
# endif
|
||||
case ARCH_MAP_VDSO_64:
|
||||
return prctl_map_vdso(&vdso_image_64, addr);
|
||||
return prctl_map_vdso(&vdso_image_64, arg2);
|
||||
#endif
|
||||
|
||||
default:
|
||||
@@ -621,11 +624,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
long sys_arch_prctl(int code, unsigned long addr)
|
||||
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl(current, code, addr);
|
||||
long ret;
|
||||
|
||||
ret = do_arch_prctl_64(current, option, arg2);
|
||||
if (ret == -EINVAL)
|
||||
ret = do_arch_prctl_common(current, option, arg2);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
|
||||
{
|
||||
return do_arch_prctl_common(current, option, arg2);
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned long KSTK_ESP(struct task_struct *task)
|
||||
{
|
||||
return task_pt_regs(task)->sp;
|
||||
|
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
|
||||
if (value >= TASK_SIZE_MAX)
|
||||
return -EIO;
|
||||
/*
|
||||
* When changing the segment base, use do_arch_prctl
|
||||
* When changing the segment base, use do_arch_prctl_64
|
||||
* to set either thread.fs or thread.fsindex and the
|
||||
* corresponding GDT slot.
|
||||
*/
|
||||
if (child->thread.fsbase != value)
|
||||
return do_arch_prctl(child, ARCH_SET_FS, value);
|
||||
return do_arch_prctl_64(child, ARCH_SET_FS, value);
|
||||
return 0;
|
||||
case offsetof(struct user_regs_struct,gs_base):
|
||||
/*
|
||||
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
|
||||
if (value >= TASK_SIZE_MAX)
|
||||
return -EIO;
|
||||
if (child->thread.gsbase != value)
|
||||
return do_arch_prctl(child, ARCH_SET_GS, value);
|
||||
return do_arch_prctl_64(child, ARCH_SET_GS, value);
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
|
||||
Works just like arch_prctl, except that the arguments
|
||||
are reversed. */
|
||||
case PTRACE_ARCH_PRCTL:
|
||||
ret = do_arch_prctl(child, data, addr);
|
||||
ret = do_arch_prctl_64(child, data, addr);
|
||||
break;
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user