Merge branch 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/xsave changes from Peter Anvin:
 "This is a patchset to support the XSAVES instruction required to
  support context switch of supervisor-only features in upcoming
  silicon.

  This patchset missed the 3.16 merge window, which is why it is based
  on 3.15-rc7"

* 'x86-xsave-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, xsave: Add forgotten inline annotation
  x86/xsaves: Clean up code in xstate offsets computation in xsave area
  x86/xsave: Make it clear that the XSAVE macros use (%edi)/(%rdi)
  Define kernel API to get address of each state in xsave area
  x86/xsaves: Enable xsaves/xrstors
  x86/xsaves: Call booting time xsaves and xrstors in setup_init_fpu_buf
  x86/xsaves: Save xstate to task's xsave area in __save_fpu during booting time
  x86/xsaves: Add xsaves and xrstors support for booting time
  x86/xsaves: Clear reserved bits in xsave header
  x86/xsaves: Use xsave/xrstor for saving and restoring user space context
  x86/xsaves: Use xsaves/xrstors for context switch
  x86/xsaves: Use xsaves/xrstors to save and restore xsave area
  x86/xsaves: Define a macro for handling xsave/xrstor instruction fault
  x86/xsaves: Define macros for xsave instructions
  x86/xsaves: Change compacted format xsave area header
  x86/alternative: Add alternative_input_2 to support alternative with two features and input
  x86/xsaves: Add a kernel parameter noxsaves to disable xsaves/xrstors
This commit is contained in:
Linus Torvalds
2014-08-13 18:20:04 -06:00
9 changed files with 325 additions and 71 deletions

View File

@@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end)
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: : "i" (0), ## input)
/*
* This is similar to alternative_input. But it has two features and
* respective instructions.
*
* If CPU has feature2, newinstr2 is used.
* Otherwise, if CPU has feature1, newinstr1 is used.
* Otherwise, oldinstr is used.
*/
#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
feature2, input...) \
asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
newinstr2, feature2) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \

View File

@@ -508,9 +508,12 @@ static inline void user_fpu_begin(void)
static inline void __save_fpu(struct task_struct *tsk)
{
if (use_xsave())
xsave_state(&tsk->thread.fpu.state->xsave, -1);
else
if (use_xsave()) {
if (unlikely(system_state == SYSTEM_BOOTING))
xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
else
xsave_state(&tsk->thread.fpu.state->xsave, -1);
} else
fpu_fxsave(&tsk->thread.fpu);
}

View File

@@ -385,8 +385,8 @@ struct bndcsr_struct {
struct xsave_hdr_struct {
u64 xstate_bv;
u64 reserved1[2];
u64 reserved2[5];
u64 xcomp_bv;
u64 reserved[6];
} __attribute__((packed));
struct xsave_struct {

View File

@@ -52,24 +52,170 @@ extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child);
static inline int fpu_xrstor_checking(struct xsave_struct *fx)
{
int err;
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err)
: "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
#define xstate_fault ".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err)
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XSAVES"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XSAVE"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
asm volatile(xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XRSTORS"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XRSTOR"\n\t"
"2:\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
asm volatile(xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* Save processor xstate to xsave area.
*/
static inline int xsave_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err = 0;
/*
* If xsaves is enabled, xsaves replaces xsaveopt because
* it supports compact format and supervisor states in addition to
* modified optimization in xsaveopt.
*
* Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
* because xsaveopt supports modified optimization which is not
* supported by xsave.
*
* If none of xsaves and xsaveopt is enabled, use xsave.
*/
alternative_input_2(
"1:"XSAVE,
"1:"XSAVEOPT,
X86_FEATURE_XSAVEOPT,
"1:"XSAVES,
X86_FEATURE_XSAVES,
[fx] "D" (fx), "a" (lmask), "d" (hmask) :
"memory");
asm volatile("2:\n\t"
xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* Restore processor xstate from xsave area.
*/
static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
{
int err = 0;
u32 lmask = mask;
u32 hmask = mask >> 32;
/*
* Use xrstors to restore context if it is enabled. xrstors supports
* compacted format of xsave area which is not supported by xrstor.
*/
alternative_input(
"1: " XRSTOR,
"1: " XRSTORS,
X86_FEATURE_XSAVES,
"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
asm volatile("2:\n"
xstate_fault
: "0" (0)
: "memory");
return err;
}
/*
* Save xstate context for old process during context switch.
*/
static inline void fpu_xsave(struct fpu *fpu)
{
xsave_state(&fpu->state->xsave, -1);
}
/*
* Restore xstate context for new process during context switch.
*/
static inline int fpu_xrstor_checking(struct xsave_struct *fx)
{
return xrstor_state(fx, -1);
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for
* backward compatibility for old applications which don't understand
* compacted format of xsave area.
*/
static inline int xsave_user(struct xsave_struct __user *buf)
{
int err;
@@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf)
return -EFAULT;
__asm__ __volatile__(ASM_STAC "\n"
"1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
"1:"XSAVE"\n"
"2: " ASM_CLAC "\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
xstate_fault
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory");
return err;
}
/*
* Restore xstate from user space xsave area.
*/
static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
{
int err;
int err = 0;
struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
__asm__ __volatile__(ASM_STAC "\n"
"1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
"1:"XRSTOR"\n"
"2: " ASM_CLAC "\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
xstate_fault
: "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
: "memory"); /* memory required? */
return err;
}
static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
void setup_xstate_comp(void);
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void xsave_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void fpu_xsave(struct fpu *fpu)
{
/* This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */
alternative_input(
".byte " REX_PREFIX "0x0f,0xae,0x27",
".byte " REX_PREFIX "0x0f,0xae,0x37",
X86_FEATURE_XSAVEOPT,
[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
"memory");
}
#endif

View File

@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1;
@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s)
}
__setup("noxsaveopt", x86_xsaveopt_setup);
static int __init x86_xsaves_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
return 1;
}
__setup("noxsaves", x86_xsaves_setup);
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;

View File

@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/*
* These bits must be zero.
*/
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
memset(xsave_hdr->reserved, 0, 48);
return ret;
}

View File

@@ -93,6 +93,7 @@ void arch_task_cache_init(void)
kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate),
SLAB_PANIC | SLAB_NOTRACK, NULL);
setup_xstate_comp();
}
/*

View File

@@ -8,6 +8,7 @@
#include <linux/bootmem.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/sigframe.h>
@@ -24,7 +25,9 @@ u64 pcntxt_mask;
struct xsave_struct *init_xstate_buf;
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
static unsigned int *xstate_offsets, *xstate_sizes;
static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
static unsigned int xstate_features;
/*
* If a processor implementation discern that a processor state component is
@@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (use_xsave()) {
/* These bits must be zero. */
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
memset(xsave_hdr->reserved, 0, 48);
/*
* Init the state that is not present in the memory
@@ -478,6 +481,52 @@ static void __init setup_xstate_features(void)
} while (1);
}
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
* of the xsave aread.
*
* Input: void
* Output: void
*/
void setup_xstate_comp(void)
{
unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
int i;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
if (!cpu_has_xsaves) {
for (i = 2; i < xstate_features; i++) {
if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
xstate_comp_offsets[i] = xstate_offsets[i];
xstate_comp_sizes[i] = xstate_sizes[i];
}
}
return;
}
xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = 2; i < xstate_features; i++) {
if (test_bit(i, (unsigned long *)&pcntxt_mask))
xstate_comp_sizes[i] = xstate_sizes[i];
else
xstate_comp_sizes[i] = 0;
if (i > 2)
xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ xstate_comp_sizes[i-1];
}
}
/*
* setup the xstate image representing the init state
*/
@@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void)
setup_xstate_features();
if (cpu_has_xsaves) {
init_xstate_buf->xsave_hdr.xcomp_bv =
(u64)1 << 63 | pcntxt_mask;
init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
}
/*
* Init all the features state with header_bv being 0x0
*/
xrstor_state(init_xstate_buf, -1);
xrstor_state_booting(init_xstate_buf, -1);
/*
* Dump the init state again. This is to identify the init state
* of any feature which is not represented by all zero's.
*/
xsave_state(init_xstate_buf, -1);
xsave_state_booting(init_xstate_buf, -1);
}
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
@@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s)
}
__setup("eagerfpu=", eager_fpu_setup);
/*
* Calculate total size of enabled xstates in XCR0/pcntxt_mask.
*/
static void __init init_xstate_size(void)
{
unsigned int eax, ebx, ecx, edx;
int i;
if (!cpu_has_xsaves) {
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx;
return;
}
xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = 2; i < 64; i++) {
if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
xstate_size += eax;
}
}
}
/*
* Enable and initialize the xsave feature.
*/
@@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void)
/*
* Recompute the context size for enabled features
*/
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx;
init_xstate_size();
update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
@@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void)
}
}
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
pcntxt_mask, xstate_size,
cpu_has_xsaves ? "compacted form" : "standard form");
}
/*
@@ -635,3 +714,26 @@ void eager_fpu_init(void)
else
fxrstor_checking(&init_xstate_buf->i387);
}
/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
*
* This is the API that is called to get xstate address in either
* standard format or compacted format of xsave area.
*
* Inputs:
* xsave: base address of the xsave area;
* xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
* etc.)
* Output:
* address of the state in the xsave area.
*/
void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
{
int feature = fls64(xstate) - 1;
if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
return NULL;
return (void *)xsave + xstate_comp_offsets[feature];
}