Merge branch 'x86/urgent' into x86/cache, to pick up dependent fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit)
push %ebx
push %ecx
push %edx
push %edi

/*
* RIP-relative addressing is needed to access the encryption bit
* variable. Since we are running in 32-bit mode we need this call/pop
* sequence to get the proper relative addressing.
*/
call 1f
1: popl %edi
subl $1b, %edi

movl enc_bit(%edi), %eax
cmpl $0, %eax
jge .Lsev_exit

/* Check if running under a hypervisor */
movl $1, %eax
@@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit)

movl %ebx, %eax
andl $0x3f, %eax /* Return the encryption bit location */
movl %eax, enc_bit(%edi)
jmp .Lsev_exit

.Lno_sev:
xor %eax, %eax
movl %eax, enc_bit(%edi)

.Lsev_exit:
pop %edi
pop %edx
pop %ecx
pop %ebx
@@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask)
ENDPROC(set_sev_encryption_mask)

.data
enc_bit:
.int 0xffffffff

#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
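[ Note on the hunk above: the deleted comment refers to the standard 32-bit workaround for the lack of EIP-relative addressing. A hedged, stand-alone sketch of the same idea using GCC inline assembly; the helper name current_eip() is illustrative and not part of this patch: ]

/* Illustration only: obtain the current instruction pointer in 32-bit code,
 * where no EIP-relative addressing mode exists, via a call/pop pair. */
static inline unsigned long current_eip(void)
{
	unsigned long eip;

	asm volatile("call 1f\n"
		     "1:	pop %0"
		     : "=r" (eip));
	return eip;
}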
@@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS)
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO

ifdef CONFIG_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
CFL += $(RETPOLINE_VDSO_CFLAGS)
endif
endif

$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)

@@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)

ifdef CONFIG_RETPOLINE
ifneq ($(RETPOLINE_VDSO_CFLAGS),)
KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS)
endif
endif

$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)

$(obj)/vdso32.so.dbg: FORCE \
@@ -43,8 +43,9 @@ extern u8 hvclock_page
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
"0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
asm ("syscall" : "=a" (ret), "=m" (*ts) :
"0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
"memory", "rcx", "r11");
return ret;
}
@@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;

asm("syscall" : "=a" (ret) :
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
"memory", "rcx", "r11");
return ret;
}
@@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;

asm(
asm (
"mov %%ebx, %%edx \n"
"mov %2, %%ebx \n"
"mov %[clock], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
: "=a" (ret)
: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
: "=a" (ret), "=m" (*ts)
: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
: "memory", "edx");
return ret;
}
@@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;

asm(
asm (
"mov %%ebx, %%edx \n"
"mov %2, %%ebx \n"
"mov %[tv], %%ebx \n"
"call __kernel_vsyscall \n"
"mov %%edx, %%ebx \n"
: "=a" (ret)
: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
: "=a" (ret), "=m" (*tv), "=m" (*tz)
: "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
: "memory", "edx");
return ret;
}
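[ Note on the vDSO fallback hunks above: the syscall instruction clobbers RCX and R11, and the kernel writes through the ts/tv/tz pointers, so both facts must be visible to the compiler. A stand-alone user-space sketch of the same constraint pattern; it assumes glibc's <sys/syscall.h> for SYS_clock_gettime and is not part of the commit: ]

#include <time.h>
#include <sys/syscall.h>

static long clock_gettime_fallback(long clock, struct timespec *ts)
{
	long ret;

	/* "=a"/"0" tie the return value to rax; "=m" (*ts) records that the
	 * kernel writes *ts; rcx and r11 are destroyed by syscall itself. */
	asm volatile("syscall"
		     : "=a" (ret), "=m" (*ts)
		     : "0" (SYS_clock_gettime), "D" (clock), "S" (ts)
		     : "rcx", "r11", "memory");
	return ret;
}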
@@ -36,6 +36,7 @@

static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;

static HLIST_HEAD(uncore_unused_list);
@@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
hwc->idx = -1;

/*
* SliceMask and ThreadMask need to be set for certain L3 events in
* Family 17h. For other events, the two fields do not affect the count.
*/
if (l3_mask)
hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);

if (event->cpu < 0)
return -EINVAL;
@@ -525,6 +533,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l3";
format_attr_event_df.show = &event_show_df;
format_attr_event_l3.show = &event_show_l3;
l3_mask = true;
} else {
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
@@ -532,6 +541,7 @@ static int __init amd_uncore_init(void)
amd_llc_pmu.name = "amd_l2";
format_attr_event_df = format_attr_event;
format_attr_event_l3 = format_attr_event;
l3_mask = false;
}

amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
@@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {

void bdx_uncore_cpu_init(void)
{
int pkg = topology_phys_to_logical_pkg(0);
int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);

if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
@@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
},
{ /* M3UPI0 Link 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
},
{ /* M3UPI0 Link 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
},
{ /* M3UPI1 Link 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
},
{ /* end: all zeroes */ }
};
@@ -46,6 +46,14 @@
#define INTEL_ARCH_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)

#define AMD64_L3_SLICE_SHIFT 48
#define AMD64_L3_SLICE_MASK \
((0xFULL) << AMD64_L3_SLICE_SHIFT)

#define AMD64_L3_THREAD_SHIFT 56
#define AMD64_L3_THREAD_MASK \
((0xFFULL) << AMD64_L3_THREAD_SHIFT)

#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK | \
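[ Note on the new AMD64_L3_* definitions above: ORed together, as the amd_uncore_event_init() hunk earlier in this merge does, they set every slice-select and thread-select bit of the L3 PMC event register. A small stand-alone check; the program is illustrative only, with the values mirrored from the header: ]

#include <stdint.h>
#include <stdio.h>

#define AMD64_L3_SLICE_SHIFT	48
#define AMD64_L3_SLICE_MASK	((0xFULL) << AMD64_L3_SLICE_SHIFT)
#define AMD64_L3_THREAD_SHIFT	56
#define AMD64_L3_THREAD_MASK	((0xFFULL) << AMD64_L3_THREAD_SHIFT)

int main(void)
{
	uint64_t config = 0;

	/* What the l3_mask branch does for Family 17h L3 events. */
	config |= AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK;
	printf("%#llx\n", (unsigned long long)config);	/* prints 0xff0f000000000000 */
	return 0;
}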
@@ -10,8 +10,13 @@ struct cpumask;
struct mm_struct;

#ifdef CONFIG_X86_UV
#include <linux/efi.h>

extern enum uv_system_type get_uv_system_type(void);
static inline bool is_early_uv_system(void)
{
return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
extern void uv_cpu_init(void);
@@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
#else /* X86_UV */

static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubless(void) { return 0; }
static inline void uv_cpu_init(void) { }
@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
if (c->x86 == 6) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
u32 _cbm, int closid, bool exclusive);
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
u32 cbm);
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
@@ -789,25 +789,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
/**
* rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
* @d: RDT domain
* @_cbm: CBM to test
* @cbm: CBM to test
*
* @d represents a cache instance and @_cbm a capacity bitmask that is
* considered for it. Determine if @_cbm overlaps with any existing
* @d represents a cache instance and @cbm a capacity bitmask that is
* considered for it. Determine if @cbm overlaps with any existing
* pseudo-locked region on @d.
*
* Return: true if @_cbm overlaps with pseudo-locked region on @d, false
* @cbm is unsigned long, even if only 32 bits are used, to make the
* bitmap functions work correctly.
*
* Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
unsigned long *cbm = (unsigned long *)&_cbm;
unsigned long *cbm_b;
unsigned int cbm_len;
unsigned long cbm_b;

if (d->plr) {
cbm_len = d->plr->r->cache.cbm_len;
cbm_b = (unsigned long *)&d->plr->cbm;
if (bitmap_intersects(cbm, cbm_b, cbm_len))
cbm_b = d->plr->cbm;
if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
}
return false;
@@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
* is false then overlaps with any resource group or hardware entities
* will be considered.
*
* @cbm is unsigned long, even if only 32 bits are used, to make the
* bitmap functions work correctly.
*
* Return: false if CBM does not overlap, true if it does.
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
u32 _cbm, int closid, bool exclusive)
unsigned long cbm, int closid, bool exclusive)
{
unsigned long *cbm = (unsigned long *)&_cbm;
unsigned long *ctrl_b;
enum rdtgrp_mode mode;
unsigned long ctrl_b;
u32 *ctrl;
int i;

/* Check for any overlap with regions used by hardware directly */
if (!exclusive) {
if (bitmap_intersects(cbm,
(unsigned long *)&r->cache.shareable_bits,
r->cache.cbm_len))
ctrl_b = r->cache.shareable_bits;
if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
return true;
}

/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
for (i = 0; i < closids_supported(); i++, ctrl++) {
ctrl_b = (unsigned long *)ctrl;
ctrl_b = *ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
if (exclusive) {
if (mode == RDT_MODE_EXCLUSIVE)
return true;
@@ -1138,15 +1139,18 @@ out:
* computed by first dividing the total cache size by the CBM length to
* determine how many bytes each bit in the bitmask represents. The result
* is multiplied with the number of bits set in the bitmask.
*
* @cbm is unsigned long, even if only 32 bits are used to make the
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
struct rdt_domain *d, u32 cbm)
struct rdt_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
int num_b, i;

num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->cache_level) {
@@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
int i, ret;
@@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
* modify the CBM based on system availability.
*/
cbm_ensure_valid(&d->new_ctrl, r);
if (bitmap_weight((unsigned long *) &d->new_ctrl,
r->cache.cbm_len) <
r->cache.min_cbm_bits) {
/*
* Assign the u32 CBM to an unsigned long to ensure
* that bitmap_weight() does not access out-of-bound
* memory.
*/
tmp_cbm = d->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
r->cache.min_cbm_bits) {
rdt_last_cmd_printf("no space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
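[ Note on the resctrl hunks above: the 64-bit bitmap helpers read whole unsigned longs, so casting the address of a u32 CBM to (unsigned long *) makes bitmap_weight()/bitmap_intersects() read four bytes past the variable. Copying the value into a real unsigned long first, as tmp_cbm does, keeps the access in bounds. A minimal sketch; count_cbm_bits() is an illustrative stand-in that uses __builtin_popcountl() in place of the kernel's bitmap_weight(): ]

#include <stdint.h>

static inline unsigned int count_cbm_bits(uint32_t cbm32)
{
	/* Widen first; never hand &cbm32 to code that expects unsigned long *. */
	unsigned long cbm = cbm32;

	return (unsigned int)__builtin_popcountl(cbm);
}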
@@ -26,6 +26,7 @@
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
#include <asm/uv/uv.h>

unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
/* Don't change UV TSC multi-chassis synchronization */
if (is_early_uv_system())
return;
if (!determine_cpu_tsc_frequencies(true))
return;
loops_per_jiffy = get_loops_per_jiffy();
@@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*/
static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;

/*
* In some cases, we need to preserve the GFN of a non-present or reserved
* SPTE when we usurp the upper five bits of the physical address space to
* defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
* shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
* left into the reserved bits, i.e. the GFN in the SPTE will be split into
* high and low parts. This mask covers the lower bits of the GFN.
*/
static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;

static void mmu_spte_set(u64 *sptep, u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte)

static gfn_t get_mmio_spte_gfn(u64 spte)
{
u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
shadow_nonpresent_or_rsvd_mask;
u64 gpa = spte & ~mask;
u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;

gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
& shadow_nonpresent_or_rsvd_mask;
@@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

static void kvm_mmu_reset_all_pte_masks(void)
{
u8 low_phys_bits;

shadow_user_mask = 0;
shadow_accessed_mask = 0;
shadow_dirty_mask = 0;
@@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void)
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
*/
low_phys_bits = boot_cpu_data.x86_phys_bits;
if (boot_cpu_data.x86_phys_bits <
52 - shadow_nonpresent_or_rsvd_mask_len)
52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
rsvd_bits(boot_cpu_data.x86_phys_bits -
shadow_nonpresent_or_rsvd_mask_len,
boot_cpu_data.x86_phys_bits - 1);
low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
}
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}

static int is_cpuid_PSE36(void)
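[ Note on kvm_mmu_reset_all_pte_masks() above, worked through for a CPU reporting 46 physical address bits (the value 46 and the local GENMASK_ULL copy are assumptions for the illustration): the top five usable address bits become shadow_nonpresent_or_rsvd_mask (bits 45:41) and the untouched GFN bits form shadow_nonpresent_or_rsvd_lower_gfn_mask (bits 40:12), which is what the new get_mmio_spte_gfn() recombines: ]

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	const unsigned int phys_bits = 46;	/* assumed boot_cpu_data.x86_phys_bits */
	const unsigned int mask_len = 5;	/* shadow_nonpresent_or_rsvd_mask_len */
	unsigned int low_phys_bits = phys_bits;
	uint64_t rsvd_mask = 0, lower_gfn_mask;

	if (phys_bits < 52 - mask_len) {
		rsvd_mask = GENMASK_ULL(phys_bits - 1, phys_bits - mask_len);
		low_phys_bits -= mask_len;
	}
	lower_gfn_mask = GENMASK_ULL(low_phys_bits - 1, 12);	/* PAGE_SHIFT */

	printf("rsvd mask:      %#018llx\n", (unsigned long long)rsvd_mask);	/* bits 45:41 */
	printf("lower GFN mask: %#018llx\n", (unsigned long long)lower_gfn_mask);	/* bits 40:12 */
	return 0;
}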
@@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);

#define MSR_BITMAP_MODE_X2APIC 1
#define MSR_BITMAP_MODE_X2APIC_APICV 2
#define MSR_BITMAP_MODE_LM 4

#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
@@ -857,6 +856,7 @@ struct nested_vmx {

/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
u64 vmcs01_guest_bndcfgs;

u16 vpid02;
u16 last_vpid;
@@ -2899,8 +2899,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
}

if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
savesegment(fs, fs_sel);
savesegment(gs, gs_sel);
@@ -2951,8 +2950,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
vmx->loaded_cpu_state = NULL;

#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu))
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
kvm_load_ldt(host_state->ldt_sel);
@@ -2980,24 +2978,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
if (is_long_mode(&vmx->vcpu)) {
preempt_disable();
if (vmx->loaded_cpu_state)
rdmsrl(MSR_KERNEL_GS_BASE,
vmx->msr_guest_kernel_gs_base);
preempt_enable();
}
preempt_disable();
if (vmx->loaded_cpu_state)
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
preempt_enable();
return vmx->msr_guest_kernel_gs_base;
}

static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
if (is_long_mode(&vmx->vcpu)) {
preempt_disable();
if (vmx->loaded_cpu_state)
wrmsrl(MSR_KERNEL_GS_BASE, data);
preempt_enable();
}
preempt_disable();
if (vmx->loaded_cpu_state)
wrmsrl(MSR_KERNEL_GS_BASE, data);
preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
}
#endif
@@ -3533,9 +3526,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;

if (kvm_mpx_supported())
msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;

/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
@@ -3552,8 +3542,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
VM_ENTRY_LOAD_IA32_PAT;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
if (kvm_mpx_supported())
msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;

/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3601,12 +3589,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high);
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING;

/*
* We can emulate "VMCS shadowing," even if the hardware
* doesn't support it.
@@ -3663,6 +3651,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
msrs->secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;

if (flexpriority_enabled)
msrs->secondary_ctls_high |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;

/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
msrs->misc_low,
@@ -5073,19 +5065,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!msr)
return;

/*
* MSR_KERNEL_GS_BASE is not intercepted when the guest is in
* 64-bit mode as a 64-bit kernel may frequently access the
* MSR. This means we need to manually save/restore the MSR
* when switching between guest and host state, but only if
* the guest is in 64-bit mode. Sync our cached value if the
* guest is transitioning to 32-bit mode and the CPU contains
* guest state, i.e. the cache is stale.
*/
#ifdef CONFIG_X86_64
if (!(efer & EFER_LMA))
(void)vmx_read_guest_kernel_gs_base(vmx);
#endif
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -6078,9 +6057,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
mode |= MSR_BITMAP_MODE_X2APIC_APICV;
}

if (is_long_mode(vcpu))
mode |= MSR_BITMAP_MODE_LM;

return mode;
}
@@ -6121,9 +6097,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
if (!changed)
return;

vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
!(mode & MSR_BITMAP_MODE_LM));

if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
@@ -6189,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
nested_mark_vmcs12_pages_dirty(vcpu);
}

static u8 vmx_get_rvi(void)
{
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}

static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6201,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
return false;

rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
rvi = vmx_get_rvi();

vapic_page = kmap(vmx->nested.virtual_apic_page);
vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
@@ -10245,15 +10223,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;

if (!flexpriority_enabled &&
!cpu_has_vmx_virtualize_x2apic_mode())
return;

/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
return;
}

if (!cpu_need_tpr_shadow(vcpu))
return;

sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -10375,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}

static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
{
u8 rvi = vmx_get_rvi();
u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);

return ((rvi & 0xf0) > (vppr & 0xf0));
}

static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -11264,6 +11251,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
#undef cr4_fixed1_update
}

static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

if (kvm_mpx_supported()) {
bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);

if (mpx_enabled) {
vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
} else {
vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
}
}
}

static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -11280,8 +11284,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;

if (nested_vmx_allowed(vcpu))
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
}
}

static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -12049,8 +12055,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

set_cr4_guest_host_mask(vmx);

if (vmx_mpx_supported())
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
if (kvm_mpx_supported()) {
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
else
vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
}

if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
@@ -12595,15 +12606,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
u32 vmcs01_cpu_exec_ctrl;
bool evaluate_pending_interrupts;
int r = 0;

vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);

enter_guest_mode(vcpu);

if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
@@ -12643,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
* to L1 or delivered directly to L2 (e.g. In case L1 don't
* intercept EXTERNAL_INTERRUPT).
*
* Usually this would be handled by L0 requesting a
* IRQ/NMI window by setting VMCS accordingly. However,
* this setting was done on VMCS01 and now VMCS02 is active
* instead. Thus, we force L0 to perform pending event
* evaluation by requesting a KVM_REQ_EVENT.
* Usually this would be handled by the processor noticing an
* IRQ/NMI window request, or checking RVI during evaluation of
* pending virtual interrupts. However, this setting was done
* on VMCS01 and now VMCS02 is active instead. Thus, we force L0
* to perform pending event evaluation by requesting a KVM_REQ_EVENT.
*/
if (vmcs01_cpu_exec_ctrl &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
if (unlikely(evaluate_pending_interrupts))
kvm_make_request(KVM_REQ_EVENT, vcpu);
}

/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
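[ Note on the vmx_has_apicv_interrupt() helper added above: RVI and VPPR are compared by their upper nibble, the interrupt priority class, since a pending virtual interrupt is only deliverable when its class exceeds the processor priority class. A hedged sketch with example values chosen purely for illustration: ]

#include <stdint.h>
#include <stdbool.h>

/* Mirrors the (rvi & 0xf0) > (vppr & 0xf0) test from the hunk above. */
static bool apicv_interrupt_deliverable(uint8_t rvi, uint8_t vppr)
{
	return (rvi & 0xf0) > (vppr & 0xf0);
}

/*
 * Example: rvi = 0x41 (class 4), vppr = 0x30 (class 3) -> deliverable,
 *          so pending events must be re-evaluated (KVM_REQ_EVENT).
 * Example: rvi = 0x31, vppr = 0x30 -> same class, not deliverable.
 */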
@@ -4698,7 +4698,7 @@ static void kvm_init_msr_list(void)
*/
switch (msrs_to_save[i]) {
case MSR_IA32_BNDCFGS:
if (!kvm_x86_ops->mpx_supported())
if (!kvm_mpx_supported())
continue;
break;
case MSR_TSC_AUX: