Merge tag 'v5.3-rc7' into x86/mm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -38,6 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \

REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
export REALMODE_CFLAGS

@@ -72,6 +72,8 @@ static unsigned long find_trampoline_placement(void)

	/* Find the first usable memory region under bios_start. */
	for (i = boot_params->e820_entries - 1; i >= 0; i--) {
		unsigned long new = bios_start;

		entry = &boot_params->e820_table[i];

		/* Skip all entries above bios_start. */
@@ -84,15 +86,20 @@ static unsigned long find_trampoline_placement(void)

		/* Adjust bios_start to the end of the entry if needed. */
		if (bios_start > entry->addr + entry->size)
			bios_start = entry->addr + entry->size;
			new = entry->addr + entry->size;

		/* Keep bios_start page-aligned. */
		bios_start = round_down(bios_start, PAGE_SIZE);
		new = round_down(new, PAGE_SIZE);

		/* Skip the entry if it's too small. */
		if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
		if (new - TRAMPOLINE_32BIT_SIZE < entry->addr)
			continue;

		/* Protect against underflow. */
		if (new - TRAMPOLINE_32BIT_SIZE > bios_start)
			break;

		bios_start = new;
		break;
	}

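The fix above computes each candidate in a local `new` and commits it to `bios_start` only after the unsigned-underflow guard passes. A minimal user-space sketch of the same control flow, with a hypothetical `region` table and made-up constants rather than the kernel's types:

    #include <stdint.h>
    #include <stdio.h>

    #define TRAMPOLINE_SIZE 0x2000UL   /* stand-in for TRAMPOLINE_32BIT_SIZE */
    #define PAGE_SZ         0x1000UL

    struct region { uint64_t addr, size; };

    /* Scan a table from the top down, mirroring the fixed loop above. */
    static uint64_t place(const struct region *tbl, int n, uint64_t bios_start)
    {
        for (int i = n - 1; i >= 0; i--) {
            uint64_t new = bios_start;

            if (bios_start <= tbl[i].addr)
                continue;

            if (bios_start > tbl[i].addr + tbl[i].size)
                new = tbl[i].addr + tbl[i].size;

            new &= ~(PAGE_SZ - 1);      /* round_down() to a page */

            /* Unsigned math: new - TRAMPOLINE_SIZE wraps for a tiny new... */
            if (new - TRAMPOLINE_SIZE < tbl[i].addr)
                continue;

            /* ...so an entry near address 0 must not move us above bios_start. */
            if (new - TRAMPOLINE_SIZE > bios_start)
                break;

            bios_start = new;
            break;
        }
        return bios_start;
    }

    int main(void)
    {
        struct region e820[] = { { 0x0, 0x1000 }, { 0x10000, 0x80000 } };

        printf("%#llx\n", (unsigned long long)place(e820, 2, 0x90000)); /* 0x90000 */
        printf("%#llx\n", (unsigned long long)place(e820, 2, 0x1800));  /* guard trips */
        return 0;
    }
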
@@ -37,6 +37,14 @@ int memcmp(const void *s1, const void *s2, size_t len)
	return diff;
}

/*
 * Clang may lower `memcmp == 0` to `bcmp == 0`.
 */
int bcmp(const void *s1, const void *s2, size_t len)
{
	return memcmp(s1, s2, len);
}

int strcmp(const char *str1, const char *str2)
{
	const unsigned char *s1 = (const unsigned char *)str1;

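bcmp() is only required to report equal versus not-equal, so forwarding to memcmp() is a valid implementation; the boot stub needs the symbol at all because, as the new comment says, Clang may emit bcmp calls for equality-only comparisons. A small user-space illustration of the contract difference:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char a[] = "abc", b[] = "abd";

        /* memcmp's sign encodes an ordering ('c' < 'd' here)... */
        printf("memcmp: %d\n", memcmp(a, b, 3));

        /* ...an equality check only cares whether the result is zero,
         * which is all a bcmp-style routine has to provide. */
        printf("equal: %s\n", memcmp(a, b, 3) == 0 ? "yes" : "no");
        return 0;
    }
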
@@ -314,6 +314,23 @@ For 32-bit we have the following conventions - kernel is built with

#endif

/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm

.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
	PUSH_AND_CLEAR_REGS

@@ -1443,8 +1443,12 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,

ENTRY(page_fault)
	ASM_CLAC
	pushl	$0; /* %gs's slot on the stack */
	pushl	$do_page_fault
	jmp	common_exception_read_cr2
END(page_fault)

common_exception_read_cr2:
	/* the function address is in %gs's slot on the stack */
	SAVE_ALL switch_stacks=1 skip_gs=1

	ENCODE_FRAME_POINTER
@@ -1452,6 +1456,7 @@ ENTRY(page_fault)

	/* fixup %gs */
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx

@@ -1463,9 +1468,9 @@ ENTRY(page_fault)

	TRACE_IRQS_OFF
	movl	%esp, %eax	# pt_regs pointer
	call	do_page_fault
	CALL_NOSPEC %edi
	jmp	ret_from_exception
END(page_fault)
END(common_exception_read_cr2)

common_exception:
	/* the function address is in %gs's slot on the stack */
@@ -1595,7 +1600,7 @@ END(general_protection)
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	common_exception
	jmp	common_exception_read_cr2
END(async_page_fault)
#endif

@@ -519,7 +519,7 @@ ENTRY(interrupt_entry)
	testb	$3, CS-ORIG_RAX+8(%rsp)
	jz	1f
	SWAPGS

	FENCE_SWAPGS_USER_ENTRY
	/*
	 * Switch to the thread stack. The IRET frame and orig_ax are
	 * on the stack, as well as the return address. RDI..R12 are
@@ -549,8 +549,10 @@ ENTRY(interrupt_entry)
	UNWIND_HINT_FUNC

	movq	(%rdi), %rdi
	jmp	2f
1:

	FENCE_SWAPGS_KERNEL_ENTRY
2:
	PUSH_AND_CLEAR_REGS save_ret=1
	ENCODE_FRAME_POINTER 8

@@ -1238,6 +1240,13 @@ ENTRY(paranoid_entry)
	 */
	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14

	/*
	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
	 * unconditional CR3 write, even in the PTI case. So do an lfence
	 * to prevent GS speculation, regardless of whether PTI is enabled.
	 */
	FENCE_SWAPGS_KERNEL_ENTRY

	ret
END(paranoid_entry)

@@ -1288,6 +1297,7 @@ ENTRY(error_entry)
	 * from user mode due to an IRET fault.
	 */
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
	/* We have user CR3. Change to kernel CR3. */
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax

@@ -1301,6 +1311,8 @@ ENTRY(error_entry)
	pushq	%r12
	ret

.Lerror_entry_done_lfence:
	FENCE_SWAPGS_KERNEL_ENTRY
.Lerror_entry_done:
	ret

@@ -1318,7 +1330,7 @@ ENTRY(error_entry)
	cmpq	%rax, RIP+8(%rsp)
	je	.Lbstep_iret
	cmpq	$.Lgs_change, RIP+8(%rsp)
	jne	.Lerror_entry_done
	jne	.Lerror_entry_done_lfence

	/*
	 * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
@@ -1326,6 +1338,7 @@ ENTRY(error_entry)
	 * .Lgs_change's error handler with kernel gsbase.
	 */
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
	jmp	.Lerror_entry_done

@@ -1340,6 +1353,7 @@ ENTRY(error_entry)
	 * gsbase and CR3. Switch to kernel gsbase and CR3:
	 */
	SWAPGS
	FENCE_SWAPGS_USER_ENTRY
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax

	/*
@@ -1431,6 +1445,7 @@ ENTRY(nmi)

	swapgs
	cld
	FENCE_SWAPGS_USER_ENTRY
	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
	movq	%rsp, %rdx
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

@@ -661,10 +661,17 @@ fail:

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle)
	if (throttle) {
		perf_ibs_stop(event, 0);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
	} else {
		period >>= 4;

		if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
		    (*config & IBS_OP_CNT_CTL))
			period |= *config & IBS_OP_CUR_CNT_RAND;

		perf_ibs_enable_event(perf_ibs, hwc, period);
	}

	perf_event_update_userpage(event);

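In the non-throttled path, the new code shifts the period (hardware ignores the low four bits) and keeps the hardware-randomized bits of the current count when IBS_OP_CNT_CTL is set. A standalone sketch of just that bit manipulation, reusing the IBS_OP_CUR_CNT_RAND value from the header hunk further down; the MSR plumbing is omitted:

    #include <stdint.h>
    #include <stdio.h>

    #define IBS_OP_CUR_CNT_RAND (0x0007FULL << 32)

    int main(void)
    {
        uint64_t config = 0x0000004d00000000ULL; /* hypothetical MSR snapshot */
        uint64_t period = 0x40000;

        period >>= 4;                           /* low 4 bits are ignored */
        period |= config & IBS_OP_CUR_CNT_RAND; /* keep the random bits */

        printf("%#llx\n", (unsigned long long)period);
        return 0;
    }
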
@@ -1236,7 +1236,7 @@ void x86_pmu_enable_event(struct perf_event *event)
 * Add a single event to the PMU.
 *
 * The event is added to the group of enabled events
 * but only if it can be scehduled with existing events.
 * but only if it can be scheduled with existing events.
 */
static int x86_pmu_add(struct perf_event *event, int flags)
{

@@ -20,7 +20,6 @@
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/hypervisor.h>

#include "../perf_event.h"

@@ -263,8 +262,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
};

static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
	EVENT_EXTRA_END
@@ -3573,6 +3572,11 @@ static u64 bdw_limit_period(struct perf_event *event, u64 left)
	return left;
}

static u64 nhm_limit_period(struct perf_event *event, u64 left)
{
	return max(left, 32ULL);
}

PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -4053,7 +4057,7 @@ static bool check_msr(unsigned long msr, u64 mask)
	 * Disable the check for real HW, so we don't
	 * mess with potentionaly enabled registers:
	 */
	if (hypervisor_is_type(X86_HYPER_NATIVE))
	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	/*
@@ -4607,6 +4611,7 @@ __init int intel_pmu_init(void)
		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.extra_regs = intel_nehalem_extra_regs;
		x86_pmu.limit_period = nhm_limit_period;

		mem_attr = nhm_mem_events_attrs;

@@ -4955,6 +4960,7 @@ __init int intel_pmu_init(void)

	case INTEL_FAM6_SKYLAKE_X:
		pmem = true;
		/* fall through */
	case INTEL_FAM6_SKYLAKE_MOBILE:
	case INTEL_FAM6_SKYLAKE_DESKTOP:
	case INTEL_FAM6_KABYLAKE_MOBILE:
@@ -5004,6 +5010,7 @@ __init int intel_pmu_init(void)
	case INTEL_FAM6_ICELAKE_X:
	case INTEL_FAM6_ICELAKE_XEON_D:
		pmem = true;
		/* fall through */
	case INTEL_FAM6_ICELAKE_MOBILE:
	case INTEL_FAM6_ICELAKE_DESKTOP:
		x86_pmu.late_ack = true;

@@ -851,7 +851,7 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {

struct event_constraint intel_icl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */

	INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */

@@ -18,6 +18,20 @@
 * Note: efi_info is commonly left uninitialized, but that field has a
 * private magic, so it is better to leave it unchanged.
 */

#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); })

#define BOOT_PARAM_PRESERVE(struct_member) \
	{ \
		.start = offsetof(struct boot_params, struct_member), \
		.len = sizeof_mbr(struct boot_params, struct_member), \
	}

struct boot_params_to_save {
	unsigned int start;
	unsigned int len;
};

static void sanitize_boot_params(struct boot_params *boot_params)
{
	/*
@@ -35,21 +49,40 @@ static void sanitize_boot_params(struct boot_params *boot_params)
	 * problems again.
	 */
	if (boot_params->sentinel) {
		/* fields in boot_params are left uninitialized, clear them */
		boot_params->acpi_rsdp_addr = 0;
		memset(&boot_params->ext_ramdisk_image, 0,
		       (char *)&boot_params->efi_info -
		       (char *)&boot_params->ext_ramdisk_image);
		memset(&boot_params->kbd_status, 0,
		       (char *)&boot_params->hdr -
		       (char *)&boot_params->kbd_status);
		memset(&boot_params->_pad7[0], 0,
		       (char *)&boot_params->edd_mbr_sig_buffer[0] -
		       (char *)&boot_params->_pad7[0]);
		memset(&boot_params->_pad8[0], 0,
		       (char *)&boot_params->eddbuf[0] -
		       (char *)&boot_params->_pad8[0]);
		memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
		static struct boot_params scratch;
		char *bp_base = (char *)boot_params;
		char *save_base = (char *)&scratch;
		int i;

		const struct boot_params_to_save to_save[] = {
			BOOT_PARAM_PRESERVE(screen_info),
			BOOT_PARAM_PRESERVE(apm_bios_info),
			BOOT_PARAM_PRESERVE(tboot_addr),
			BOOT_PARAM_PRESERVE(ist_info),
			BOOT_PARAM_PRESERVE(hd0_info),
			BOOT_PARAM_PRESERVE(hd1_info),
			BOOT_PARAM_PRESERVE(sys_desc_table),
			BOOT_PARAM_PRESERVE(olpc_ofw_header),
			BOOT_PARAM_PRESERVE(efi_info),
			BOOT_PARAM_PRESERVE(alt_mem_k),
			BOOT_PARAM_PRESERVE(scratch),
			BOOT_PARAM_PRESERVE(e820_entries),
			BOOT_PARAM_PRESERVE(eddbuf_entries),
			BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
			BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
			BOOT_PARAM_PRESERVE(hdr),
			BOOT_PARAM_PRESERVE(e820_table),
			BOOT_PARAM_PRESERVE(eddbuf),
		};

		memset(&scratch, 0, sizeof(scratch));

		for (i = 0; i < ARRAY_SIZE(to_save); i++) {
			memcpy(save_base + to_save[i].start,
			       bp_base + to_save[i].start, to_save[i].len);
		}

		memcpy(boot_params, save_base, sizeof(*boot_params));
	}
}

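BOOT_PARAM_PRESERVE pairs a member's offsetof() with its size so whole fields can be copied by a table walk while everything unlisted is dropped. The same pattern in a self-contained sketch, with an invented struct standing in for boot_params:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    #define sizeof_mbr(type, member) sizeof(((type *)0)->member)

    struct params { int keep; char junk[8]; long keep2; };

    struct span { size_t start, len; };

    #define PRESERVE(m) { offsetof(struct params, m), sizeof_mbr(struct params, m) }

    int main(void)
    {
        const struct span to_save[] = { PRESERVE(keep), PRESERVE(keep2) };
        struct params in = { 7, "garbage", 42 }, out;

        memset(&out, 0, sizeof(out));   /* everything not listed is cleared */
        for (size_t i = 0; i < sizeof(to_save) / sizeof(to_save[0]); i++)
            memcpy((char *)&out + to_save[i].start,
                   (char *)&in + to_save[i].start, to_save[i].len);

        printf("%d \"%s\" %ld\n", out.keep, out.junk, out.keep2); /* 7 "" 42 */
        return 0;
    }
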
@@ -281,6 +281,8 @@
#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -394,5 +396,6 @@
#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */

#endif /* _ASM_X86_CPUFEATURES_H */

@@ -16,7 +16,6 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR

#ifndef __ASSEMBLY__
extern void mcount(void);
extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);

@@ -11,6 +11,21 @@
 * While adding a new CPUID for a new microarchitecture, add a new
 * group to keep logically sorted out in chronological order. Within
 * that group keep the CPUID for the variants sorted by model number.
 *
 * The defined symbol names have the following form:
 *	INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
 * where:
 * OPTFAMILY	Describes the family of CPUs that this belongs to. Default
 *		is assumed to be "_CORE" (and should be omitted). Other values
 *		currently in use are _ATOM and _XEON_PHI
 * MICROARCH	Is the code name for the micro-architecture for this core.
 *		N.B. Not the platform name.
 * OPTDIFF	If needed, a short string to differentiate by market segment.
 *		Exact strings here will vary over time. _DESKTOP, _MOBILE, and
 *		_X (short for Xeon server) should be used when they are
 *		appropriate.
 *
 * The #define line may optionally include a comment including platform names.
 */

#define INTEL_FAM6_CORE_YONAH 0x0E

@@ -35,6 +35,8 @@
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>

#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

#define KVM_MAX_VCPUS 288
#define KVM_SOFT_MAX_VCPUS 240
#define KVM_MAX_VCPU_ID 1023
@@ -607,15 +609,16 @@ struct kvm_vcpu_arch {

	/*
	 * QEMU userspace and the guest each have their own FPU state.
	 * In vcpu_run, we switch between the user, maintained in the
	 * task_struct struct, and guest FPU contexts. While running a VCPU,
	 * the VCPU thread will have the guest FPU context.
	 * In vcpu_run, we switch between the user and guest FPU contexts.
	 * While running a VCPU, the VCPU thread will have the guest FPU
	 * context.
	 *
	 * Note that while the PKRU state lives inside the fpu registers,
	 * it is switched out separately at VMENTER and VMEXIT time. The
	 * "guest_fpu" state here contains the guest FPU context, with the
	 * host PRKU bits.
	 */
	struct fpu *user_fpu;
	struct fpu *guest_fpu;

	u64 xcr0;
@@ -1174,6 +1177,7 @@ struct kvm_x86_ops {
	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
			      uint32_t guest_irq, bool set);
	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);

	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
			    bool *expired);

@@ -381,6 +381,7 @@
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141

@@ -192,7 +192,7 @@
	" lfence;\n" \
	" jmp 902b;\n" \
	" .align 16\n" \
	"903: addl $4, %%esp;\n" \
	"903: lea 4(%%esp), %%esp;\n" \
	" pushl %[thunk_target];\n" \
	" ret;\n" \
	" .align 16\n" \

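The point of switching from addl to lea is that lea performs the same address arithmetic without writing EFLAGS, so the 32-bit retpoline thunk can no longer corrupt a flags value that callers keep live across it. A GNU-C, x86-only demonstration of the difference (a sketch, not kernel code):

    #include <stdio.h>

    int main(void)
    {
        unsigned long f_add, f_lea;

        /* addl rewrites the arithmetic flags... */
        asm volatile("xorl %%eax, %%eax\n\t"    /* ZF := 1 */
                     "addl $4, %%eax\n\t"       /* ZF := 0 */
                     "pushf\n\t"
                     "pop %0"
                     : "=r" (f_add) : : "eax", "cc");

        /* ...while lea computes the same sum without touching EFLAGS. */
        asm volatile("xorl %%eax, %%eax\n\t"    /* ZF := 1 */
                     "leal 4(%%eax), %%eax\n\t" /* ZF still 1 */
                     "pushf\n\t"
                     "pop %0"
                     : "=r" (f_lea) : : "eax", "cc");

        printf("ZF after add: %lu, after lea: %lu\n",
               (f_add >> 6) & 1, (f_lea >> 6) & 1);
        return 0;
    }
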
@@ -252,16 +252,20 @@ struct pebs_lbr {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F

/* ibs fetch bits/masks */
/* IBS fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL

/* ibs op bits/masks */
/* lower 4 bits of the current count are ignored: */
#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
/*
 * IBS op bits/masks
 * The lower 7 bits of the current count are random bits
 * preloaded by hardware and ignored in software
 */
#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)

@@ -96,6 +96,8 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)

#else

#define VDSO_HAS_32BIT_FALLBACK 1

static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
@@ -113,6 +115,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
	return ret;
}

static __always_inline
long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

static __always_inline
long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
			   struct timezone *_tz)
@@ -148,6 +167,23 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
	return ret;
}

static __always_inline
long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_BYTEORDER_H
#define _ASM_X86_BYTEORDER_H

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_SIGCONTEXT32_H
#define _ASM_X86_SIGCONTEXT32_H

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_TYPES_H
#define _ASM_X86_TYPES_H

@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;

/*
 * Temporary interrupt handler.
 * Temporary interrupt handler and polled calibration function.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
@@ -851,7 +851,8 @@ bool __init apic_needs_pit(void)
static int __init calibrate_APIC_clock(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
	void (*real_handler)(struct clock_event_device *dev);
	u64 tsc_perj = 0, tsc_start = 0;
	unsigned long jif_start;
	unsigned long deltaj;
	long delta, deltatsc;
	int pm_referenced = 0;
@@ -878,29 +879,65 @@ static int __init calibrate_APIC_clock(void)
	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
		    "calibrating APIC timer ...\n");

	/*
	 * There are platforms w/o global clockevent devices. Instead of
	 * making the calibration conditional on that, use a polling based
	 * approach everywhere.
	 */
	local_irq_disable();

	/* Replace the global interrupt handler */
	real_handler = global_clock_event->event_handler;
	global_clock_event->event_handler = lapic_cal_handler;

	/*
	 * Setup the APIC counter to maximum. There is no way the lapic
	 * can underflow in the 100ms detection time frame
	 */
	__setup_APIC_LVTT(0xffffffff, 0, 0);

	/* Let the interrupts run */
	/*
	 * Methods to terminate the calibration loop:
	 * 1) Global clockevent if available (jiffies)
	 * 2) TSC if available and frequency is known
	 */
	jif_start = READ_ONCE(jiffies);

	if (tsc_khz) {
		tsc_start = rdtsc();
		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
	}

	/*
	 * Enable interrupts so the tick can fire, if a global
	 * clockevent device is available
	 */
	local_irq_enable();

	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
		cpu_relax();
	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
		/* Wait for a tick to elapse */
		while (1) {
			if (tsc_khz) {
				u64 tsc_now = rdtsc();
				if ((tsc_now - tsc_start) >= tsc_perj) {
					tsc_start += tsc_perj;
					break;
				}
			} else {
				unsigned long jif_now = READ_ONCE(jiffies);

				if (time_after(jif_now, jif_start)) {
					jif_start = jif_now;
					break;
				}
			}
			cpu_relax();
		}

		/* Invoke the calibration routine */
		local_irq_disable();
		lapic_cal_handler(NULL);
		local_irq_enable();
	}

	local_irq_disable();

	/* Restore the real event handler */
	global_clock_event->event_handler = real_handler;

	/* Build delta t1-t2 as apic timer counts down */
	delta = lapic_cal_t1 - lapic_cal_t2;
	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
@@ -943,10 +980,11 @@ static int __init calibrate_APIC_clock(void)
	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;

	/*
	 * PM timer calibration failed or not turned on
	 * so lets try APIC timer based calibration
	 * PM timer calibration failed or not turned on so lets try APIC
	 * timer based calibration, if a global clockevent device is
	 * available.
	 */
	if (!pm_referenced) {
	if (!pm_referenced && global_clock_event) {
		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");

		/*
@@ -1141,6 +1179,10 @@ void clear_local_APIC(void)
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
	if (!x2apic_enabled()) {
		v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
		apic_write(APIC_LDR, v);
	}
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);

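The reworked loop above ends each calibration tick either when jiffies advance (global clockevent present) or when the TSC moves by one tick period. A user-space sketch of the TSC leg of that wait, with made-up frequency and HZ values:

    #include <stdio.h>
    #include <stdint.h>
    #include <x86intrin.h>  /* __rdtsc() */

    /* Poll the TSC until one tick period elapses, as the new loop does. */
    static void wait_one_tick(uint64_t *tsc_start, uint64_t tsc_per_tick)
    {
        while (__rdtsc() - *tsc_start < tsc_per_tick)
            ;   /* cpu_relax() in the kernel; a plain busy-spin here */
        *tsc_start += tsc_per_tick; /* advance, don't resync, to avoid drift */
    }

    int main(void)
    {
        uint64_t tsc_khz = 2500000;                 /* hypothetical 2.5 GHz */
        uint64_t per_tick = tsc_khz * 1000 / 250;   /* HZ = 250 assumed */
        uint64_t start = __rdtsc();

        for (int i = 0; i < 4; i++) {
            wait_one_tick(&start, per_tick);
            printf("tick %d\n", i);
        }
        return 0;
    }
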
@@ -38,32 +38,12 @@ static int bigsmp_early_logical_apicid(int cpu)
	return early_per_cpu(x86_cpu_to_apicid, cpu);
}

static inline unsigned long calculate_ldr(int cpu)
{
	unsigned long val, id;

	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
	id = per_cpu(x86_bios_cpu_apicid, cpu);
	val |= SET_APIC_LOGICAL_ID(id);

	return val;
}

/*
 * Set up the logical destination ID.
 *
 * Intel recommends to set DFR, LDR and TPR before enabling
 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
 * document number 292116). So here it goes...
 * bigsmp enables physical destination mode
 * and doesn't use LDR and DFR
 */
static void bigsmp_init_apic_ldr(void)
{
	unsigned long val;
	int cpu = smp_processor_id();

	apic_write(APIC_DFR, APIC_DFR_FLAT);
	val = calculate_ldr(cpu);
	apic_write(APIC_LDR, val);
}

static void bigsmp_setup_apic_routing(void)

@@ -2438,7 +2438,13 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
	 * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
	 * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
	 */
	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
	if (!ioapic_initialized)
		return gsi_top;
	/*
	 * For DT enabled machines ioapic_dynirq_base is irrelevant and not
	 * updated. So simply return @from if ioapic_dynirq_base == 0.
	 */
	return ioapic_dynirq_base ? : from;
}

#ifdef CONFIG_X86_32

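The `ioapic_dynirq_base ? : from` form is the GNU conditional with the middle operand omitted: `a ?: b` behaves like `a ? a : b` but evaluates `a` only once. A tiny sketch of that property (GNU C extension, GCC/Clang):

    #include <stdio.h>

    static int counter;

    static int get(void)
    {
        return counter++;   /* 0 on the first call, 1 on the second */
    }

    int main(void)
    {
        /* get() runs once per expression, not twice. */
        printf("%d\n", get() ?: -1);    /* returns 0 (false-y) -> prints -1 */
        printf("%d\n", get() ?: -1);    /* returns 1 -> prints 1 */
        return 0;
    }
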
@@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void)
			def_to_bigsmp = 0;
			break;
		}
		/* If P4 and above fall through */
		/* P4 and above */
		/* fall through */
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
		def_to_bigsmp = 1;

@@ -804,6 +804,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c)
	msr_set_bit(MSR_AMD64_DE_CFG, 31);
}

static bool rdrand_force;

static int __init rdrand_cmdline(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "force"))
		rdrand_force = true;
	else
		return -EINVAL;

	return 0;
}
early_param("rdrand", rdrand_cmdline);

static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c)
{
	/*
	 * Saving of the MSR used to hide the RDRAND support during
	 * suspend/resume is done by arch/x86/power/cpu.c, which is
	 * dependent on CONFIG_PM_SLEEP.
	 */
	if (!IS_ENABLED(CONFIG_PM_SLEEP))
		return;

	/*
	 * The nordrand option can clear X86_FEATURE_RDRAND, so check for
	 * RDRAND support using the CPUID function directly.
	 */
	if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force)
		return;

	msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62);

	/*
	 * Verify that the CPUID change has occurred in case the kernel is
	 * running virtualized and the hypervisor doesn't support the MSR.
	 */
	if (cpuid_ecx(1) & BIT(30)) {
		pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n");
		return;
	}

	clear_cpu_cap(c, X86_FEATURE_RDRAND);
	pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n");
}

static void init_amd_jg(struct cpuinfo_x86 *c)
{
	/*
	 * Some BIOS implementations do not restore proper RDRAND support
	 * across suspend and resume. Check on whether to hide the RDRAND
	 * instruction support via CPUID.
	 */
	clear_rdrand_cpuid_bit(c);
}

static void init_amd_bd(struct cpuinfo_x86 *c)
{
	u64 value;
@@ -818,6 +876,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
			wrmsrl_safe(MSR_F15H_IC_CFG, value);
	}
}

	/*
	 * Some BIOS implementations do not restore proper RDRAND support
	 * across suspend and resume. Check on whether to hide the RDRAND
	 * instruction support via CPUID.
	 */
	clear_rdrand_cpuid_bit(c);
}

static void init_amd_zn(struct cpuinfo_x86 *c)
@@ -860,6 +925,7 @@ static void init_amd(struct cpuinfo_x86 *c)
	case 0x10: init_amd_gh(c); break;
	case 0x12: init_amd_ln(c); break;
	case 0x15: init_amd_bd(c); break;
	case 0x16: init_amd_jg(c); break;
	case 0x17: init_amd_zn(c); break;
	}

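clear_rdrand_cpuid_bit() probes CPUID leaf 1, ECX bit 30 directly, both before and after poking the MSR, rather than trusting cached feature flags. The same probe is easy to reproduce from user space with the compiler's cpuid helper:

    #include <stdio.h>
    #include <cpuid.h>  /* GCC/Clang __get_cpuid() */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;

        /* CPUID.1:ECX bit 30 advertises the RDRAND instruction. */
        printf("RDRAND %s\n", (ecx & (1u << 30)) ? "supported" : "hidden");
        return 0;
    }
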
@@ -34,6 +34,7 @@

#include "cpu.h"

static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
@@ -98,17 +99,11 @@ void __init check_bugs(void)
	if (boot_cpu_has(X86_FEATURE_STIBP))
		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;

	/* Select the proper spectre mitigation before patching alternatives */
	/* Select the proper CPU mitigations before patching alternatives: */
	spectre_v1_select_mitigation();
	spectre_v2_select_mitigation();

	/*
	 * Select proper mitigation for any exposure to the Speculative Store
	 * Bypass vulnerability.
	 */
	ssb_select_mitigation();

	l1tf_select_mitigation();

	mds_select_mitigation();

	arch_smt_update();
@@ -273,6 +268,98 @@ static int __init mds_cmdline(char *str)
}
early_param("mds", mds_cmdline);

#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt

enum spectre_v1_mitigation {
	SPECTRE_V1_MITIGATION_NONE,
	SPECTRE_V1_MITIGATION_AUTO,
};

static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
	SPECTRE_V1_MITIGATION_AUTO;

static const char * const spectre_v1_strings[] = {
	[SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
	[SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
};

/*
 * Does SMAP provide full mitigation against speculative kernel access to
 * userspace?
 */
static bool smap_works_speculatively(void)
{
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return false;

	/*
	 * On CPUs which are vulnerable to Meltdown, SMAP does not
	 * prevent speculative access to user data in the L1 cache.
	 * Consider SMAP to be non-functional as a mitigation on these
	 * CPUs.
	 */
	if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
		return false;

	return true;
}

static void __init spectre_v1_select_mitigation(void)
{
	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
		spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
		return;
	}

	if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
		/*
		 * With Spectre v1, a user can speculatively control either
		 * path of a conditional swapgs with a user-controlled GS
		 * value. The mitigation is to add lfences to both code paths.
		 *
		 * If FSGSBASE is enabled, the user can put a kernel address in
		 * GS, in which case SMAP provides no protection.
		 *
		 * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
		 *   FSGSBASE enablement patches have been merged. ]
		 *
		 * If FSGSBASE is disabled, the user can only put a user space
		 * address in GS. That makes an attack harder, but still
		 * possible if there's no SMAP protection.
		 */
		if (!smap_works_speculatively()) {
			/*
			 * Mitigation can be provided from SWAPGS itself or
			 * PTI as the CR3 write in the Meltdown mitigation
			 * is serializing.
			 *
			 * If neither is there, mitigate with an LFENCE to
			 * stop speculation through swapgs.
			 */
			if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
			    !boot_cpu_has(X86_FEATURE_PTI))
				setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);

			/*
			 * Enable lfences in the kernel entry (non-swapgs)
			 * paths, to prevent user entry from speculatively
			 * skipping swapgs.
			 */
			setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
		}
	}

	pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
}

static int __init nospectre_v1_cmdline(char *str)
{
	spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
	return 0;
}
early_param("nospectre_v1", nospectre_v1_cmdline);

#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt

@@ -1226,7 +1313,7 @@ static ssize_t l1tf_show_state(char *buf)

static ssize_t mds_show_state(char *buf)
{
	if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		return sprintf(buf, "%s; SMT Host state unknown\n",
			       mds_strings[mds_mitigation]);
	}

@@ -1290,7 +1377,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
		break;

	case X86_BUG_SPECTRE_V1:
		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
		return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);

	case X86_BUG_SPECTRE_V2:
		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],

@@ -1022,6 +1022,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
#define NO_SWAPGS BIT(6)

#define VULNWL(_vendor, _family, _model, _whitelist) \
	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1048,30 +1049,38 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
	VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
	VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),

	VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),

	VULNWL_INTEL(CORE_YONAH, NO_SSB),

	VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
	VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),

	VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
	VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
	VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
	VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),

	/*
	 * Technically, swapgs isn't serializing on AMD (despite it previously
	 * being documented as such in the APM). But according to AMD, %gs is
	 * updated non-speculatively, and the issuing of %gs-relative memory
	 * operands will be blocked until the %gs update completes, which is
	 * good enough for our purposes.
	 */

	/* AMD Family 0xf - 0x12 */
	VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
	VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
	VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
	VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
	VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),

	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
	VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
	VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
	VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
	{}
};

@@ -1108,6 +1117,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
		setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
	}

	if (!cpu_matches(NO_SWAPGS))
		setup_force_cpu_bug(X86_BUG_SWAPGS);

	if (cpu_matches(NO_MELTDOWN))
		return;

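cpu_matches() tests the running CPU against this whitelist, and the caller sets X86_BUG_SWAPGS whenever NO_SWAPGS is not granted. A condensed sketch of that table-match logic, with a drastically reduced, hypothetical table:

    #include <stdio.h>
    #include <stdint.h>

    #define NO_MELTDOWN (1u << 0)
    #define NO_MDS      (1u << 1)
    #define NO_SWAPGS   (1u << 2)

    struct vulnwl { int family, model; uint32_t whitelist; };

    /* model == -1 plays the role of an X86_MODEL_ANY wildcard. */
    static const struct vulnwl wl[] = {
        { 0x0f, -1, NO_MELTDOWN | NO_MDS | NO_SWAPGS },
        { 0x17, -1, NO_MELTDOWN | NO_MDS | NO_SWAPGS },
        { 0, 0, 0 }
    };

    static int cpu_matches(int family, int model, uint32_t which)
    {
        for (const struct vulnwl *e = wl; e->whitelist; e++)
            if (e->family == family && (e->model == -1 || e->model == model))
                return (e->whitelist & which) == which;
        return 0;   /* not whitelisted -> the bug bit gets forced on */
    }

    int main(void)
    {
        printf("NO_SWAPGS? %d\n", cpu_matches(0x17, 0x01, NO_SWAPGS)); /* 1 */
        printf("NO_SWAPGS? %d\n", cpu_matches(0x06, 0x55, NO_SWAPGS)); /* 0 */
        return 0;
    }
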
@@ -98,6 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
	case 7:
		if (size < 0x40)
			break;
		/* Else, fall through */
	case 6:
	case 5:
	case 4:

@@ -17,6 +17,12 @@
 */
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);

/*
 * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
 * hardware or BIOS before kernel boot.
 */
static u32 orig_umwait_control_cached __ro_after_init;

/*
 * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
 * the sysfs write functions.
@@ -52,6 +58,23 @@ static int umwait_cpu_online(unsigned int cpu)
	return 0;
}

/*
 * The CPU hotplug callback sets the control MSR to the original control
 * value.
 */
static int umwait_cpu_offline(unsigned int cpu)
{
	/*
	 * This code is protected by the CPU hotplug already and
	 * orig_umwait_control_cached is never changed after it caches
	 * the original control MSR value in umwait_init(). So there
	 * is no race condition here.
	 */
	wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);

	return 0;
}

/*
 * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
 * is the only active CPU at this time. The MSR is set up on the APs via the
@@ -185,8 +208,22 @@ static int __init umwait_init(void)
	if (!boot_cpu_has(X86_FEATURE_WAITPKG))
		return -ENODEV;

	/*
	 * Cache the original control MSR value before the control MSR is
	 * changed. This is the only place where orig_umwait_control_cached
	 * is modified.
	 */
	rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
				umwait_cpu_online, NULL);
				umwait_cpu_online, umwait_cpu_offline);
	if (ret < 0) {
		/*
		 * On failure, the control MSR on all CPUs has the
		 * original control value.
		 */
		return ret;
	}

	register_syscore_ops(&umwait_syscore_ops);

@@ -193,10 +193,10 @@ ENTRY(secondary_startup_64)

	/* Set up %gs.
	 *
	 * The base of %gs always points to the bottom of the irqstack
	 * union. If the stack protector canary is enabled, it is
	 * located at %gs:40. Note that, on SMP, the boot cpu uses
	 * init data section till per cpu areas are set up.
	 * The base of %gs always points to fixed_percpu_data. If the
	 * stack protector canary is enabled, it is located at %gs:40.
	 * Note that, on SMP, the boot cpu uses init data section until
	 * the per cpu areas are set up.
	 */
	movl	$MSR_GS_BASE,%ecx
	movl	initial_gs(%rip),%eax

@@ -827,10 +827,6 @@ int __init hpet_enable(void)
	if (!hpet_cfg_working())
		goto out_nohpet;

	/* Validate that the counter is counting */
	if (!hpet_counting())
		goto out_nohpet;

	/*
	 * Read the period and check for a sane value:
	 */
@@ -896,6 +892,14 @@ int __init hpet_enable(void)
	}
	hpet_print_config();

	/*
	 * Validate that the counter is counting. This needs to be done
	 * after sanitizing the config registers to properly deal with
	 * force enabled HPETs.
	 */
	if (!hpet_counting())
		goto out_nohpet;

	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);

	if (id & HPET_ID_LEGSUP) {

@@ -308,9 +308,6 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)

static void kvm_guest_cpu_init(void)
{
	if (!kvm_para_available())
		return;

	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
		u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));

@@ -625,9 +622,6 @@ static void __init kvm_guest_init(void)
{
	int i;

	if (!kvm_para_available())
		return;

	paravirt_ops_setup();
	register_reboot_notifier(&kvm_pv_reboot_nb);
	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
@@ -848,8 +842,6 @@ asm(
 */
void __init kvm_spinlock_init(void)
{
	if (!kvm_para_available())
		return;
	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
		return;

@@ -201,6 +201,7 @@ static int set_segment_reg(struct task_struct *task,
	case offsetof(struct user_regs_struct, ss):
		if (unlikely(value == 0))
			return -EIO;
		/* Else, fall through */

	default:
		*pt_regs_access(task_pt_regs(task), offset) = value;

@@ -100,7 +100,7 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
	int ret;

	if (!access_ok(fp, sizeof(*frame)))
	if (__range_not_ok(fp, sizeof(*frame), TASK_SIZE))
		return 0;

	ret = 1;

@@ -230,9 +230,55 @@ static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
	{},
};

/*
 * Some devices have a portrait LCD but advertise a landscape resolution (and
 * pitch). We simply swap width and height for these devices so that we can
 * correctly deal with some of them coming with multiple resolutions.
 */
static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
	{
		/*
		 * Lenovo MIIX310-10ICR, only some batches have the troublesome
		 * 800x1280 portrait screen. Luckily the portrait version has
		 * its own BIOS version, so we match on that.
		 */
		.matches = {
			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
			DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1HCN44WW"),
		},
	},
	{
		/* Lenovo MIIX 320-10ICR with 800x1280 portrait screen */
		.matches = {
			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
					"Lenovo MIIX 320-10ICR"),
		},
	},
	{
		/* Lenovo D330 with 800x1280 or 1200x1920 portrait screen */
		.matches = {
			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
					"Lenovo ideapad D330-10IGM"),
		},
	},
	{},
};

__init void sysfb_apply_efi_quirks(void)
{
	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
	    !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
		dmi_check_system(efifb_dmi_system_table);

	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
	    dmi_check_system(efifb_dmi_swap_width_height)) {
		u16 temp = screen_info.lfb_width;

		screen_info.lfb_width = screen_info.lfb_height;
		screen_info.lfb_height = temp;
		screen_info.lfb_linelength = 4 * screen_info.lfb_width;
	}
}

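The quirk swaps the misreported dimensions and then recomputes the line length from the new width at four bytes per pixel. The arithmetic in isolation:

    #include <stdio.h>
    #include <stdint.h>

    struct fbinfo { uint16_t width, height; uint32_t pitch; };

    /* Swap a portrait panel misreported as landscape; 4 bytes per pixel. */
    static void swap_wh(struct fbinfo *fb)
    {
        uint16_t tmp = fb->width;

        fb->width = fb->height;
        fb->height = tmp;
        fb->pitch = 4 * fb->width;  /* line length must follow the new width */
    }

    int main(void)
    {
        struct fbinfo fb = { 1280, 800, 4 * 1280 };

        swap_wh(&fb);
        printf("%ux%u pitch=%u\n", fb.width, fb.height, fb.pitch);
        return 0;
    }
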
@@ -508,9 +508,12 @@ struct uprobe_xol_ops {
	void (*abort)(struct arch_uprobe *, struct pt_regs *);
};

static inline int sizeof_long(void)
static inline int sizeof_long(struct pt_regs *regs)
{
	return in_ia32_syscall() ? 4 : 8;
	/*
	 * Check registers for mode as in_xxx_syscall() does not apply here.
	 */
	return user_64bit_mode(regs) ? 8 : 4;
}

static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)

static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
	unsigned long new_sp = regs->sp - sizeof_long();
	unsigned long new_sp = regs->sp - sizeof_long(regs);

	if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
	if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs)))
		return -EFAULT;

	regs->sp = new_sp;
@@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
		long correction = utask->vaddr - utask->xol_vaddr;
		regs->ip += correction;
	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
		regs->sp += sizeof_long(); /* Pop incorrect return address */
		regs->sp += sizeof_long(regs); /* Pop incorrect return address */
		if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
			return -ERESTART;
	}
@@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
	 * We could also restore ->ip and try to call branch_emulate_op() again.
	 */
	regs->sp += sizeof_long();
	regs->sp += sizeof_long(regs);
	return -ERESTART;
}

@@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
	int rasize = sizeof_long(), nleft;
	int rasize = sizeof_long(regs), nleft;
	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */

	if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))

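sizeof_long() now derives the word size from the traced task's register state instead of the caller's syscall mode. A toy model of the size-dependent stack push (invented struct, assumes a little-endian host):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Stand-in for pt_regs: only what the sketch needs. */
    struct regs { uint64_t sp; int is_64bit; };

    static int sizeof_long(const struct regs *r)
    {
        return r->is_64bit ? 8 : 4; /* decided per task, not per kernel */
    }

    /* Push a return address of the task's word size, like emulate_push_stack(). */
    static void push(struct regs *r, uint8_t *stack, uint64_t val)
    {
        r->sp -= sizeof_long(r);
        memcpy(stack + r->sp, &val, sizeof_long(r)); /* low bytes, little-endian */
    }

    int main(void)
    {
        uint8_t stack[64] = { 0 };
        struct regs r32 = { .sp = 32, .is_64bit = 0 };
        struct regs r64 = { .sp = 32, .is_64bit = 1 };

        push(&r32, stack, 0x1234);
        push(&r64, stack, 0x5678);
        printf("sp32=%llu sp64=%llu\n",
               (unsigned long long)r32.sp, (unsigned long long)r64.sp);
        return 0;
    }
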
@@ -8,11 +8,6 @@
#include <linux/debugfs.h>
#include "lapic.h"

bool kvm_arch_has_vcpu_debugfs(void)
{
	return true;
}

static int vcpu_get_timer_advance_ns(void *data, u64 *val)
{
	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -48,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)

DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	struct dentry *ret;
	debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu,
			    &vcpu_tsc_offset_fops);

	ret = debugfs_create_file("tsc-offset", 0444,
				  vcpu->debugfs_dentry,
				  vcpu, &vcpu_tsc_offset_fops);
	if (!ret)
		return -ENOMEM;

	if (lapic_in_kernel(vcpu)) {
		ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
					  vcpu->debugfs_dentry,
					  vcpu, &vcpu_timer_advance_ns_fops);
		if (!ret)
			return -ENOMEM;
	}
	if (lapic_in_kernel(vcpu))
		debugfs_create_file("lapic_timer_advance_ns", 0444,
				    vcpu->debugfs_dentry, vcpu,
				    &vcpu_timer_advance_ns_fops);

	if (kvm_has_tsc_control) {
		ret = debugfs_create_file("tsc-scaling-ratio", 0444,
					  vcpu->debugfs_dentry,
					  vcpu, &vcpu_tsc_scaling_fops);
		if (!ret)
			return -ENOMEM;
		ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
					  vcpu->debugfs_dentry,
					  vcpu, &vcpu_tsc_scaling_frac_fops);
		if (!ret)
			return -ENOMEM;

		debugfs_create_file("tsc-scaling-ratio", 0444,
				    vcpu->debugfs_dentry, vcpu,
				    &vcpu_tsc_scaling_fops);
		debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
				    vcpu->debugfs_dentry, vcpu,
				    &vcpu_tsc_scaling_frac_fops);
	}

	return 0;
}

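The rewrite drops all the error checking because debugfs creation failures are intentionally non-fatal and the helpers tolerate failed parents. A minimal module-style sketch of that idiom under the same assumption; this is not the KVM code:

    #include <linux/debugfs.h>
    #include <linux/module.h>

    static struct dentry *dir;
    static u32 counter;

    static int __init demo_init(void)
    {
        /* Modern style: no return-value checks; a failed debugfs node
         * simply never shows up, and the helpers cope with bad parents. */
        dir = debugfs_create_dir("demo", NULL);
        debugfs_create_u32("counter", 0644, dir, &counter);
        return 0;
    }

    static void __exit demo_exit(void)
    {
        debugfs_remove_recursive(dir);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
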
@@ -1781,7 +1781,7 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
				struct kvm_cpuid_entry2 __user *entries)
{
	uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
	uint16_t evmcs_ver = 0;
	struct kvm_cpuid_entry2 cpuid_entries[] = {
		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_INTERFACE },
@@ -1793,6 +1793,9 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
	};
	int i, nent = ARRAY_SIZE(cpuid_entries);

	if (kvm_x86_ops->nested_get_evmcs_version)
		evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);

	/* Skip NESTED_FEATURES if eVMCS is not supported */
	if (!evmcs_ver)
		--nent;

@@ -216,6 +216,9 @@ static void recalculate_apic_map(struct kvm *kvm)
		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
			new->phys_map[xapic_id] = apic;

		if (!kvm_apic_sw_enabled(apic))
			continue;

		ldr = kvm_lapic_get_reg(apic, APIC_LDR);

		if (apic_x2apic_mode(apic)) {
@@ -258,6 +261,8 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);

		recalculate_apic_map(apic->vcpu->kvm);
	}
}

@@ -1548,7 +1553,6 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
static void apic_timer_expired(struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct swait_queue_head *q = &vcpu->wq;
	struct kvm_timer *ktimer = &apic->lapic_timer;

	if (atomic_read(&apic->lapic_timer.pending))
@@ -1566,13 +1570,6 @@ static void apic_timer_expired(struct kvm_lapic *apic)

	atomic_inc(&apic->lapic_timer.pending);
	kvm_set_pending_timer(vcpu);

	/*
	 * For x86, the atomic_inc() is serialized, thus
	 * using swait_active() is safe.
	 */
	if (swait_active(q))
		swake_up_one(q);
}

static void start_sw_tscdeadline(struct kvm_lapic *apic)

@@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
		/*
		 * Currently, fast page fault only works for direct mapping
		 * since the gfn is not stable for indirect shadow page. See
		 * Documentation/virtual/kvm/locking.txt to get more detail.
		 * Documentation/virt/kvm/locking.txt to get more detail.
		 */
		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
							iterator.sptep, spte,
@@ -5653,38 +5653,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
			struct kvm_memory_slot *slot,
			struct kvm_page_track_notifier_node *node)
{
	struct kvm_mmu_page *sp;
	LIST_HEAD(invalid_list);
	unsigned long i;
	bool flush;
	gfn_t gfn;

	spin_lock(&kvm->mmu_lock);

	if (list_empty(&kvm->arch.active_mmu_pages))
		goto out_unlock;

	flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);

	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;

		for_each_valid_sp(kvm, sp, gfn) {
			if (sp->gfn != gfn)
				continue;

			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
		}
		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
			flush = false;
			cond_resched_lock(&kvm->mmu_lock);
		}
	}
	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_mmu_zap_all(kvm);
}

void kvm_mmu_init_vm(struct kvm *kvm)

@@ -1714,7 +1714,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||
if (!entry)
|
||||
return -EINVAL;
|
||||
|
||||
new_entry = READ_ONCE(*entry);
|
||||
new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
|
||||
AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
|
||||
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
|
||||
@@ -2143,12 +2142,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
goto out;
|
||||
}
|
||||
|
||||
svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!svm->vcpu.arch.user_fpu) {
|
||||
printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
|
||||
err = -ENOMEM;
|
||||
goto free_partial_svm;
|
||||
}
|
||||
|
||||
svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!svm->vcpu.arch.guest_fpu) {
|
||||
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
|
||||
err = -ENOMEM;
|
||||
goto free_partial_svm;
|
||||
goto free_user_fpu;
|
||||
}
|
||||
|
||||
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
|
||||
@@ -2211,6 +2218,8 @@ uninit:
|
||||
kvm_vcpu_uninit(&svm->vcpu);
|
||||
free_svm:
|
||||
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
|
||||
free_user_fpu:
|
||||
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
|
||||
free_partial_svm:
|
||||
kmem_cache_free(kvm_vcpu_cache, svm);
|
||||
out:
|
||||
@@ -2241,6 +2250,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
 	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -5179,6 +5189,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 		kvm_vcpu_wake_up(vcpu);
 }
 
+static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 {
 	unsigned long flags;
@@ -7113,12 +7128,6 @@ failed:
 	return ret;
 }
 
-static uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
-{
-	/* Not supported */
-	return 0;
-}
-
 static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			       uint16_t *vmcs_version)
 {
@@ -7303,6 +7312,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.pmu_ops = &amd_pmu_ops,
 	.deliver_posted_interrupt = svm_deliver_avic_intr,
+	.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
 	.update_pi_irte = svm_update_pi_irte,
 	.setup_mce = svm_setup_mce,
 
@@ -7316,7 +7326,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.mem_enc_unreg_region = svm_unregister_enc_region,
 
 	.nested_enable_evmcs = nested_enable_evmcs,
-	.nested_get_evmcs_version = nested_get_evmcs_version,
+	.nested_get_evmcs_version = NULL,
 
 	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 };
 
@@ -220,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
+	kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+
 	vmx->nested.vmxon = false;
 	vmx->nested.smm.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
@@ -232,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
+	vmx->nested.cached_vmcs12 = NULL;
 	kfree(vmx->nested.cached_shadow_vmcs12);
+	vmx->nested.cached_shadow_vmcs12 = NULL;
 	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
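
Clearing the pointers right after kfree() turns a later double free or stale read into a well-defined NULL, and since kfree(NULL) is a no-op a second pass through the teardown becomes harmless. The same idiom in plain C (free(NULL) is likewise a no-op):

#include <stdlib.h>

struct cache {
	char *vmcs12;            /* illustrative stand-ins, not VMX state */
	char *shadow_vmcs12;
};

static void cache_teardown(struct cache *c)
{
	free(c->vmcs12);
	c->vmcs12 = NULL;        /* a second teardown now frees NULL: no-op */
	free(c->shadow_vmcs12);
	c->shadow_vmcs12 = NULL;
}

int main(void)
{
	struct cache c;

	c.vmcs12 = malloc(64);
	c.shadow_vmcs12 = malloc(64);
	cache_teardown(&c);
	cache_teardown(&c);      /* idempotent thanks to the NULLing */
	return 0;
}
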
@@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	return max_irr;
 }
 
+static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+	return pi_test_on(vcpu_to_pi_desc(vcpu));
+}
+
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 	if (!kvm_vcpu_apicv_active(vcpu))
@@ -6598,6 +6603,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
@@ -6613,12 +6619,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 
+	vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+						    GFP_KERNEL_ACCOUNT);
+	if (!vmx->vcpu.arch.user_fpu) {
+		printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+		err = -ENOMEM;
+		goto free_partial_vcpu;
+	}
+
 	vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
 						     GFP_KERNEL_ACCOUNT);
 	if (!vmx->vcpu.arch.guest_fpu) {
 		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
 		err = -ENOMEM;
-		goto free_partial_vcpu;
+		goto free_user_fpu;
 	}
 
 	vmx->vpid = allocate_vpid();
@@ -6721,6 +6735,8 @@ uninit_vcpu:
 free_vcpu:
 	free_vpid(vmx->vpid);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
+free_user_fpu:
+	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 free_partial_vcpu:
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
@@ -7715,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
 	.sync_pir_to_irr = vmx_sync_pir_to_irr,
 	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+	.dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.set_identity_map_addr = vmx_set_identity_map_addr,
@@ -7780,6 +7797,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.set_nested_state = NULL,
 	.get_vmcs12_pages = NULL,
 	.nested_enable_evmcs = NULL,
+	.nested_get_evmcs_version = NULL,
 	.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
 };
 
@@ -3306,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
 
+	fpregs_assert_state_consistent();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+
 	/* Apply any externally detected TSC adjustments (due to suspend) */
 	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
 		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -6590,12 +6594,13 @@ restart:
 		unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
-		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE && ctxt->tf)
-			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
-		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+		    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
+			kvm_rip_write(vcpu, ctxt->eip);
+			if (r == EMULATE_DONE && ctxt->tf)
+				kvm_vcpu_do_singlestep(vcpu, &r);
 			__kvm_set_rflags(vcpu, ctxt->eflags);
+		}
 
 		/*
 		 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
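
The hunk moves the RIP and RFLAGS updates inside the no-exception branch, so when emulation raises a fault-style exception the guest still sees the faulting instruction's original state. A minimal sketch of that commit-on-success ordering, with illustrative types only:

#include <stdbool.h>
#include <stdio.h>

struct cpu {
	unsigned long pc, flags;
	bool exception;            /* fault raised while emulating */
};

static void emulate_one(struct cpu *cpu, unsigned long next_pc,
			unsigned long new_flags)
{
	if (!cpu->exception) {     /* commit side effects on success only */
		cpu->pc = next_pc;
		cpu->flags = new_flags;
	}
	/* on exception: pc/flags untouched, the exception is injected */
}

int main(void)
{
	struct cpu cpu = { .pc = 0x1000, .flags = 0x2 };

	cpu.exception = true;
	emulate_one(&cpu, 0x1004, 0x6);
	printf("pc=%#lx flags=%#lx\n", cpu.pc, cpu.flags); /* unchanged */
	return 0;
}
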
@@ -7202,7 +7207,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
 
 	rcu_read_unlock();
 
-	if (target)
+	if (target && READ_ONCE(target->ready))
 		kvm_vcpu_yield_to(target);
 }
 
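
READ_ONCE() forces a single, non-torn load of a flag that another CPU may be writing concurrently, and stops the compiler from caching or re-reading it. A userspace approximation of the same check-before-yield guard (illustrative, GNU C typeof; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define READ_ONCE(x)      (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, v)  (*(volatile typeof(x) *)&(x) = (v))

static int target_ready;

static void *vcpu_thread(void *arg)
{
	(void)arg;
	WRITE_ONCE(target_ready, 1);   /* published by the target vCPU */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, vcpu_thread, NULL);
	pthread_join(t, NULL);
	if (READ_ONCE(target_ready))   /* yield only to a ready target */
		printf("yield to target\n");
	return 0;
}
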
@@ -7242,6 +7247,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	case KVM_HC_KICK_CPU:
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
+		kvm_sched_yield(vcpu->kvm, a1);
 		ret = 0;
 		break;
 #ifdef CONFIG_X86_64
@@ -7990,9 +7996,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	trace_kvm_entry(vcpu->vcpu_id);
 	guest_enter_irqoff();
 
-	fpregs_assert_state_consistent();
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
+	/* The preempt notifier should have taken care of the FPU already. */
+	WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
 		set_debugreg(0, 7);
@@ -8270,7 +8275,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	fpregs_lock();
 
-	copy_fpregs_to_fpstate(&current->thread.fpu);
+	copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
 	/* PKRU is separately restored in kvm_x86_ops->run. */
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
 				~XFEATURE_MASK_PKRU);
@@ -8287,7 +8292,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	fpregs_lock();
 
 	copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
-	copy_kernel_to_fpregs(&current->thread.fpu.state);
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
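
The two hunks make load and put symmetric: userspace register state is parked in its own buffer (user_fpu) on guest entry and restored from that same buffer on exit, instead of trusting current->thread.fpu to still hold it. A minimal sketch of the save/swap/restore pattern, with plain assignments standing in for the XSAVE/XRSTOR copies:

#include <stdio.h>

struct fpu_state { unsigned long regs[8]; };

static struct fpu_state hw_fpu;     /* stands in for the real registers */

static void load_guest_fpu(struct fpu_state *user,
			   const struct fpu_state *guest)
{
	*user = hw_fpu;              /* park userspace state */
	hw_fpu = *guest;             /* install guest state  */
}

static void put_guest_fpu(const struct fpu_state *user,
			  struct fpu_state *guest)
{
	*guest = hw_fpu;             /* save guest state     */
	hw_fpu = *user;              /* restore userspace    */
}

int main(void)
{
	struct fpu_state user_fpu = { { 0 } }, guest_fpu = { { 42 } };

	hw_fpu.regs[0] = 7;          /* userspace value in "registers" */
	load_guest_fpu(&user_fpu, &guest_fpu);
	hw_fpu.regs[0]++;            /* guest mutates register state   */
	put_guest_fpu(&user_fpu, &guest_fpu);
	printf("user=%lu guest=%lu\n", hw_fpu.regs[0], guest_fpu.regs[0]);
	return 0;                    /* prints user=7 guest=43 */
}
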
@@ -9694,6 +9699,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
+		return true;
+
+	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+	    kvm_test_request(KVM_REQ_SMI, vcpu) ||
+	    kvm_test_request(KVM_REQ_EVENT, vcpu))
+		return true;
+
+	if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
+		return true;
+
+	return false;
+}
+
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.preempted_in_kernel;
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/types.h>
+#include <linux/export.h>
 #include <asm/cpu.h>
 
 unsigned int x86_family(unsigned int sig)
 {
@@ -178,13 +178,15 @@ void FPU_printall(void)
 	for (i = 0; i < 8; i++) {
 		FPU_REG *r = &st(i);
 		u_char tagi = FPU_gettagi(i);
+
 		switch (tagi) {
 		case TAG_Empty:
 			continue;
-			break;
 		case TAG_Zero:
 		case TAG_Special:
+			/* Update tagi for the printk below */
 			tagi = FPU_Special(r);
+			/* fall through */
 		case TAG_Valid:
 			printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
 			       getsign(r) ? '-' : '+',
@@ -198,7 +200,6 @@ void FPU_printall(void)
 			printk("Whoops! Error in errors.c: tag%d is %d ", i,
 			       tagi);
 			continue;
-			break;
 		}
 		printk("%s\n", tag_desc[(int)(unsigned)tagi]);
 	}
@@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
 	case TW_Denormal:
 		if (denormal_operand() < 0)
 			return;
-
+		/* fall through */
 	case TAG_Zero:
 	case TAG_Valid:
 		setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
@@ -177,13 +177,14 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 
 	pmd = pmd_offset(pud, address);
 	pmd_k = pmd_offset(pud_k, address);
 
+	if (pmd_present(*pmd) != pmd_present(*pmd_k))
+		set_pmd(pmd, *pmd_k);
+
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd))
-		set_pmd(pmd, *pmd_k);
-	else
-		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+	BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k));
 
 	return pmd_k;
 }
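
The reordered checks first make the faulting page table agree with the reference table, propagating both directions so a torn-down mapping is cleared as well as an established one, and only then decide whether the caller can continue. A toy model with single entries standing in for pmds:

#include <assert.h>
#include <stdio.h>

#define PRESENT 1UL

static void sync_entry(unsigned long *pmd, const unsigned long *pmd_k)
{
	/* propagate both directions: establish or tear down the mapping */
	if ((*pmd & PRESENT) != (*pmd_k & PRESENT))
		*pmd = *pmd_k;

	if (*pmd_k & PRESENT)
		assert(*pmd == *pmd_k); /* must now agree on the mapping */
}

int main(void)
{
	unsigned long ref = 0x1000 | PRESENT; /* init_mm-style reference */
	unsigned long mine = 0;               /* per-process copy, stale */

	sync_entry(&mine, &ref);
	printf("synced entry: %#lx\n", mine);
	return 0;
}
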
@@ -203,17 +204,13 @@ void vmalloc_sync_all(void)
 		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			spinlock_t *pgt_lock;
-			pmd_t *ret;
 
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
-			ret = vmalloc_sync_one(page_address(page), address);
+			vmalloc_sync_one(page_address(page), address);
 			spin_unlock(pgt_lock);
-
-			if (!ret)
-				break;
 		}
 		spin_unlock(&pgd_lock);
 	}
@@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
  */
 static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 					  unsigned long pfn, unsigned long npg,
-					  int warnlvl)
+					  unsigned long lpsize, int warnlvl)
 {
 	pgprotval_t forbidden, res;
 	unsigned long end;
@@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
 	forbidden = res;
 
-	res = protect_kernel_text_ro(start, end);
-	check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
-	forbidden |= res;
+	/*
+	 * Special case to preserve a large page. If the change spans the
+	 * full large page mapping then there is no point to split it
+	 * up. Happens with ftrace and is going to be removed once ftrace
+	 * switched to text_poke().
+	 */
+	if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
+		res = protect_kernel_text_ro(start, end);
+		check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
+		forbidden |= res;
+	}
 
 	/* Check the PFN directly */
 	res = protect_pci_bios(pfn, pfn + npg - 1);
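
The guard enforces the Text RO protection only when the change does not cover one whole, aligned large page: lpsize != npg * PAGE_SIZE catches partial coverage, and (start & (lpsize - 1)) catches misalignment. A quick standalone check of the predicate:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PMD_SIZE  (512 * PAGE_SIZE)  /* 2M large page */

static bool must_enforce_ro(unsigned long start, unsigned long npg,
			    unsigned long lpsize)
{
	return lpsize != npg * PAGE_SIZE || (start & (lpsize - 1));
}

int main(void)
{
	/* full, aligned 2M page: enforcement skipped (prints 0) */
	printf("%d\n", must_enforce_ro(0x200000, 512, PMD_SIZE));
	/* partial range inside the large page: enforced (prints 1) */
	printf("%d\n", must_enforce_ro(0x200000, 1, PMD_SIZE));
	/* misaligned full-size range: enforced (prints 1) */
	printf("%d\n", must_enforce_ro(0x201000, 512, PMD_SIZE));
	return 0;
}
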
@@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	 * extra conditional required here.
 	 */
 	chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
-				      CPA_CONFLICT);
+				      psize, CPA_CONFLICT);
 
 	if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
 		/*
@@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	 * protection requirement in the large page.
 	 */
 	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
-				      CPA_DETECT);
+				      psize, CPA_DETECT);
 
 	/*
 	 * If there is a conflict, split the large page.
@@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
 	if (!cpa->force_static_prot)
 		goto set;
 
-	prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT);
+	/* Hand in lpsize = 0 to enforce the protection mechanism */
+	prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
 
 	if (pgprot_val(prot) == pgprot_val(ref_prot))
 		goto set;
@@ -1503,7 +1512,8 @@ repeat:
 		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 
 		cpa_inc_4k_install();
-		new_prot = static_protections(new_prot, address, pfn, 1,
+		/* Hand in lpsize = 0 to enforce the protection mechanism */
+		new_prot = static_protections(new_prot, address, pfn, 1, 0,
 					      CPA_PROTECT);
 
 		new_prot = pgprot_clear_protnone_bits(new_prot);
@@ -390,8 +390,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 
 	emit_prologue(&prog, bpf_prog->aux->stack_depth,
 		      bpf_prog_was_classic(bpf_prog));
+	addrs[0] = prog - temp;
 
-	for (i = 0; i < insn_cnt; i++, insn++) {
+	for (i = 1; i <= insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
 		u32 dst_reg = insn->dst_reg;
 		u32 src_reg = insn->src_reg;
@@ -1105,7 +1106,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		extra_pass = true;
 		goto skip_init_addrs;
 	}
-	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
+	addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
 	if (!addrs) {
 		prog = orig_prog;
 		goto out_addrs;
@@ -1115,7 +1116,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	 * Before first pass, make a rough estimation of addrs[]
 	 * each BPF instruction is translated to less than 64 bytes
 	 */
-	for (proglen = 0, i = 0; i < prog->len; i++) {
+	for (proglen = 0, i = 0; i <= prog->len; i++) {
 		proglen += 64;
 		addrs[i] = proglen;
 	}
@@ -1180,7 +1181,7 @@ out_image:
 
 	if (!image || !prog->is_func || extra_pass) {
 		if (image)
-			bpf_prog_fill_jited_linfo(prog, addrs);
+			bpf_prog_fill_jited_linfo(prog, addrs + 1);
out_addrs:
 		kfree(addrs);
 		kfree(jit_data);
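
After these three hunks addrs[] has prog->len + 1 slots: addrs[0] records where the prologue ends (the start of instruction 1's code) and addrs[i] is the end offset of instruction i, so instruction i's body spans [addrs[i-1], addrs[i]) and line-info consumers are handed addrs + 1. A toy layout pass showing the indexing, with fixed 8-byte bodies standing in for real emitted code:

#include <stdio.h>

int main(void)
{
	int insn_cnt = 3, prologue = 16, body = 8;
	int addrs[1 + 3];                /* prog->len + 1 entries */

	addrs[0] = prologue;             /* code for insn 1 starts here */
	for (int i = 1; i <= insn_cnt; i++)
		addrs[i] = addrs[i - 1] + body;

	for (int i = 1; i <= insn_cnt; i++)
		printf("insn %d: [%d, %d)\n", i, addrs[i - 1], addrs[i]);
	return 0;
}
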
@@ -12,6 +12,7 @@
 #include <linux/smp.h>
 #include <linux/perf_event.h>
 #include <linux/tboot.h>
+#include <linux/dmi.h>
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
@@ -23,7 +24,7 @@
 #include <asm/debugreg.h>
 #include <asm/cpu.h>
 #include <asm/mmu_context.h>
-#include <linux/dmi.h>
+#include <asm/cpu_device_id.h>
 
 #ifdef CONFIG_X86_32
 __visible unsigned long saved_context_ebx;
@@ -397,15 +398,14 @@ static int __init bsp_pm_check_init(void)
 
 core_initcall(bsp_pm_check_init);
 
-static int msr_init_context(const u32 *msr_id, const int total_num)
+static int msr_build_context(const u32 *msr_id, const int num)
 {
-	int i = 0;
 	struct saved_msrs *saved_msrs = &saved_context.saved_msrs;
 	struct saved_msr *msr_array;
+	int total_num;
+	int i, j;
 
-	if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) {
-		pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n");
-		return -EINVAL;
-	}
+	total_num = saved_msrs->num + num;
 
 	msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
 	if (!msr_array) {
@@ -413,19 +413,30 @@ static int msr_init_context(const u32 *msr_id, const int total_num)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < total_num; i++) {
-		msr_array[i].info.msr_no	= msr_id[i];
+	if (saved_msrs->array) {
+		/*
+		 * Multiple callbacks can invoke this function, so copy any
+		 * MSR save requests from previous invocations.
+		 */
+		memcpy(msr_array, saved_msrs->array,
+		       sizeof(struct saved_msr) * saved_msrs->num);
+
+		kfree(saved_msrs->array);
+	}
+
+	for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
+		msr_array[i].info.msr_no	= msr_id[j];
 		msr_array[i].valid		= false;
 		msr_array[i].info.reg.q		= 0;
 	}
-	saved_context.saved_msrs.num	= total_num;
-	saved_context.saved_msrs.array	= msr_array;
+	saved_msrs->num   = total_num;
+	saved_msrs->array = msr_array;
 
 	return 0;
 }
 
 /*
- * The following section is a quirk framework for problematic BIOSen:
+ * The following sections are a quirk framework for problematic BIOSen:
  * Sometimes MSRs are modified by the BIOSen after suspended to
  * RAM, this might cause unexpected behavior after wakeup.
  * Thus we save/restore these specified MSRs across suspend/resume
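
msr_build_context() now appends instead of refusing to run twice: it grows the array, copies entries queued by earlier quirk callbacks, frees the old buffer, and fills only the tail with the new MSR ids. The same grow-and-append shape in plain C:

#include <stdlib.h>
#include <string.h>

struct saved_msr { unsigned int msr_no; };

struct saved_msrs {
	int num;
	struct saved_msr *array;
};

static int msrs_append(struct saved_msrs *s, const unsigned int *ids, int num)
{
	int total = s->num + num;
	struct saved_msr *arr = malloc(total * sizeof(*arr));

	if (!arr)
		return -1;

	if (s->array) {                  /* keep earlier callbacks' requests */
		memcpy(arr, s->array, s->num * sizeof(*arr));
		free(s->array);
	}
	for (int i = s->num, j = 0; i < total; i++, j++)
		arr[i].msr_no = ids[j];  /* fill only the new tail */

	s->num = total;
	s->array = arr;
	return 0;
}

int main(void)
{
	struct saved_msrs s = { 0, NULL };
	unsigned int a[] = { 0x19a }, b[] = { 0xc0011004 };

	msrs_append(&s, a, 1);           /* e.g. a DMI quirk */
	msrs_append(&s, b, 1);           /* e.g. a CPU-match quirk */
	free(s.array);
	return 0;
}
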
@@ -440,7 +451,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d)
 	u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
 
 	pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident);
-	return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
+	return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
 }
 
 static const struct dmi_system_id msr_save_dmi_table[] = {
@@ -455,9 +466,58 @@
 	{}
 };
 
+static int msr_save_cpuid_features(const struct x86_cpu_id *c)
+{
+	u32 cpuid_msr_id[] = {
+		MSR_AMD64_CPUID_FN_1,
+	};
+
+	pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n",
+		c->family);
+
+	return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id));
+}
+
+static const struct x86_cpu_id msr_save_cpu_table[] = {
+	{
+		.vendor = X86_VENDOR_AMD,
+		.family = 0x15,
+		.model = X86_MODEL_ANY,
+		.feature = X86_FEATURE_ANY,
+		.driver_data = (kernel_ulong_t)msr_save_cpuid_features,
+	},
+	{
+		.vendor = X86_VENDOR_AMD,
+		.family = 0x16,
+		.model = X86_MODEL_ANY,
+		.feature = X86_FEATURE_ANY,
+		.driver_data = (kernel_ulong_t)msr_save_cpuid_features,
+	},
+	{}
+};
+
+typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *);
+static int pm_cpu_check(const struct x86_cpu_id *c)
+{
+	const struct x86_cpu_id *m;
+	int ret = 0;
+
+	m = x86_match_cpu(msr_save_cpu_table);
+	if (m) {
+		pm_cpu_match_t fn;
+
+		fn = (pm_cpu_match_t)m->driver_data;
+		ret = fn(m);
+	}
+
+	return ret;
+}
+
 static int pm_check_save_msr(void)
 {
 	dmi_check_system(msr_save_dmi_table);
+	pm_cpu_check(msr_save_cpu_table);
 
 	return 0;
 }
 
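
The quirk table stores a function pointer in driver_data, so whichever entry matches the running CPU decides which callback fires. A standalone sketch of that match-then-dispatch pattern, with a toy matcher rather than the kernel's x86_match_cpu():

#include <stdio.h>

struct cpu_id {
	int family;
	int (*cb)(const struct cpu_id *);
};

static int save_cpuid_msrs(const struct cpu_id *c)
{
	printf("family %#x: queueing MSRs for save/restore\n", c->family);
	return 0;
}

static const struct cpu_id quirk_table[] = {
	{ 0x15, save_cpuid_msrs },
	{ 0x16, save_cpuid_msrs },
	{ 0 }                           /* sentinel */
};

static const struct cpu_id *match_cpu(const struct cpu_id *tbl, int family)
{
	for (; tbl->family; tbl++)
		if (tbl->family == family)
			return tbl;
	return NULL;
}

int main(void)
{
	const struct cpu_id *m = match_cpu(quirk_table, 0x16);

	return m ? m->cb(m) : 0;
}
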
@@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
 targets += $(purgatory-y)
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
+$(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
 $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
@@ -17,11 +20,34 @@ KCOV_INSTRUMENT := n
 
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
-# sure how to relocate those. Like kexec-tools, use custom flags.
-
+# sure how to relocate those.
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_sha256.o		+= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_purgatory.o	+= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_string.o		+= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_kexec-purgatory.o	+= $(CC_FLAGS_FTRACE)
+endif
+
 KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
 KBUILD_CFLAGS += -m$(BITS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+ifdef CONFIG_STACKPROTECTOR
+CFLAGS_REMOVE_sha256.o		+= -fstack-protector
+CFLAGS_REMOVE_purgatory.o	+= -fstack-protector
+CFLAGS_REMOVE_string.o		+= -fstack-protector
+CFLAGS_REMOVE_kexec-purgatory.o	+= -fstack-protector
+endif
+
+ifdef CONFIG_STACKPROTECTOR_STRONG
+CFLAGS_REMOVE_sha256.o		+= -fstack-protector-strong
+CFLAGS_REMOVE_purgatory.o	+= -fstack-protector-strong
+CFLAGS_REMOVE_string.o		+= -fstack-protector-strong
+CFLAGS_REMOVE_kexec-purgatory.o	+= -fstack-protector-strong
+endif
+
+ifdef CONFIG_RETPOLINE
+CFLAGS_REMOVE_sha256.o		+= $(RETPOLINE_CFLAGS)
+CFLAGS_REMOVE_purgatory.o	+= $(RETPOLINE_CFLAGS)
+CFLAGS_REMOVE_string.o		+= $(RETPOLINE_CFLAGS)
+CFLAGS_REMOVE_kexec-purgatory.o	+= $(RETPOLINE_CFLAGS)
+endif
 
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
@@ -68,3 +68,9 @@ void purgatory(void)
 	}
 	copy_backup_region();
 }
+
+/*
+ * Defined in order to reuse memcpy() and memset() from
+ * arch/x86/boot/compressed/string.c
+ */
+void warn(const char *msg) {}
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Simple string functions.
- *
- * Copyright (C) 2014 Red Hat Inc.
- *
- * Author:
- *       Vivek Goyal <vgoyal@redhat.com>
- */
-
-#include <linux/types.h>
-
-#include "../boot/string.c"
-
-void *memcpy(void *dst, const void *src, size_t len)
-{
-	return __builtin_memcpy(dst, src, len);
-}
-
-void *memset(void *dst, int c, size_t len)
-{
-	return __builtin_memset(dst, c, len);
-}
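
Why this file goes away: even under -ffreestanding the compiler may lower __builtin_memcpy/__builtin_memset back into calls to memcpy/memset, so wrappers like the deleted ones can recurse into themselves; purgatory now compiles real implementations from arch/x86/boot/compressed/string.c instead. A minimal open-coded pair of the kind a freestanding environment needs, illustrative rather than the kernel's optimized versions:

#include <stddef.h>

void *my_memcpy(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	while (len--)
		*d++ = *s++;
	return dst;
}

void *my_memset(void *dst, int c, size_t len)
{
	unsigned char *d = dst;

	while (len--)
		*d++ = (unsigned char)c;
	return dst;
}

int main(void)
{
	char buf[8];

	my_memset(buf, 0, sizeof(buf));
	my_memcpy(buf, "ok", 3);
	return buf[0] == 'o' ? 0 : 1;
}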