Merge tag 'v5.7-rc1' into locking/kcsan, to resolve conflicts and refresh

Resolve these conflicts:

	arch/x86/Kconfig
	arch/x86/kernel/Makefile

Do a minor "evil merge" to move the KCSAN entry up a bit by a few lines
in the Kconfig to reduce the probability of future conflicts.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2020-04-13 09:44:39 +02:00
10808 changed files with 507021 additions and 241301 deletions

View File

@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
vsyscall.lds
vsyscall_32.lds
vmlinux.lds

View File

@@ -32,7 +32,6 @@ KASAN_SANITIZE_paravirt.o := n
# by several compilation units. To be safe, disable all instrumentation.
KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
@@ -57,6 +56,8 @@ obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_ia32.o
obj-$(CONFIG_IA32_EMULATION) += sys_ia32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
@@ -158,6 +159,4 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += vsmp_64.o
endif
ifdef CONFIG_EFI
obj-$(CONFIG_IMA) += ima_arch.o
endif
obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o

View File

@@ -45,6 +45,7 @@ EXPORT_SYMBOL(acpi_disabled);
#define PREFIX "ACPI: "
int acpi_noirq; /* skip ACPI IRQ initialization */
static int acpi_nobgrt; /* skip ACPI BGRT */
int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
@@ -1619,7 +1620,7 @@ int __init acpi_boot_init(void)
acpi_process_madt();
acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
if (IS_ENABLED(CONFIG_ACPI_BGRT))
if (IS_ENABLED(CONFIG_ACPI_BGRT) && !acpi_nobgrt)
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
if (!acpi_noirq)
@@ -1671,6 +1672,13 @@ static int __init parse_acpi(char *arg)
}
early_param("acpi", parse_acpi);
static int __init parse_acpi_bgrt(char *arg)
{
acpi_nobgrt = true;
return 0;
}
early_param("bgrt_disable", parse_acpi_bgrt);
/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
@@ -1740,7 +1748,7 @@ int __acpi_acquire_global_lock(unsigned int *lock)
new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
val = cmpxchg(lock, old, new);
} while (unlikely (val != old));
return (new < 3) ? -1 : 0;
return ((new & 0x3) < 3) ? -1 : 0;
}
int __acpi_release_global_lock(unsigned int *lock)

View File

@@ -161,7 +161,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
/* Make sure we are running on right CPU */
retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx);
retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx,
false);
if (retval == 0) {
/* Use the hint in CST */
percpu_entry->states[cx->index].eax = cx->address;

View File

@@ -43,7 +43,7 @@ unsigned long acpi_get_wakeup_address(void)
*
* Wrapper around acpi_enter_sleep_state() to be called by assmebly.
*/
acpi_status asmlinkage __visible x86_acpi_enter_sleep_state(u8 state)
asmlinkage acpi_status __visible x86_acpi_enter_sleep_state(u8 state)
{
return acpi_enter_sleep_state(state);
}

View File

@@ -19,4 +19,4 @@ extern void do_suspend_lowlevel(void);
extern int x86_acpi_suspend_lowlevel(void);
acpi_status asmlinkage x86_acpi_enter_sleep_state(u8 state);
asmlinkage acpi_status x86_acpi_enter_sleep_state(u8 state);

View File

@@ -1167,8 +1167,8 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
atomic_cond_read_acquire(&desc.refs, !VAL);
}
void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate)
static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate)
{
struct insn insn;

View File

@@ -744,7 +744,8 @@ int __init gart_iommu_init(void)
start_pfn = PFN_DOWN(aper_base);
if (!pfn_range_is_mapped(start_pfn, end_pfn))
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT,
PAGE_KERNEL);
pr_info("PCI-DMA: using GART IOMMU.\n");
iommu_size = check_iommu_size(info.aper_base, aper_size);

View File

@@ -36,10 +36,9 @@ static const struct pci_device_id amd_root_ids[] = {
{}
};
#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704
const struct pci_device_id amd_nb_misc_ids[] = {
static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
@@ -56,7 +55,6 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },

View File

@@ -546,12 +546,6 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
#define DEADLINE_MODEL_MATCH_FUNC(model, func) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }
#define DEADLINE_MODEL_MATCH_REV(model, rev) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }
static u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
@@ -588,23 +582,23 @@ static u32 skx_deadline_rev(void)
}
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20),
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G, 0x17),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL, 0x25),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G, 0x17),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2),
X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L, 0xb2),
X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE, 0xb2),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52),
X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L, 0x52),
X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE, 0x52),
{},
};

View File

@@ -556,6 +556,12 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irqd->chip_data = apicd;
irqd->hwirq = virq + i;
irqd_set_single_target(irqd);
/*
* Prevent that any of these interrupts is invoked in
* non interrupt context via e.g. generic_handle_irq()
* as that can corrupt the affinity move state.
*/
irqd_set_handle_enforce_irqctx(irqd);
/*
* Legacy vectors are already assigned when the IOAPIC
* takes them over. They stay on the same vector. This is

View File

@@ -88,7 +88,6 @@ static void __used common(void)
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_init_size, boot_params, hdr.init_size);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));

View File

@@ -3,12 +3,9 @@
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
#include <asm/ucontext.h>
#include <linux/efi.h>
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
#include <asm/ucontext.h>
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
@@ -62,6 +59,5 @@ void foo(void)
#endif
BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
DEFINE(NR_syscalls, sizeof(syscalls));
DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
}

View File

@@ -5,30 +5,6 @@
#include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
#define __SYSCALL_X32(nr, sym, qual)
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#ifdef CONFIG_X86_X32_ABI
#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
static char syscalls_x32[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL_64
#undef __SYSCALL_X32
#endif
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
#undef __SYSCALL_I386
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#include <asm/kvm_para.h>
#endif
@@ -90,17 +66,5 @@ int main(void)
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
#ifdef CONFIG_X86_X32_ABI
DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
#endif
DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
return 0;
}

View File

@@ -1 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
capflags.c

View File

@@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
}
static void amd_detect_ppin(struct cpuinfo_x86 *c)
{
unsigned long long val;
if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
return;
/* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
goto clear_ppin;
/* PPIN is locked in disabled mode, clear feature bit */
if ((val & 3UL) == 1UL)
goto clear_ppin;
/* If PPIN is disabled, try to enable it */
if (!(val & 2UL)) {
wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
}
/* If PPIN_EN bit is 1, return from here; otherwise fall through */
if (val & 2UL)
return;
clear_ppin:
clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
}
u16 amd_get_nb_id(int cpu)
{
return per_cpu(cpu_llc_id, cpu);
@@ -926,7 +955,8 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
case 0x17: init_amd_zn(c); break;
case 0x17: fallthrough;
case 0x19: init_amd_zn(c); break;
}
/*
@@ -941,6 +971,7 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
amd_detect_ppin(c);
init_amd_cacheinfo(c);

View File

@@ -1008,8 +1008,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_ITLB_MULTIHIT BIT(7)
#define NO_SPECTRE_V2 BIT(8)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
#define VULNWL_INTEL(model, whitelist) \
VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
@@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c);
cpu_set_core_cap_bits(c);
fpu__init_system(c);
#ifdef CONFIG_X86_32

View File

@@ -5,6 +5,7 @@
#include <asm/msr-index.h>
#include <asm/processor.h>
#include <asm/vmx.h>
#include "cpu.h"
#undef pr_fmt
#define pr_fmt(fmt) "x86/cpu: " fmt

View File

@@ -19,6 +19,9 @@
#include <asm/microcode_intel.h>
#include <asm/hwcap2.h>
#include <asm/elf.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -31,6 +34,20 @@
#include <asm/apic.h>
#endif
enum split_lock_detect_state {
sld_off = 0,
sld_warn,
sld_fatal,
};
/*
* Default to sld_off because most systems do not support split lock detection
* split_lock_setup() will switch this to sld_warn on systems that support
* split lock detect, unless there is a command line override.
*/
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
@@ -570,6 +587,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
static void split_lock_init(void);
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
@@ -684,6 +703,8 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_enable();
if (tsx_ctrl_state == TSX_CTRL_DISABLE)
tsx_disable();
split_lock_init();
}
#ifdef CONFIG_X86_32
@@ -945,3 +966,188 @@ static const struct cpu_dev intel_cpu_dev = {
};
cpu_dev_register(intel_cpu_dev);
#undef pr_fmt
#define pr_fmt(fmt) "x86/split lock detection: " fmt
static const struct {
const char *option;
enum split_lock_detect_state state;
} sld_options[] __initconst = {
{ "off", sld_off },
{ "warn", sld_warn },
{ "fatal", sld_fatal },
};
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
return len == arglen && !strncmp(arg, opt, len);
}
static bool split_lock_verify_msr(bool on)
{
u64 ctrl, tmp;
if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
return false;
if (on)
ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
else
ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
return false;
rdmsrl(MSR_TEST_CTRL, tmp);
return ctrl == tmp;
}
static void __init split_lock_setup(void)
{
enum split_lock_detect_state state = sld_warn;
char arg[20];
int i, ret;
if (!split_lock_verify_msr(false)) {
pr_info("MSR access failed: Disabled\n");
return;
}
ret = cmdline_find_option(boot_command_line, "split_lock_detect",
arg, sizeof(arg));
if (ret >= 0) {
for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
if (match_option(arg, ret, sld_options[i].option)) {
state = sld_options[i].state;
break;
}
}
}
switch (state) {
case sld_off:
pr_info("disabled\n");
return;
case sld_warn:
pr_info("warning about user-space split_locks\n");
break;
case sld_fatal:
pr_info("sending SIGBUS on user-space split_locks\n");
break;
}
rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
if (!split_lock_verify_msr(true)) {
pr_info("MSR access failed: Disabled\n");
return;
}
sld_state = state;
setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
/*
* MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
* is not implemented as one thread could undo the setting of the other
* thread immediately after dropping the lock anyway.
*/
static void sld_update_msr(bool on)
{
u64 test_ctrl_val = msr_test_ctrl_cache;
if (on)
test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
}
static void split_lock_init(void)
{
split_lock_verify_msr(sld_state != sld_off);
}
static void split_lock_warn(unsigned long ip)
{
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
current->comm, current->pid, ip);
/*
* Disable the split lock detection for this task so it can make
* progress and set TIF_SLD so the detection is re-enabled via
* switch_to_sld() when the task is scheduled out.
*/
sld_update_msr(false);
set_tsk_thread_flag(current, TIF_SLD);
}
bool handle_guest_split_lock(unsigned long ip)
{
if (sld_state == sld_warn) {
split_lock_warn(ip);
return true;
}
pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
current->comm, current->pid,
sld_state == sld_fatal ? "fatal" : "bogus", ip);
current->thread.error_code = 0;
current->thread.trap_nr = X86_TRAP_AC;
force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
return false;
}
EXPORT_SYMBOL_GPL(handle_guest_split_lock);
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
return false;
split_lock_warn(regs->ip);
return true;
}
/*
* This function is called only when switching between tasks with
* different split-lock detection modes. It sets the MSR for the
* mode of the new task. This is right most of the time, but since
* the MSR is shared by hyperthreads on a physical core there can
* be glitches when the two threads need different modes.
*/
void switch_to_sld(unsigned long tifn)
{
sld_update_msr(!(tifn & _TIF_SLD));
}
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
/*
* The following processors have the split lock detection feature. But
* since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
* be enumerated. Enable it by family and model matching on these
* processors.
*/
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
{}
};
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
{
u64 ia32_core_caps = 0;
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
/* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
/* Enumerate split lock detection by family and model. */
if (x86_match_cpu(split_lock_cpu_ids))
ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
}
if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
split_lock_setup();
}

View File

@@ -16,12 +16,17 @@
* respective wildcard entries.
*
* A typical table entry would be to match a specific CPU
* { X86_VENDOR_INTEL, 6, 0x12 }
* or to match a specific CPU feature
* { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
*
* X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
* X86_FEATURE_ANY, NULL);
*
* Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
* %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
* %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
*
* asm/cpu_device_id.h contains a set of useful macros which are shortcuts
* for various common selections. The above can be shortened to:
*
* X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL);
*
* Arrays used to match for this should also be declared using
* MODULE_DEVICE_TABLE(x86cpu, ...)

View File

@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
rdmsrl(MSR_AMD_PPIN, m->ppin);
m->microcode = boot_cpu_data.microcode;
}
@@ -1213,8 +1215,14 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
* On Intel systems this is entered on all CPUs in parallel through
* MCE broadcast. However some CPUs might be broken beyond repair,
* so be always careful when synchronizing with others.
*
* Tracing and kprobes are disabled: if we interrupted a kernel context
* with IF=1, we need to minimize stack usage. There are also recursion
* issues: if the machine check was due to a failure of the memory
* backing the user stack, tracing that reads the user stack will cause
* potentially infinite recursion.
*/
void do_machine_check(struct pt_regs *regs, long error_code)
void notrace do_machine_check(struct pt_regs *regs, long error_code)
{
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
@@ -1360,6 +1368,7 @@ out_ist:
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
NOKPROBE_SYMBOL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
@@ -1877,6 +1886,8 @@ bool filter_mce(struct mce *m)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return amd_filter_mce(m);
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
return intel_filter_mce(m);
return false;
}
@@ -1892,10 +1903,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code)
{
machine_check_vector(regs, error_code);
}
NOKPROBE_SYMBOL(do_mce);
/*
* Called for each booted CPU to set up machine checks.

View File

@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
static struct mce_log_buffer mcelog = {
.signature = MCE_LOG_SIGNATURE,
.len = MCE_LOG_LEN,
.recordlen = sizeof(struct mce),
};
static struct mce_log_buffer *mcelog;
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
mutex_lock(&mce_chrdev_read_mutex);
entry = mcelog.next;
entry = mcelog->next;
/*
* When the buffer fills up discard new entries. Assume that the
* earlier errors are the more interesting ones:
*/
if (entry >= MCE_LOG_LEN) {
set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
if (entry >= mcelog->len) {
set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
goto unlock;
}
mcelog.next = entry + 1;
mcelog->next = entry + 1;
memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
mcelog.entry[entry].finished = 1;
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
mcelog->entry[entry].finished = 1;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
/* Only supports full reads right now */
err = -EINVAL;
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
goto out;
next = mcelog.next;
next = mcelog->next;
err = 0;
for (i = 0; i < next; i++) {
struct mce *m = &mcelog.entry[i];
struct mce *m = &mcelog->entry[i];
err |= copy_to_user(buf, m, sizeof(*m));
buf += sizeof(*m);
}
memset(mcelog.entry, 0, next * sizeof(struct mce));
mcelog.next = 0;
memset(mcelog->entry, 0, next * sizeof(struct mce));
mcelog->next = 0;
if (err)
err = -EFAULT;
@@ -242,7 +238,7 @@ out:
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
if (READ_ONCE(mcelog.next))
if (READ_ONCE(mcelog->next))
return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return EPOLLIN | EPOLLRDNORM;
@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
return put_user(MCE_LOG_LEN, p);
return put_user(mcelog->len, p);
case MCE_GETCLEAR_FLAGS: {
unsigned flags;
do {
flags = mcelog.flags;
} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
flags = mcelog->flags;
} while (cmpxchg(&mcelog->flags, flags, 0) != flags);
return put_user(flags, p);
}
@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
static __init int dev_mcelog_init_device(void)
{
int mce_log_len;
int err;
mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
if (!mcelog)
return -ENOMEM;
strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
mcelog->len = mce_log_len;
mcelog->recordlen = sizeof(struct mce);
/* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device);
if (err) {
@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
else
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
kfree(mcelog);
return err;
}

View File

@@ -521,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
}
bool intel_filter_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
/* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
if ((c->x86 == 6) &&
((c->x86_model == INTEL_FAM6_HASWELL) ||
(c->x86_model == INTEL_FAM6_HASWELL_L) ||
(c->x86_model == INTEL_FAM6_BROADWELL) ||
(c->x86_model == INTEL_FAM6_HASWELL_G)) &&
(m->bank == 0) &&
((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
return true;
return false;
}

View File

@@ -8,6 +8,9 @@
#include <linux/device.h>
#include <asm/mce.h>
/* Pointer to the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
enum severity_level {
MCE_NO_SEVERITY,
MCE_DEFERRED_SEVERITY,
@@ -48,6 +51,7 @@ void cmci_disable_bank(int bank);
void intel_init_cmci(void);
void intel_init_lmce(void);
void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -56,6 +60,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
static inline bool intel_filter_mce(struct mce *m) { return false; };
#endif
void mce_timer_kick(unsigned long interval);

View File

@@ -4,6 +4,7 @@
#include <linux/cpu.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#define UMWAIT_C02_ENABLE 0

View File

@@ -25,6 +25,8 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
@@ -47,6 +49,11 @@
#define VMWARE_CMD_GETVCPU_INFO 68
#define VMWARE_CMD_LEGACY_X2APIC 3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK 91
#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED 0
#define STEALCLOCK_ENABLED 1
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
__asm__("inl (%%dx), %%eax" : \
@@ -86,6 +93,18 @@
} \
} while (0)
struct vmware_steal_time {
union {
uint64_t clock; /* stolen time counter in units of vtsc */
struct {
/* only for little-endian */
uint32_t clock_low;
uint32_t clock_high;
};
};
uint64_t reserved[7];
};
static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode __ro_after_init;
@@ -103,15 +122,25 @@ static unsigned long vmware_get_tsc_khz(void)
#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static int vmw_sched_clock __initdata = 1;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */
static __init int setup_vmw_sched_clock(char *s)
{
vmw_sched_clock = 0;
vmw_sched_clock = false;
return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
static __init int parse_no_stealacc(char *arg)
{
steal_acc = false;
return 0;
}
early_param("no-steal-acc", parse_no_stealacc);
static unsigned long long notrace vmware_sched_clock(void)
{
unsigned long long ns;
@@ -122,7 +151,7 @@ static unsigned long long notrace vmware_sched_clock(void)
return ns;
}
static void __init vmware_sched_clock_setup(void)
static void __init vmware_cyc2ns_setup(void)
{
struct cyc2ns_data *d = &vmware_cyc2ns;
unsigned long long tsc_now = rdtsc();
@@ -132,17 +161,201 @@ static void __init vmware_sched_clock_setup(void)
d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
d->cyc2ns_shift);
pv_ops.time.sched_clock = vmware_sched_clock;
pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
uint32_t result, info;
asm volatile (VMWARE_HYPERCALL :
"=a"(result),
"=c"(info) :
"a"(VMWARE_HYPERVISOR_MAGIC),
"b"(0),
"c"(VMWARE_CMD_STEALCLOCK),
"d"(0),
"S"(arg1),
"D"(arg2) :
"memory");
return result;
}
static bool stealclock_enable(phys_addr_t pa)
{
return vmware_cmd_stealclock(upper_32_bits(pa),
lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}
static int __stealclock_disable(void)
{
return vmware_cmd_stealclock(0, 1);
}
static void stealclock_disable(void)
{
__stealclock_disable();
}
static bool vmware_is_stealclock_available(void)
{
return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}
/**
* vmware_steal_clock() - read the per-cpu steal clock
* @cpu: the cpu number whose steal clock we want to read
*
* The function reads the steal clock if we are on a 64-bit system, otherwise
* reads it in parts, checking that the high part didn't change in the
* meantime.
*
* Return:
* The steal clock reading in ns.
*/
static uint64_t vmware_steal_clock(int cpu)
{
struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
uint64_t clock;
if (IS_ENABLED(CONFIG_64BIT))
clock = READ_ONCE(steal->clock);
else {
uint32_t initial_high, low, high;
do {
initial_high = READ_ONCE(steal->clock_high);
/* Do not reorder initial_high and high readings */
virt_rmb();
low = READ_ONCE(steal->clock_low);
/* Keep low reading in between */
virt_rmb();
high = READ_ONCE(steal->clock_high);
} while (initial_high != high);
clock = ((uint64_t)high << 32) | low;
}
return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
vmware_cyc2ns.cyc2ns_shift);
}
static void vmware_register_steal_time(void)
{
int cpu = smp_processor_id();
struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);
if (!has_steal_clock)
return;
if (!stealclock_enable(slow_virt_to_phys(st))) {
has_steal_clock = false;
return;
}
pr_info("vmware-stealtime: cpu %d, pa %llx\n",
cpu, (unsigned long long) slow_virt_to_phys(st));
}
static void vmware_disable_steal_time(void)
{
if (!has_steal_clock)
return;
stealclock_disable();
}
static void vmware_guest_cpu_init(void)
{
if (has_steal_clock)
vmware_register_steal_time();
}
static void vmware_pv_guest_cpu_reboot(void *unused)
{
vmware_disable_steal_time();
}
static int vmware_pv_reboot_notify(struct notifier_block *nb,
unsigned long code, void *unused)
{
if (code == SYS_RESTART)
on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
return NOTIFY_DONE;
}
static struct notifier_block vmware_pv_reboot_nb = {
.notifier_call = vmware_pv_reboot_notify,
};
#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
vmware_guest_cpu_init();
native_smp_prepare_boot_cpu();
}
static int vmware_cpu_online(unsigned int cpu)
{
local_irq_disable();
vmware_guest_cpu_init();
local_irq_enable();
return 0;
}
static int vmware_cpu_down_prepare(unsigned int cpu)
{
local_irq_disable();
vmware_disable_steal_time();
local_irq_enable();
return 0;
}
#endif
static __init int activate_jump_labels(void)
{
if (has_steal_clock) {
static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
static_key_slow_inc(&paravirt_steal_rq_enabled);
}
return 0;
}
arch_initcall(activate_jump_labels);
static void __init vmware_paravirt_ops_setup(void)
{
pv_info.name = "VMware hypervisor";
pv_ops.cpu.io_delay = paravirt_nop;
if (vmware_tsc_khz && vmw_sched_clock)
vmware_sched_clock_setup();
if (vmware_tsc_khz == 0)
return;
vmware_cyc2ns_setup();
if (vmw_sched_clock)
pv_ops.time.sched_clock = vmware_sched_clock;
if (vmware_is_stealclock_available()) {
has_steal_clock = true;
pv_ops.time.steal_clock = vmware_steal_clock;
/* We use reboot notifier only to disable steal clock */
register_reboot_notifier(&vmware_pv_reboot_nb);
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu =
vmware_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"x86/vmware:online",
vmware_cpu_online,
vmware_cpu_down_prepare) < 0)
pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
vmware_guest_cpu_init();
#endif
}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
@@ -213,7 +426,7 @@ static void __init vmware_platform_setup(void)
vmware_set_capabilities();
}
static u8 vmware_select_hypercall(void)
static u8 __init vmware_select_hypercall(void)
{
int eax, ebx, ecx, edx;

View File

@@ -120,11 +120,6 @@ static bool xfeature_is_supervisor(int xfeature_nr)
return ecx & 1;
}
static bool xfeature_is_user(int xfeature_nr)
{
return !xfeature_is_supervisor(xfeature_nr);
}
/*
* When executing XSAVEOPT (or other optimized XSAVE instructions), if
* a processor implementation detects that an FPU state component is still
@@ -265,21 +260,25 @@ static void __init setup_xstate_features(void)
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
/*
* If an xfeature is supervisor state, the offset
* in EBX is invalid. We leave it to -1.
*/
if (xfeature_is_user(i))
xstate_offsets[i] = ebx;
xstate_sizes[i] = eax;
/*
* In our xstate size checks, we assume that the
* highest-numbered xstate feature has the
* highest offset in the buffer. Ensure it does.
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
if (xfeature_is_supervisor(i))
continue;
xstate_offsets[i] = ebx;
/*
* In our xstate size checks, we assume that the highest-numbered
* xstate feature has the highest offset in the buffer. Ensure
* it does.
*/
WARN_ONCE(last_good_offset > xstate_offsets[i],
"x86/fpu: misordered xstate at %d\n", last_good_offset);
"x86/fpu: misordered xstate at %d\n", last_good_offset);
last_good_offset = xstate_offsets[i];
}
}
@@ -326,6 +325,13 @@ static int xfeature_is_aligned(int xfeature_nr)
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
if (!xfeature_enabled(xfeature_nr)) {
WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
xfeature_nr);
return 0;
}
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
/*
* The value returned by ECX[1] indicates the alignment
@@ -338,11 +344,11 @@ static int xfeature_is_aligned(int xfeature_nr)
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
* of the xsave aread.
* of the xsave area.
*/
static void __init setup_xstate_comp(void)
static void __init setup_xstate_comp_offsets(void)
{
unsigned int xstate_comp_sizes[XFEATURE_MAX];
unsigned int next_offset;
int i;
/*
@@ -356,31 +362,23 @@ static void __init setup_xstate_comp(void)
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (xfeature_enabled(i)) {
if (xfeature_enabled(i))
xstate_comp_offsets[i] = xstate_offsets[i];
xstate_comp_sizes[i] = xstate_sizes[i];
}
}
return;
}
xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
FXSAVE_SIZE + XSAVE_HDR_SIZE;
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (xfeature_enabled(i))
xstate_comp_sizes[i] = xstate_sizes[i];
else
xstate_comp_sizes[i] = 0;
if (!xfeature_enabled(i))
continue;
if (i > FIRST_EXTENDED_XFEATURE) {
xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ xstate_comp_sizes[i-1];
if (xfeature_is_aligned(i))
next_offset = ALIGN(next_offset, 64);
if (xfeature_is_aligned(i))
xstate_comp_offsets[i] =
ALIGN(xstate_comp_offsets[i], 64);
}
xstate_comp_offsets[i] = next_offset;
next_offset += xstate_sizes[i];
}
}
@@ -774,7 +772,7 @@ void __init fpu__init_system_xstate(void)
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp();
setup_xstate_comp_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
@@ -897,8 +895,6 @@ const void *get_xsave_field_ptr(int xfeature_nr)
#ifdef CONFIG_ARCH_HAS_PKEYS
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
@@ -917,6 +913,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
/*
* This code should only be called with valid 'pkey'
* values originating from in-kernel users. Complain
* if a bad value is observed.
*/
WARN_ON_ONCE(pkey >= arch_max_pkey());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
new_pkru_bits |= PKRU_AD_BIT;

View File

@@ -67,11 +67,6 @@ __HEAD
SYM_CODE_START(startup_32)
movl pa(initial_stack),%ecx
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 2f
/*
* Set segments to known values.
*/
@@ -82,7 +77,6 @@ SYM_CODE_START(startup_32)
movl %eax,%fs
movl %eax,%gs
movl %eax,%ss
2:
leal -__PAGE_OFFSET(%ecx),%esp
/*

View File

@@ -17,7 +17,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
size = sizeof(secboot);
if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
pr_info("ima: secureboot mode unknown, no efi\n");
return efi_secureboot_mode_unknown;
}

View File

@@ -13,6 +13,7 @@
#include <asm/io_bitmap.h>
#include <asm/desc.h>
#include <asm/syscalls.h>
#ifdef CONFIG_X86_IOPL_IOPERM

View File

@@ -230,7 +230,7 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
__visible void __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc * desc;
@@ -263,7 +263,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
exiting_irq();
set_irq_regs(old_regs);
return 1;
}
#ifdef CONFIG_X86_LOCAL_APIC

View File

@@ -44,15 +44,6 @@
* (these are usually mapped into the 0x30-0xff vector range)
*/
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
static struct irqaction irq2 = {
.handler = no_action,
.name = "cascade",
.flags = IRQF_NO_THREAD,
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
@@ -84,7 +75,7 @@ void __init init_IRQ(void)
* On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
* these IRQ's are handled by more mordern controllers like IO-APIC,
* these IRQs are handled by more modern controllers like IO-APIC,
* then this vector space can be freed and re-used dynamically as the
* irq's migrate etc.
*/
@@ -104,6 +95,9 @@ void __init native_init_IRQ(void)
idt_setup_apic_and_irq_gates();
lapic_assign_system_vectors();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
/* IRQ2 is cascade interrupt to second interrupt controller */
if (request_irq(2, no_action, IRQF_NO_THREAD, "cascade", NULL))
pr_err("%s: request_irq() failed\n", "cascade");
}
}

View File

@@ -58,7 +58,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
return code;
}
static void inline __jump_label_transform(struct jump_entry *entry,
static inline void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{

View File

@@ -141,9 +141,8 @@ prepare_add_efi_setup_data(struct boot_params *params,
struct setup_data *sd = (void *)params + efi_setup_data_offset;
struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
esd->fw_vendor = efi.fw_vendor;
esd->runtime = efi.runtime;
esd->tables = efi.config_table;
esd->fw_vendor = efi_fw_vendor;
esd->tables = efi_config_table;
esd->smbios = efi.smbios;
sd->type = SETUP_EFI;

View File

@@ -71,6 +71,21 @@ found:
return (unsigned long)buf;
}
static void synthesize_clac(kprobe_opcode_t *addr)
{
/*
* Can't be static_cpu_has() due to how objtool treats this feature bit.
* This isn't a fast path anyway.
*/
if (!boot_cpu_has(X86_FEATURE_SMAP))
return;
/* Replace the NOP3 with CLAC */
addr[0] = 0x0f;
addr[1] = 0x01;
addr[2] = 0xca;
}
/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
@@ -92,6 +107,9 @@ asm (
/* We don't bother saving the ss register */
" pushq %rsp\n"
" pushfq\n"
".global optprobe_template_clac\n"
"optprobe_template_clac:\n"
ASM_NOP3
SAVE_REGS_STRING
" movq %rsp, %rsi\n"
".global optprobe_template_val\n"
@@ -111,6 +129,9 @@ asm (
#else /* CONFIG_X86_32 */
" pushl %esp\n"
" pushfl\n"
".global optprobe_template_clac\n"
"optprobe_template_clac:\n"
ASM_NOP3
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -134,6 +155,8 @@ asm (
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_CLAC_IDX \
((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
@@ -389,6 +412,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
op->optinsn.size = ret;
len = TMPL_END_IDX + op->optinsn.size;
synthesize_clac(buf + TMPL_CLAC_IDX);
/* Set probe information */
synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

View File

@@ -159,12 +159,19 @@ bool kvm_check_and_clear_guest_paused(void)
return ret;
}
static int kvm_cs_enable(struct clocksource *cs)
{
vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
return 0;
}
struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
@@ -272,7 +279,7 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
if (!(flags & PVCLOCK_TSC_STABLE_BIT))
return 0;
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
#endif
kvmclock_init_mem();

View File

@@ -27,7 +27,6 @@
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
#include <asm/pgtable_areas.h>
/* This is a multiple of PAGE_SIZE. */

View File

@@ -403,9 +403,9 @@ static void default_do_nmi(struct pt_regs *regs)
* a 'real' unknown NMI. For example, while processing
* a perf NMI another perf NMI comes in along with a
* 'real' unknown NMI. These two NMIs get combined into
* one (as descibed above). When the next NMI gets
* one (as described above). When the next NMI gets
* processed, it will be flagged by perf as handled, but
* noone will know that there was a 'real' unknown NMI sent
* no one will know that there was a 'real' unknown NMI sent
* also. As a result it gets swallowed. Or if the first
* perf NMI returns two events handled then the second
* NMI will get eaten by the logic below, again losing a

View File

@@ -28,7 +28,6 @@
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <linux/uaccess.h>
#include <asm/mwait.h>
#include <asm/fpu/internal.h>
@@ -650,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
if ((tifp ^ tifn) & _TIF_SLD)
switch_to_sld(tifn);
}
/*

View File

@@ -49,7 +49,6 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>

View File

@@ -48,7 +48,6 @@
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>

View File

@@ -145,7 +145,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti)
{
WARN_ON(vclock_was_used(VCLOCK_PVCLOCK));
WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK));
pvti_cpu0_va = pvti;
}

View File

@@ -531,7 +531,7 @@ static void emergency_vmx_disable_all(void)
/*
* We need to disable VMX on all CPUs before rebooting, otherwise
* we risk hanging up the machine, because the CPU ignore INIT
* we risk hanging up the machine, because the CPU ignores INIT
* signals when VMX is enabled.
*
* We can't take any locks and we may be on an inconsistent

View File

@@ -9,6 +9,8 @@
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
/*
* Must be relocatable PIC code callable as a C function
@@ -39,6 +41,7 @@
.align PAGE_SIZE
.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
UNWIND_HINT_EMPTY
/*
* %rdi indirection_page
* %rsi page_list
@@ -105,6 +108,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
UNWIND_HINT_EMPTY
/* set return address to 0 if not preserving context */
pushq $0
/* store the start address on the stack */
@@ -192,14 +196,12 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
1:
popq %rdx
leaq PAGE_SIZE(%r10), %rsp
ANNOTATE_RETPOLINE_SAFE
call *%rdx
/* get the re-entry point of the peer system */
movq 0(%rsp), %rbp
call 1f
1:
popq %r8
subq $(1b - relocate_kernel), %r8
leaq relocate_kernel(%rip), %r8
movq CP_PA_SWAP_PAGE(%r8), %r10
movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
movq CP_PA_TABLE_PAGE(%r8), %rax
@@ -212,6 +214,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
UNWIND_HINT_EMPTY
movq RSP(%r8), %rsp
movq CR4(%r8), %rax
movq %rax, %cr4
@@ -233,6 +236,7 @@ SYM_CODE_END(virtual_mapped)
/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
UNWIND_HINT_EMPTY
movq %rdi, %rcx /* Put the page_list in %rcx */
xorl %edi, %edi
xorl %esi, %esi

View File

@@ -16,6 +16,7 @@
#include <linux/pci.h>
#include <linux/root_dev.h>
#include <linux/sfi.h>
#include <linux/hugetlb.h>
#include <linux/tboot.h>
#include <linux/usb/xhci-dbgp.h>
@@ -64,7 +65,6 @@ RESERVE_BRK(dmi_alloc, 65536);
* at link time, with RESERVE_BRK*() facility reserving additional
* chunks.
*/
static __initdata
unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
@@ -1158,6 +1158,9 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
if (boot_cpu_has(X86_FEATURE_GBPAGES))
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
/*
* Reserve memory for crash kernel after SRAT is parsed so that it
* won't consume hotpluggable memory.

View File

@@ -42,29 +42,9 @@
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
#include <asm/syscalls.h>
#include <asm/sigframe.h>
#include <asm/signal.h>
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
} while (0)
#define GET_SEG(seg) ({ \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
tmp; \
})
#define COPY_SEG(seg) do { \
regs->seg = GET_SEG(seg); \
} while (0)
#define COPY_SEG_CPL3(seg) do { \
regs->seg = GET_SEG(seg) | 3; \
} while (0)
#ifdef CONFIG_X86_64
/*
* If regs->ss will cause an IRET fault, change it. Otherwise leave it
@@ -92,53 +72,58 @@ static void force_valid_ss(struct pt_regs *regs)
ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
regs->ss = __USER_DS;
}
# define CONTEXT_COPY_SIZE offsetof(struct sigcontext, reserved1)
#else
# define CONTEXT_COPY_SIZE sizeof(struct sigcontext)
#endif
static int restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc,
struct sigcontext __user *usc,
unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
struct sigcontext sc;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
get_user_try {
if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE))
return -EFAULT;
#ifdef CONFIG_X86_32
set_user_gs(regs, GET_SEG(gs));
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
set_user_gs(regs, sc.gs);
regs->fs = sc.fs;
regs->es = sc.es;
regs->ds = sc.ds;
#endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
regs->bx = sc.bx;
regs->cx = sc.cx;
regs->dx = sc.dx;
regs->si = sc.si;
regs->di = sc.di;
regs->bp = sc.bp;
regs->ax = sc.ax;
regs->sp = sc.sp;
regs->ip = sc.ip;
#ifdef CONFIG_X86_64
COPY(r8);
COPY(r9);
COPY(r10);
COPY(r11);
COPY(r12);
COPY(r13);
COPY(r14);
COPY(r15);
regs->r8 = sc.r8;
regs->r9 = sc.r9;
regs->r10 = sc.r10;
regs->r11 = sc.r11;
regs->r12 = sc.r12;
regs->r13 = sc.r13;
regs->r14 = sc.r14;
regs->r15 = sc.r15;
#endif /* CONFIG_X86_64 */
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
/* Get CS/SS and force CPL3 */
regs->cs = sc.cs | 0x03;
regs->ss = sc.ss | 0x03;
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf_val, &sc->fpstate);
buf = (void __user *)buf_val;
} get_user_catch(err);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
/* disable syscall checks */
regs->orig_ax = -1;
#ifdef CONFIG_X86_64
/*
@@ -149,70 +134,78 @@ static int restore_sigcontext(struct pt_regs *regs,
force_valid_ss(regs);
#endif
err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
return err;
return fpu__restore_sig((void __user *)sc.fpstate,
IS_ENABLED(CONFIG_X86_32));
}
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
static __always_inline int
__unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
int err = 0;
put_user_try {
#ifdef CONFIG_X86_32
put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
put_user_ex(regs->es, (unsigned int __user *)&sc->es);
put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
unsafe_put_user(get_user_gs(regs),
(unsigned int __user *)&sc->gs, Efault);
unsafe_put_user(regs->fs, (unsigned int __user *)&sc->fs, Efault);
unsafe_put_user(regs->es, (unsigned int __user *)&sc->es, Efault);
unsafe_put_user(regs->ds, (unsigned int __user *)&sc->ds, Efault);
#endif /* CONFIG_X86_32 */
put_user_ex(regs->di, &sc->di);
put_user_ex(regs->si, &sc->si);
put_user_ex(regs->bp, &sc->bp);
put_user_ex(regs->sp, &sc->sp);
put_user_ex(regs->bx, &sc->bx);
put_user_ex(regs->dx, &sc->dx);
put_user_ex(regs->cx, &sc->cx);
put_user_ex(regs->ax, &sc->ax);
unsafe_put_user(regs->di, &sc->di, Efault);
unsafe_put_user(regs->si, &sc->si, Efault);
unsafe_put_user(regs->bp, &sc->bp, Efault);
unsafe_put_user(regs->sp, &sc->sp, Efault);
unsafe_put_user(regs->bx, &sc->bx, Efault);
unsafe_put_user(regs->dx, &sc->dx, Efault);
unsafe_put_user(regs->cx, &sc->cx, Efault);
unsafe_put_user(regs->ax, &sc->ax, Efault);
#ifdef CONFIG_X86_64
put_user_ex(regs->r8, &sc->r8);
put_user_ex(regs->r9, &sc->r9);
put_user_ex(regs->r10, &sc->r10);
put_user_ex(regs->r11, &sc->r11);
put_user_ex(regs->r12, &sc->r12);
put_user_ex(regs->r13, &sc->r13);
put_user_ex(regs->r14, &sc->r14);
put_user_ex(regs->r15, &sc->r15);
unsafe_put_user(regs->r8, &sc->r8, Efault);
unsafe_put_user(regs->r9, &sc->r9, Efault);
unsafe_put_user(regs->r10, &sc->r10, Efault);
unsafe_put_user(regs->r11, &sc->r11, Efault);
unsafe_put_user(regs->r12, &sc->r12, Efault);
unsafe_put_user(regs->r13, &sc->r13, Efault);
unsafe_put_user(regs->r14, &sc->r14, Efault);
unsafe_put_user(regs->r15, &sc->r15, Efault);
#endif /* CONFIG_X86_64 */
put_user_ex(current->thread.trap_nr, &sc->trapno);
put_user_ex(current->thread.error_code, &sc->err);
put_user_ex(regs->ip, &sc->ip);
unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault);
unsafe_put_user(current->thread.error_code, &sc->err, Efault);
unsafe_put_user(regs->ip, &sc->ip, Efault);
#ifdef CONFIG_X86_32
put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->sp, &sc->sp_at_signal);
put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
unsafe_put_user(regs->cs, (unsigned int __user *)&sc->cs, Efault);
unsafe_put_user(regs->flags, &sc->flags, Efault);
unsafe_put_user(regs->sp, &sc->sp_at_signal, Efault);
unsafe_put_user(regs->ss, (unsigned int __user *)&sc->ss, Efault);
#else /* !CONFIG_X86_32 */
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
put_user_ex(regs->ss, &sc->ss);
unsafe_put_user(regs->flags, &sc->flags, Efault);
unsafe_put_user(regs->cs, &sc->cs, Efault);
unsafe_put_user(0, &sc->gs, Efault);
unsafe_put_user(0, &sc->fs, Efault);
unsafe_put_user(regs->ss, &sc->ss, Efault);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate);
unsafe_put_user(fpstate, (unsigned long __user *)&sc->fpstate, Efault);
/* non-iBCS2 extensions.. */
put_user_ex(mask, &sc->oldmask);
put_user_ex(current->thread.cr2, &sc->cr2);
} put_user_catch(err);
return err;
/* non-iBCS2 extensions.. */
unsafe_put_user(mask, &sc->oldmask, Efault);
unsafe_put_user(current->thread.cr2, &sc->cr2, Efault);
return 0;
Efault:
return -EFAULT;
}
#define unsafe_put_sigcontext(sc, fp, regs, set, label) \
do { \
if (__unsafe_setup_sigcontext(sc, fp, regs, set->sig[0])) \
goto label; \
} while(0);
#define unsafe_put_sigmask(set, frame, label) \
unsafe_put_user(*(__u64 *)(set), \
(__u64 __user *)&(frame)->uc.uc_sigmask, \
label)
/*
* Set up a signal frame.
*/
@@ -312,26 +305,16 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
{
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
void __user *fp = NULL;
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
if (!access_ok(frame, sizeof(*frame)))
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
if (__put_user(sig, &frame->sig))
return -EFAULT;
if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
return -EFAULT;
if (_NSIG_WORDS > 1) {
if (__copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask)))
return -EFAULT;
}
unsafe_put_user(sig, &frame->sig, Efault);
unsafe_put_sigcontext(&frame->sc, fp, regs, set, Efault);
unsafe_put_user(set->sig[1], &frame->extramask[0], Efault);
if (current->mm->context.vdso)
restorer = current->mm->context.vdso +
vdso_image_32.sym___kernel_sigreturn;
@@ -341,7 +324,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
restorer = ksig->ka.sa.sa_restorer;
/* Set up to return from userspace. */
err |= __put_user(restorer, &frame->pretcode);
unsafe_put_user(restorer, &frame->pretcode, Efault);
/*
* This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
@@ -350,10 +333,8 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
unsafe_put_user(*((u64 *)&retcode), (u64 *)frame->retcode, Efault);
user_access_end();
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@@ -368,6 +349,10 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
regs->cs = __USER_CS;
return 0;
Efault:
user_access_end();
return -EFAULT;
}
static int __setup_rt_frame(int sig, struct ksignal *ksig,
@@ -375,50 +360,45 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
void __user *fp = NULL;
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
if (!access_ok(frame, sizeof(*frame)))
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
put_user_try {
put_user_ex(sig, &frame->sig);
put_user_ex(&frame->info, &frame->pinfo);
put_user_ex(&frame->uc, &frame->puc);
unsafe_put_user(sig, &frame->sig, Efault);
unsafe_put_user(&frame->info, &frame->pinfo, Efault);
unsafe_put_user(&frame->uc, &frame->puc, Efault);
/* Create the ucontext. */
if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Create the ucontext. */
if (static_cpu_has(X86_FEATURE_XSAVE))
unsafe_put_user(UC_FP_XSTATE, &frame->uc.uc_flags, Efault);
else
unsafe_put_user(0, &frame->uc.uc_flags, Efault);
unsafe_put_user(0, &frame->uc.uc_link, Efault);
unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
/* Set up to return from userspace. */
restorer = current->mm->context.vdso +
vdso_image_32.sym___kernel_rt_sigreturn;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
put_user_ex(restorer, &frame->pretcode);
/* Set up to return from userspace. */
restorer = current->mm->context.vdso +
vdso_image_32.sym___kernel_rt_sigreturn;
if (ksig->ka.sa.sa_flags & SA_RESTORER)
restorer = ksig->ka.sa.sa_restorer;
unsafe_put_user(restorer, &frame->pretcode, Efault);
/*
* This is movl $__NR_rt_sigreturn, %ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);
/*
* This is movl $__NR_rt_sigreturn, %ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
unsafe_put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode, Efault);
unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
unsafe_put_sigmask(set, frame, Efault);
user_access_end();
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
if (copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
/* Set up registers for signal handler */
@@ -434,6 +414,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->cs = __USER_CS;
return 0;
Efault:
user_access_end();
return -EFAULT;
}
#else /* !CONFIG_X86_32 */
static unsigned long frame_uc_flags(struct pt_regs *regs)
@@ -457,43 +440,34 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
struct rt_sigframe __user *frame;
void __user *fp = NULL;
unsigned long uc_flags;
int err = 0;
/* x86-64 should always use SA_RESTORER. */
if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
return -EFAULT;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
if (!access_ok(frame, sizeof(*frame)))
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
/* Create the ucontext. */
unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
unsafe_put_user(0, &frame->uc.uc_link, Efault);
unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault);
unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
unsafe_put_sigmask(set, frame, Efault);
user_access_end();
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
}
uc_flags = frame_uc_flags(regs);
put_user_try {
/* Create the ucontext. */
put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
err |= -EFAULT;
}
} put_user_catch(err);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
@@ -530,6 +504,10 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
force_valid_ss(regs);
return 0;
Efault:
user_access_end();
return -EFAULT;
}
#endif /* CONFIG_X86_32 */
@@ -541,45 +519,34 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
struct rt_sigframe_x32 __user *frame;
unsigned long uc_flags;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
void __user *fp = NULL;
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(frame, sizeof(*frame)))
if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
return -EFAULT;
frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fp);
uc_flags = frame_uc_flags(regs);
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
/* Create the ucontext. */
unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
unsafe_put_user(0, &frame->uc.uc_link, Efault);
unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
unsafe_put_user(0, &frame->uc.uc__pad0, Efault);
restorer = ksig->ka.sa.sa_restorer;
unsafe_put_user(restorer, (unsigned long __user *)&frame->pretcode, Efault);
unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
unsafe_put_sigmask(set, frame, Efault);
user_access_end();
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
return -EFAULT;
}
uc_flags = frame_uc_flags(regs);
put_user_try {
/* Create the ucontext. */
put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
restorer = ksig->ka.sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, (unsigned long __user *)&frame->pretcode);
} put_user_catch(err);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
@@ -597,6 +564,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
#endif /* CONFIG_X86_X32_ABI */
return 0;
#ifdef CONFIG_X86_X32_ABI
Efault:
user_access_end();
return -EFAULT;
#endif
}
/*
@@ -613,9 +585,8 @@ SYSCALL_DEFINE0(sigreturn)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
&& __copy_from_user(&set.sig[1], &frame->extramask,
sizeof(frame->extramask))))
if (__get_user(set.sig[0], &frame->sc.oldmask) ||
__get_user(set.sig[1], &frame->extramask[0]))
goto badframe;
set_current_blocked(&set);
@@ -645,7 +616,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;
@@ -859,7 +830,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
asmlinkage long sys32_x32_rt_sigreturn(void)
COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
@@ -870,7 +841,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask))
goto badframe;
if (__get_user(uc_flags, &frame->uc.uc_flags))
goto badframe;

View File

@@ -147,6 +147,8 @@ static inline void smpboot_restore_warm_reset_vector(void)
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
static void init_freq_invariance(void);
/*
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
@@ -183,6 +185,8 @@ static void smp_callin(void)
*/
set_cpu_sibling_map(raw_smp_processor_id());
init_freq_invariance();
/*
* Get our bogomips.
* Update loops_per_jiffy in cpu_data. Previous call to
@@ -466,7 +470,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
*/
static const struct x86_cpu_id snc_cpu[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
{}
};
@@ -1337,7 +1341,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
init_freq_invariance();
smp_sanity_check();
switch (apic_intr_mode) {
@@ -1434,7 +1438,7 @@ early_param("possible_cpus", _setup_possible_cpus);
/*
* cpu_possible_mask should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
* are allocated by some modules at init time, and dont expect to
* are allocated by some modules at init time, and don't expect to
* do this dynamically on cpu arrival/departure.
* cpu_present_mask on the other hand can change dynamically.
* In case when cpu_hotplug is not compiled, then we resort to current
@@ -1764,3 +1768,287 @@ void native_play_dead(void)
}
#endif
/*
* APERF/MPERF frequency ratio computation.
*
* The scheduler wants to do frequency invariant accounting and needs a <1
* ratio to account for the 'current' frequency, corresponding to
* freq_curr / freq_max.
*
* Since the frequency freq_curr on x86 is controlled by micro-controller and
* our P-state setting is little more than a request/hint, we need to observe
* the effective frequency 'BusyMHz', i.e. the average frequency over a time
* interval after discarding idle time. This is given by:
*
* BusyMHz = delta_APERF / delta_MPERF * freq_base
*
* where freq_base is the max non-turbo P-state.
*
* The freq_max term has to be set to a somewhat arbitrary value, because we
* can't know which turbo states will be available at a given point in time:
* it all depends on the thermal headroom of the entire package. We set it to
* the turbo level with 4 cores active.
*
* Benchmarks show that's a good compromise between the 1C turbo ratio
* (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
* which would ignore the entire turbo range (a conspicuous part, making
* freq_curr/freq_max always maxed out).
*
* An exception to the heuristic above is the Atom uarch, where we choose the
* highest turbo level for freq_max since Atom's are generally oriented towards
* power efficiency.
*
* Setting freq_max to anything less than the 1C turbo ratio makes the ratio
* freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
*/
DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
static DEFINE_PER_CPU(u64, arch_prev_aperf);
static DEFINE_PER_CPU(u64, arch_prev_mperf);
static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
void arch_set_max_freq_ratio(bool turbo_disabled)
{
arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
arch_turbo_freq_ratio;
}
static bool turbo_disabled(void)
{
u64 misc_en;
int err;
err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
if (err)
return false;
return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}
static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
int err;
err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
if (err)
return false;
err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
if (err)
return false;
*base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
*turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
return true;
}
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#define ICPU(model) \
{X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
ICPU(INTEL_FAM6_XEON_PHI_KNL),
ICPU(INTEL_FAM6_XEON_PHI_KNM),
{}
};
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
ICPU(INTEL_FAM6_SKYLAKE_X),
{}
};
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
ICPU(INTEL_FAM6_ATOM_GOLDMONT),
ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
{}
};
static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
int num_delta_fratio)
{
int fratio, delta_fratio, found;
int err, i;
u64 msr;
if (!x86_match_cpu(has_knl_turbo_ratio_limits))
return false;
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
if (err)
return false;
fratio = (msr >> 8) & 0xFF;
i = 16;
found = 0;
do {
if (found >= num_delta_fratio) {
*turbo_freq = fratio;
return true;
}
delta_fratio = (msr >> (i + 5)) & 0x7;
if (delta_fratio) {
found += 1;
fratio -= delta_fratio;
}
i += 8;
} while (i < 64);
return true;
}
static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
u64 ratios, counts;
u32 group_size;
int err, i;
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
if (err)
return false;
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
if (err)
return false;
for (i = 0; i < 64; i += 8) {
group_size = (counts >> i) & 0xFF;
if (group_size >= size) {
*turbo_freq = (ratios >> i) & 0xFF;
return true;
}
}
return false;
}
static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
int err;
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
if (err)
return false;
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_freq);
if (err)
return false;
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
*turbo_freq = (*turbo_freq >> 24) & 0xFF; /* 4C turbo */
return true;
}
static bool intel_set_max_freq_ratio(void)
{
u64 base_freq, turbo_freq;
if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
goto out;
if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
if (knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
goto out;
if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
goto out;
if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
goto out;
return false;
out:
arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
base_freq);
arch_set_max_freq_ratio(turbo_disabled());
return true;
}
static void init_counter_refs(void *arg)
{
u64 aperf, mperf;
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
this_cpu_write(arch_prev_aperf, aperf);
this_cpu_write(arch_prev_mperf, mperf);
}
static void init_freq_invariance(void)
{
bool ret = false;
if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
ret = intel_set_max_freq_ratio();
if (ret) {
on_each_cpu(init_counter_refs, NULL, 1);
static_branch_enable(&arch_scale_freq_key);
} else {
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
}
}
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
void arch_scale_freq_tick(void)
{
u64 freq_scale;
u64 aperf, mperf;
u64 acnt, mcnt;
if (!arch_scale_freq_invariant())
return;
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
acnt = aperf - this_cpu_read(arch_prev_aperf);
mcnt = mperf - this_cpu_read(arch_prev_mperf);
if (!mcnt)
return;
this_cpu_write(arch_prev_aperf, aperf);
this_cpu_write(arch_prev_mperf, mperf);
acnt <<= 2*SCHED_CAPACITY_SHIFT;
mcnt *= arch_max_freq_ratio;
freq_scale = div64_u64(acnt, mcnt);
if (freq_scale > SCHED_CAPACITY_SCALE)
freq_scale = SCHED_CAPACITY_SCALE;
this_cpu_write(arch_freq_scale, freq_scale);
}

View File

@@ -96,7 +96,8 @@ struct stack_frame_user {
};
static int
copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
copy_stack_frame(const struct stack_frame_user __user *fp,
struct stack_frame_user *frame)
{
int ret;
@@ -105,7 +106,8 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
ret = 1;
pagefault_disable();
if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
if (__get_user(frame->next_fp, &fp->next_fp) ||
__get_user(frame->ret_addr, &fp->ret_addr))
ret = 0;
pagefault_enable();

255
arch/x86/kernel/sys_ia32.c Normal file
View File

@@ -0,0 +1,255 @@
// SPDX-License-Identifier: GPL-2.0
/*
* sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
* sys_sparc32
*
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
*
* These routines maintain argument size conversion between 32bit and 64bit
* environment. In 2.5 most of this should be moved to a generic directory.
*
* This file assumes that there is a hole at the end of user address space.
*
* Some of the functions are LE specific currently. These are
* hopefully all marked. This should be fixed.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/utsname.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
#include <linux/rwsem.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
#include <linux/highuid.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/sched/task.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <asm/vgtod.h>
#include <asm/ia32.h>
#define AA(__x) ((unsigned long)(__x))
SYSCALL_DEFINE3(ia32_truncate64, const char __user *, filename,
unsigned long, offset_low, unsigned long, offset_high)
{
return ksys_truncate(filename,
((loff_t) offset_high << 32) | offset_low);
}
SYSCALL_DEFINE3(ia32_ftruncate64, unsigned int, fd,
unsigned long, offset_low, unsigned long, offset_high)
{
return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
/* warning: next two assume little endian */
SYSCALL_DEFINE5(ia32_pread64, unsigned int, fd, char __user *, ubuf,
u32, count, u32, poslo, u32, poshi)
{
return ksys_pread64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
SYSCALL_DEFINE5(ia32_pwrite64, unsigned int, fd, const char __user *, ubuf,
u32, count, u32, poslo, u32, poshi)
{
return ksys_pwrite64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
/*
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
SYSCALL_DEFINE6(ia32_fadvise64_64, int, fd, __u32, offset_low,
__u32, offset_high, __u32, len_low, __u32, len_high,
int, advice)
{
return ksys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
(((u64)len_high)<<32) | len_low,
advice);
}
SYSCALL_DEFINE4(ia32_readahead, int, fd, unsigned int, off_lo,
unsigned int, off_hi, size_t, count)
{
return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
SYSCALL_DEFINE6(ia32_sync_file_range, int, fd, unsigned int, off_low,
unsigned int, off_hi, unsigned int, n_low,
unsigned int, n_hi, int, flags)
{
return ksys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
SYSCALL_DEFINE5(ia32_fadvise64, int, fd, unsigned int, offset_lo,
unsigned int, offset_hi, size_t, len, int, advice)
{
return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
SYSCALL_DEFINE6(ia32_fallocate, int, fd, int, mode,
unsigned int, offset_lo, unsigned int, offset_hi,
unsigned int, len_lo, unsigned int, len_hi)
{
return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
#ifdef CONFIG_IA32_EMULATION
/*
* Another set for IA32/LFS -- x86_64 struct stat is different due to
* support for 64bit inode numbers.
*/
static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
{
typeof(ubuf->st_uid) uid = 0;
typeof(ubuf->st_gid) gid = 0;
SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
if (!access_ok(ubuf, sizeof(struct stat64)) ||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
__put_user(stat->ino, &ubuf->__st_ino) ||
__put_user(stat->ino, &ubuf->st_ino) ||
__put_user(stat->mode, &ubuf->st_mode) ||
__put_user(stat->nlink, &ubuf->st_nlink) ||
__put_user(uid, &ubuf->st_uid) ||
__put_user(gid, &ubuf->st_gid) ||
__put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
__put_user(stat->size, &ubuf->st_size) ||
__put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
__put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
__put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
__put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
__put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
__put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
__put_user(stat->blksize, &ubuf->st_blksize) ||
__put_user(stat->blocks, &ubuf->st_blocks))
return -EFAULT;
return 0;
}
COMPAT_SYSCALL_DEFINE2(ia32_stat64, const char __user *, filename,
struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
COMPAT_SYSCALL_DEFINE2(ia32_lstat64, const char __user *, filename,
struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
COMPAT_SYSCALL_DEFINE2(ia32_fstat64, unsigned int, fd,
struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
COMPAT_SYSCALL_DEFINE4(ia32_fstatat64, unsigned int, dfd,
const char __user *, filename,
struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
error = vfs_fstatat(dfd, filename, &stat, flag);
if (error)
return error;
return cp_stat64(statbuf, &stat);
}
/*
* Linux/i386 didn't use to be able to handle more than
* 4 system call parameters, so these system calls used a memory
* block for parameter passing..
*/
struct mmap_arg_struct32 {
unsigned int addr;
unsigned int len;
unsigned int prot;
unsigned int flags;
unsigned int fd;
unsigned int offset;
};
COMPAT_SYSCALL_DEFINE1(ia32_mmap, struct mmap_arg_struct32 __user *, arg)
{
struct mmap_arg_struct32 a;
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
if (a.offset & ~PAGE_MASK)
return -EINVAL;
return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
a.offset>>PAGE_SHIFT);
}
/*
* The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
*/
COMPAT_SYSCALL_DEFINE5(ia32_clone, unsigned long, clone_flags,
unsigned long, newsp, int __user *, parent_tidptr,
unsigned long, tls_val, int __user *, child_tidptr)
{
struct kernel_clone_args args = {
.flags = (clone_flags & ~CSIGNAL),
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
.exit_signal = (clone_flags & CSIGNAL),
.stack = newsp,
.tls = tls_val,
};
if (!legacy_clone_args_valid(&args))
return -EINVAL;
return _do_fork(&args);
}
#endif /* CONFIG_IA32_EMULATION */

View File

@@ -21,7 +21,6 @@
#include <asm/elf.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.

View File

@@ -62,19 +62,16 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
.name = "timer"
};
static void __init setup_default_timer_irq(void)
{
unsigned long flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER;
/*
* Unconditionally register the legacy timer; even without legacy
* PIC/PIT we need this for the HPET0 in legacy replacement mode.
* Unconditionally register the legacy timer interrupt; even
* without legacy PIC/PIT we need this for the HPET0 in legacy
* replacement mode.
*/
if (setup_irq(0, &irq0))
if (request_irq(0, timer_interrupt, flags, "timer", NULL))
pr_info("Failed to register legacy timer interrupt\n");
}
@@ -122,18 +119,12 @@ void __init time_init(void)
*/
void clocksource_arch_init(struct clocksource *cs)
{
if (cs->archdata.vclock_mode == VCLOCK_NONE)
if (cs->vdso_clock_mode == VDSO_CLOCKMODE_NONE)
return;
if (cs->archdata.vclock_mode > VCLOCK_MAX) {
pr_warn("clocksource %s registered with invalid vclock_mode %d. Disabling vclock.\n",
cs->name, cs->archdata.vclock_mode);
cs->archdata.vclock_mode = VCLOCK_NONE;
}
if (cs->mask != CLOCKSOURCE_MASK(64)) {
pr_warn("clocksource %s registered with invalid mask %016llx. Disabling vclock.\n",
pr_warn("clocksource %s registered with invalid mask %016llx for VDSO. Disabling VDSO support.\n",
cs->name, cs->mask);
cs->archdata.vclock_mode = VCLOCK_NONE;
cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
}
}

View File

@@ -59,39 +59,29 @@ __setup("cpu0_hotplug", enable_cpu0_hotplug);
*/
int _debug_hotplug_cpu(int cpu, int action)
{
struct device *dev = get_cpu_device(cpu);
int ret;
if (!cpu_is_hotpluggable(cpu))
return -EINVAL;
lock_device_hotplug();
switch (action) {
case 0:
ret = cpu_down(cpu);
if (!ret) {
ret = remove_cpu(cpu);
if (!ret)
pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
dev->offline = true;
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
} else
else
pr_debug("Can't offline CPU%d.\n", cpu);
break;
case 1:
ret = cpu_up(cpu);
if (!ret) {
dev->offline = false;
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
} else {
ret = add_cpu(cpu);
if (ret)
pr_debug("Can't online CPU%d.\n", cpu);
}
break;
default:
ret = -EINVAL;
}
unlock_device_hotplug();
return ret;
}

View File

@@ -46,6 +46,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/cpu.h>
#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
@@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
{
struct task_struct *tsk = current;
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
return;
@@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check)
#undef IP
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
{
char *str = "alignment check";
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
return;
if (!user_mode(regs))
die("Split lock detected\n", regs, error_code);
local_irq_enable();
if (handle_user_split_lock(regs, error_code))
return;
do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
error_code, BUS_ADRALN, NULL);
}
#ifdef CONFIG_VMAP_STACK
__visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
@@ -572,14 +592,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
return;
/*
* Use ist_enter despite the fact that we don't use an IST stack.
* We can be called from a kprobe in non-CONTEXT_KERNEL kernel
* mode or even during context tracking state changes.
* Unlike any other non-IST entry, we can be called from a kprobe in
* non-CONTEXT_KERNEL kernel mode or even during context tracking
* state changes. Make sure that we wake up RCU even if we're coming
* from kernel code.
*
* This means that we can't schedule. That's okay.
* This means that we can't schedule even if we came from a
* preemptible kernel context. That's okay.
*/
ist_enter(regs);
if (!user_mode(regs)) {
rcu_nmi_enter();
preempt_disable();
}
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
@@ -600,7 +626,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
cond_local_irq_disable(regs);
exit:
ist_exit(regs);
if (!user_mode(regs)) {
preempt_enable_no_resched();
rcu_nmi_exit();
}
}
NOKPROBE_SYMBOL(do_int3);
@@ -862,7 +891,25 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
cond_local_irq_enable(regs);
/*
* This addresses a Pentium Pro Erratum:
*
* PROBLEM: If the APIC subsystem is configured in mixed mode with
* Virtual Wire mode implemented through the local APIC, an
* interrupt vector of 0Fh (Intel reserved encoding) may be
* generated by the local APIC (Int 15). This vector may be
* generated upon receipt of a spurious interrupt (an interrupt
* which is removed before the system receives the INTA sequence)
* instead of the programmed 8259 spurious interrupt vector.
*
* IMPLICATION: The spurious interrupt vector programmed in the
* 8259 is normally handled by an operating system's spurious
* interrupt handler. However, a vector of 0Fh is unknown to some
* operating systems, which would crash if this erratum occurred.
*
* In theory this could be limited to 32bit, but the handler is not
* hurting and who knows which other CPUs suffer from this.
*/
}
dotraplinkage void

View File

@@ -477,7 +477,7 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* transition from one expected value to another with a fairly
* high accuracy, and we didn't miss any events. We can thus
* use the TSC value at the transitions to calculate a pretty
* good value for the TSC frequencty.
* good value for the TSC frequency.
*/
static inline int pit_verify_msb(unsigned char val)
{
@@ -1108,17 +1108,24 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
sched_clock_tick_stable();
}
static int tsc_cs_enable(struct clocksource *cs)
{
vclocks_set_used(VDSO_CLOCKMODE_TSC);
return 0;
}
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
static struct clocksource clocksource_tsc_early = {
.name = "tsc-early",
.rating = 299,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
.name = "tsc-early",
.rating = 299,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
@@ -1131,14 +1138,15 @@ static struct clocksource clocksource_tsc_early = {
* been found good.
*/
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,

View File

@@ -15,18 +15,46 @@
#include <asm/param.h>
#include <asm/tsc.h>
#define MAX_NUM_FREQS 9
#define MAX_NUM_FREQS 16 /* 4 bits to select the frequency */
/*
* The frequency numbers in the SDM are e.g. 83.3 MHz, which does not contain a
* lot of accuracy which leads to clock drift. As far as we know Bay Trail SoCs
* use a 25 MHz crystal and Cherry Trail uses a 19.2 MHz crystal, the crystal
* is the source clk for a root PLL which outputs 1600 and 100 MHz. It is
* unclear if the root PLL outputs are used directly by the CPU clock PLL or
* if there is another PLL in between.
* This does not matter though, we can model the chain of PLLs as a single PLL
* with a quotient equal to the quotients of all PLLs in the chain multiplied.
* So we can create a simplified model of the CPU clock setup using a reference
* clock of 100 MHz plus a quotient which gets us as close to the frequency
* from the SDM as possible.
* For the 83.3 MHz example from above this would give us 100 MHz * 5 / 6 =
* 83 and 1/3 MHz, which matches exactly what has been measured on actual hw.
*/
#define TSC_REFERENCE_KHZ 100000
struct muldiv {
u32 multiplier;
u32 divider;
};
/*
* If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be
* read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40].
* Unfortunately some Intel Atom SoCs aren't quite compliant to this,
* so we need manually differentiate SoC families. This is what the
* field msr_plat does.
* field use_msr_plat does.
*/
struct freq_desc {
u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
bool use_msr_plat;
struct muldiv muldiv[MAX_NUM_FREQS];
/*
* Some CPU frequencies in the SDM do not map to known PLL freqs, in
* that case the muldiv array is empty and the freqs array is used.
*/
u32 freqs[MAX_NUM_FREQS];
u32 mask;
};
/*
@@ -35,41 +63,91 @@ struct freq_desc {
* by MSR based on SDM.
*/
static const struct freq_desc freq_desc_pnw = {
0, { 0, 0, 0, 0, 0, 99840, 0, 83200 }
.use_msr_plat = false,
.freqs = { 0, 0, 0, 0, 0, 99840, 0, 83200 },
.mask = 0x07,
};
static const struct freq_desc freq_desc_clv = {
0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 }
.use_msr_plat = false,
.freqs = { 0, 133200, 0, 0, 0, 99840, 0, 83200 },
.mask = 0x07,
};
/*
* Bay Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
* 000: 100 * 5 / 6 = 83.3333 MHz
* 001: 100 * 1 / 1 = 100.0000 MHz
* 010: 100 * 4 / 3 = 133.3333 MHz
* 011: 100 * 7 / 6 = 116.6667 MHz
* 100: 100 * 4 / 5 = 80.0000 MHz
*/
static const struct freq_desc freq_desc_byt = {
1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 }
.use_msr_plat = true,
.muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 },
{ 4, 5 } },
.mask = 0x07,
};
/*
* Cherry Trail SDM MSR_FSB_FREQ frequencies simplified PLL model:
* 0000: 100 * 5 / 6 = 83.3333 MHz
* 0001: 100 * 1 / 1 = 100.0000 MHz
* 0010: 100 * 4 / 3 = 133.3333 MHz
* 0011: 100 * 7 / 6 = 116.6667 MHz
* 0100: 100 * 4 / 5 = 80.0000 MHz
* 0101: 100 * 14 / 15 = 93.3333 MHz
* 0110: 100 * 9 / 10 = 90.0000 MHz
* 0111: 100 * 8 / 9 = 88.8889 MHz
* 1000: 100 * 7 / 8 = 87.5000 MHz
*/
static const struct freq_desc freq_desc_cht = {
1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 }
.use_msr_plat = true,
.muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 },
{ 4, 5 }, { 14, 15 }, { 9, 10 }, { 8, 9 },
{ 7, 8 } },
.mask = 0x0f,
};
/*
* Merriefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
* 0001: 100 * 1 / 1 = 100.0000 MHz
* 0010: 100 * 4 / 3 = 133.3333 MHz
*/
static const struct freq_desc freq_desc_tng = {
1, { 0, 100000, 133300, 0, 0, 0, 0, 0 }
.use_msr_plat = true,
.muldiv = { { 0, 0 }, { 1, 1 }, { 4, 3 } },
.mask = 0x07,
};
/*
* Moorefield SDM MSR_FSB_FREQ frequencies simplified PLL model:
* 0000: 100 * 5 / 6 = 83.3333 MHz
* 0001: 100 * 1 / 1 = 100.0000 MHz
* 0010: 100 * 4 / 3 = 133.3333 MHz
* 0011: 100 * 1 / 1 = 100.0000 MHz
*/
static const struct freq_desc freq_desc_ann = {
1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
.use_msr_plat = true,
.muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 1, 1 } },
.mask = 0x0f,
};
/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
static const struct freq_desc freq_desc_lgm = {
1, { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }
.use_msr_plat = true,
.freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
.mask = 0x0f,
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID, &freq_desc_pnw),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_TABLET,&freq_desc_clv),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &freq_desc_byt),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &freq_desc_tng),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &freq_desc_cht),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &freq_desc_ann),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_NP, &freq_desc_lgm),
{}
};
@@ -81,17 +159,19 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
*/
unsigned long cpu_khz_from_msr(void)
{
u32 lo, hi, ratio, freq;
u32 lo, hi, ratio, freq, tscref;
const struct freq_desc *freq_desc;
const struct x86_cpu_id *id;
const struct muldiv *md;
unsigned long res;
int index;
id = x86_match_cpu(tsc_msr_cpu_ids);
if (!id)
return 0;
freq_desc = (struct freq_desc *)id->driver_data;
if (freq_desc->msr_plat) {
if (freq_desc->use_msr_plat) {
rdmsr(MSR_PLATFORM_INFO, lo, hi);
ratio = (lo >> 8) & 0xff;
} else {
@@ -101,12 +181,28 @@ unsigned long cpu_khz_from_msr(void)
/* Get FSB FREQ ID */
rdmsr(MSR_FSB_FREQ, lo, hi);
index = lo & freq_desc->mask;
md = &freq_desc->muldiv[index];
/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
freq = freq_desc->freqs[lo & 0x7];
/*
* Note this also catches cases where the index points to an unpopulated
* part of muldiv, in that case the else will set freq and res to 0.
*/
if (md->divider) {
tscref = TSC_REFERENCE_KHZ * md->multiplier;
freq = DIV_ROUND_CLOSEST(tscref, md->divider);
/*
* Multiplying by ratio before the division has better
* accuracy than just calculating freq * ratio.
*/
res = DIV_ROUND_CLOSEST(tscref * ratio, md->divider);
} else {
freq = freq_desc->freqs[index];
res = freq * ratio;
}
/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
res = freq * ratio;
if (freq == 0)
pr_err("Error MSR_FSB_FREQ index %d is unknown\n", index);
#ifdef CONFIG_X86_LOCAL_APIC
lapic_timer_period = (freq * 1000) / HZ;

View File

@@ -295,7 +295,7 @@ static cycles_t check_tsc_warp(unsigned int timeout)
* But as the TSC is per-logical CPU and can potentially be modified wrongly
* by the bios, TSC sync test for smaller duration should be able
* to catch such errors. Also this will catch the condition where all the
* cores in the socket doesn't get reset at the same time.
* cores in the socket don't get reset at the same time.
*/
static inline unsigned int loop_timeout(int cpu)
{

View File

@@ -98,7 +98,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
long err = 0;
/*
* This gets called from entry.S with interrupts disabled, but
@@ -114,37 +113,30 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
if (!access_ok(user, vm86->vm86plus.is_vm86pus ?
if (!user_access_begin(user, vm86->vm86plus.is_vm86pus ?
sizeof(struct vm86plus_struct) :
sizeof(struct vm86_struct))) {
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
}
sizeof(struct vm86_struct)))
goto Efault;
put_user_try {
put_user_ex(regs->pt.bx, &user->regs.ebx);
put_user_ex(regs->pt.cx, &user->regs.ecx);
put_user_ex(regs->pt.dx, &user->regs.edx);
put_user_ex(regs->pt.si, &user->regs.esi);
put_user_ex(regs->pt.di, &user->regs.edi);
put_user_ex(regs->pt.bp, &user->regs.ebp);
put_user_ex(regs->pt.ax, &user->regs.eax);
put_user_ex(regs->pt.ip, &user->regs.eip);
put_user_ex(regs->pt.cs, &user->regs.cs);
put_user_ex(regs->pt.flags, &user->regs.eflags);
put_user_ex(regs->pt.sp, &user->regs.esp);
put_user_ex(regs->pt.ss, &user->regs.ss);
put_user_ex(regs->es, &user->regs.es);
put_user_ex(regs->ds, &user->regs.ds);
put_user_ex(regs->fs, &user->regs.fs);
put_user_ex(regs->gs, &user->regs.gs);
unsafe_put_user(regs->pt.bx, &user->regs.ebx, Efault_end);
unsafe_put_user(regs->pt.cx, &user->regs.ecx, Efault_end);
unsafe_put_user(regs->pt.dx, &user->regs.edx, Efault_end);
unsafe_put_user(regs->pt.si, &user->regs.esi, Efault_end);
unsafe_put_user(regs->pt.di, &user->regs.edi, Efault_end);
unsafe_put_user(regs->pt.bp, &user->regs.ebp, Efault_end);
unsafe_put_user(regs->pt.ax, &user->regs.eax, Efault_end);
unsafe_put_user(regs->pt.ip, &user->regs.eip, Efault_end);
unsafe_put_user(regs->pt.cs, &user->regs.cs, Efault_end);
unsafe_put_user(regs->pt.flags, &user->regs.eflags, Efault_end);
unsafe_put_user(regs->pt.sp, &user->regs.esp, Efault_end);
unsafe_put_user(regs->pt.ss, &user->regs.ss, Efault_end);
unsafe_put_user(regs->es, &user->regs.es, Efault_end);
unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
} put_user_catch(err);
if (err) {
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
}
user_access_end();
preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
@@ -159,6 +151,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
lazy_load_gs(vm86->regs32.gs);
regs->pt.ax = retval;
return;
Efault_end:
user_access_end();
Efault:
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
}
static void mark_screen_rdonly(struct mm_struct *mm)
@@ -243,6 +242,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
struct kernel_vm86_regs vm86regs;
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;
struct vm86_struct v;
err = security_mmap_addr(0);
if (err) {
@@ -278,39 +278,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
if (vm86->saved_sp0)
return -EPERM;
if (!access_ok(user_vm86, plus ?
sizeof(struct vm86_struct) :
sizeof(struct vm86plus_struct)))
if (copy_from_user(&v, user_vm86,
offsetof(struct vm86_struct, int_revectored)))
return -EFAULT;
memset(&vm86regs, 0, sizeof(vm86regs));
get_user_try {
unsigned short seg;
get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
get_user_ex(seg, &user_vm86->regs.cs);
vm86regs.pt.cs = seg;
get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
get_user_ex(seg, &user_vm86->regs.ss);
vm86regs.pt.ss = seg;
get_user_ex(vm86regs.es, &user_vm86->regs.es);
get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
get_user_ex(vm86regs.gs, &user_vm86->regs.gs);
get_user_ex(vm86->flags, &user_vm86->flags);
get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
} get_user_catch(err);
if (err)
return err;
vm86regs.pt.bx = v.regs.ebx;
vm86regs.pt.cx = v.regs.ecx;
vm86regs.pt.dx = v.regs.edx;
vm86regs.pt.si = v.regs.esi;
vm86regs.pt.di = v.regs.edi;
vm86regs.pt.bp = v.regs.ebp;
vm86regs.pt.ax = v.regs.eax;
vm86regs.pt.ip = v.regs.eip;
vm86regs.pt.cs = v.regs.cs;
vm86regs.pt.flags = v.regs.eflags;
vm86regs.pt.sp = v.regs.esp;
vm86regs.pt.ss = v.regs.ss;
vm86regs.es = v.regs.es;
vm86regs.ds = v.regs.ds;
vm86regs.fs = v.regs.fs;
vm86regs.gs = v.regs.gs;
vm86->flags = v.flags;
vm86->screen_bitmap = v.screen_bitmap;
vm86->cpu_type = v.cpu_type;
if (copy_from_user(&vm86->int_revectored,
&user_vm86->int_revectored,

View File

@@ -21,6 +21,7 @@
#define LOAD_OFFSET __START_KERNEL_map
#endif
#define RUNTIME_DISCARD_EXIT
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN 16
@@ -313,8 +314,8 @@ SECTIONS
. = ALIGN(8);
/*
* .exit.text is discard at runtime, not link time, to deal with
* references from .altinstructions and .eh_frame
* .exit.text is discarded at runtime, not link time, to deal with
* references from .altinstructions
*/
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
EXIT_TEXT
@@ -412,9 +413,6 @@ SECTIONS
DWARF_DEBUG
DISCARDS
/DISCARD/ : {
*(.eh_frame)
}
}