Merge commit 'v3.7-rc1' into stable/for-linus-3.7
* commit 'v3.7-rc1': (10892 commits)
  Linux 3.7-rc1
  x86, boot: Explicitly include autoconf.h for hostprogs
  perf: Fix UAPI fallout
  ARM: config: make sure that platforms are ordered by option string
  ARM: config: sort select statements alphanumerically
  UAPI: (Scripted) Disintegrate include/linux/byteorder
  UAPI: (Scripted) Disintegrate include/linux
  UAPI: Unexport linux/blk_types.h
  UAPI: Unexport part of linux/ppp-comp.h
  perf: Handle new rbtree implementation
  procfs: don't need a PATH_MAX allocation to hold a string representation of an int
  vfs: embed struct filename inside of names_cache allocation if possible
  audit: make audit_inode take struct filename
  vfs: make path_openat take a struct filename pointer
  vfs: turn do_path_lookup into wrapper around struct filename variant
  audit: allow audit code to satisfy getname requests from its names_list
  vfs: define struct filename and have getname() return it
  btrfs: Fix compilation with user namespace support enabled
  userns: Fix posix_acl_file_xattr_userns gid conversion
  userns: Properly print bluetooth socket uids
  ...
@@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y += probe_roms.o
-obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32) += i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-y += syscall_$(BITS).o
 obj-$(CONFIG_X86_64) += vsyscall_64.o
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o

-obj-$(CONFIG_KVM_GUEST) += kvm.o
-obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
@@ -100,6 +99,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 obj-$(CONFIG_OF) += devicetree.o
 obj-$(CONFIG_UPROBES) += uprobes.o

+obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
	acpi_register_lapic(physid, ACPI_MADT_ENABLED);

	/*
-	 * If mp_register_lapic successfully generates a new logical cpu
+	 * If acpi_register_lapic successfully generates a new logical cpu
	 * number, then the following will get us exactly what was mapped
	 */
	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)

	header->video_mode = saved_video_mode;

+	header->pmode_behavior = 0;
+
 #ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

-	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
-		       &header->pmode_efer_high))
-		header->pmode_efer_low = header->pmode_efer_high = 0;
+	if (!rdmsr_safe(MSR_EFER,
+			&header->pmode_efer_low,
+			&header->pmode_efer_high))
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
 #endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
-	header->pmode_cr4 = read_cr4_safe();
-	header->pmode_behavior = 0;
+	if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
+		header->pmode_cr4 = read_cr4();
+		header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
+	}
	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
			&header->pmode_misc_en_low,
			&header->pmode_misc_en_high))
@@ -23,19 +23,6 @@

 #define MAX_PATCH_LEN (255-1)

-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
-	smp_alt_once = 1;
-	return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
 static int __initdata_or_module debug_alternative;

 static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
-	};
+	}
	mutex_unlock(&text_mutex);
 }

@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
	const s32 *poff;

-	if (noreplace_smp)
-		return;
-
	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
-	};
+	}
	mutex_unlock(&text_mutex);
 }

@@ -359,7 +343,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1;	/* protected by smp_alt */
+static bool uniproc_patched = false;	/* protected by smp_alt */

 void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
	struct smp_alt_module *smp;

-	if (noreplace_smp)
-		return;
+	mutex_lock(&smp_alt);
+	if (!uniproc_patched)
+		goto unlock;

-	if (smp_alt_once) {
-		if (boot_cpu_has(X86_FEATURE_UP))
-			alternatives_smp_unlock(locks, locks_end,
-						text, text_end);
-		return;
-	}
+	if (num_possible_cpus() == 1)
+		/* Don't bother remembering, we'll never have to undo it. */
+		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
-		return; /* we'll run the (safe but slow) SMP code then ... */
+		/* we'll run the (safe but slow) SMP code then ... */
+		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
		__func__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

-	mutex_lock(&smp_alt);
	list_add_tail(&smp->next, &smp_alt_modules);
-	if (boot_cpu_has(X86_FEATURE_UP))
-		alternatives_smp_unlock(smp->locks, smp->locks_end,
-					smp->text, smp->text_end);
+smp_unlock:
+	alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
	mutex_unlock(&smp_alt);
 }

@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
	struct smp_alt_module *item;

-	if (smp_alt_once || noreplace_smp)
-		return;
-
	mutex_lock(&smp_alt);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
-		mutex_unlock(&smp_alt);
-		DPRINTK("%s: %s\n", __func__, item->name);
		kfree(item);
-		return;
+		break;
	}
	mutex_unlock(&smp_alt);
 }

-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
 {
	struct smp_alt_module *mod;

@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
	pr_info("lockdep: fixing up alternatives\n");
 #endif

-	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
-		return;
-	BUG_ON(!smp && (num_online_cpus() > 1));
+	/* Why bother if there are no other CPUs? */
+	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&smp_alt);

-	/*
-	 * Avoid unnecessary switches because it forces JIT based VMs to
-	 * throw away all cached translations, which can be quite costly.
-	 */
-	if (smp == smp_mode) {
-		/* nothing */
-	} else if (smp) {
+	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
+		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
-	} else {
-		pr_info("switching to UP code\n");
-		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-		list_for_each_entry(mod, &smp_alt_modules, next)
-			alternatives_smp_unlock(mod->locks, mod->locks_end,
-						mod->text, mod->text_end);
+		uniproc_patched = false;
	}
-	smp_mode = smp;
	mutex_unlock(&smp_alt);
 }

@@ -540,40 +503,22 @@ void __init alternative_instructions(void)

	apply_alternatives(__alt_instructions, __alt_instructions_end);

-	/* switch to patch-once-at-boottime-only mode and free the
-	 * tables in case we know the number of CPUs will never ever
-	 * change */
-#ifdef CONFIG_HOTPLUG_CPU
-	if (num_possible_cpus() < 2)
-		smp_alt_once = 1;
-#endif
-
 #ifdef CONFIG_SMP
-	if (smp_alt_once) {
-		if (1 == num_possible_cpus()) {
-			pr_info("switching to UP code\n");
-			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
-			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-						_text, _etext);
-		}
-	} else {
+	/* Patch to UP if other cpus not imminent. */
+	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
-
-		/* Only switch to UP mode if we don't immediately boot others */
-		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
-			alternatives_smp_switch(0);
	}
-#endif
-	apply_paravirt(__parainstructions, __parainstructions_end);

-	if (smp_alt_once)
+	if (!uniproc_patched || num_possible_cpus() == 1)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
+#endif
+
+	apply_paravirt(__parainstructions, __parainstructions_end);

	restart_nmi();
 }
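Note: the lock-prefix patching in alternatives_smp_lock()/alternatives_smp_unlock() above rewrites exactly one byte at each offset recorded in the .smp_locks table: 0xf0 (LOCK) when other CPUs exist, 0x3e (a harmless DS segment-override prefix) when not. Below is a minimal userspace sketch of the same idea, with a plain byte buffer standing in for kernel text and a hypothetical offset table standing in for .smp_locks; text_poke() is replaced by a direct store.

#include <stdio.h>
#include <stddef.h>

/* Stand-ins for kernel text and the .smp_locks offset table. */
static unsigned char text[] = { 0x3e, 0x87, 0x07,   /* xchg %eax,(%rdi), DS-prefixed */
                                0x90,               /* nop */
                                0x3e, 0x87, 0x0e }; /* xchg %ecx,(%rsi), DS-prefixed */
static const size_t smp_locks[] = { 0, 4 };

/* UP -> SMP: turn each DS override prefix back into a LOCK prefix. */
static void smp_lock_all(void)
{
	for (size_t i = 0; i < sizeof(smp_locks) / sizeof(smp_locks[0]); i++)
		if (text[smp_locks[i]] == 0x3e)
			text[smp_locks[i]] = 0xf0;
}

int main(void)
{
	smp_lock_all();
	for (size_t i = 0; i < sizeof(text); i++)
		printf("%02x ", text[i]);
	printf("\n");	/* prints: f0 87 07 90 f0 87 0e */
	return 0;
}

Because only a single prefix byte differs between the UP and SMP encodings, the patch is atomic with respect to instruction fetch, which is what makes this trick safe at runtime.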
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
		i++;
		v1 >>= 1;
-	};
+	}

	apic_printk(APIC_DEBUG, KERN_CONT "\n");
@@ -30,7 +30,7 @@

 static int numachip_system __read_mostly;

-static struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip __read_mostly;

 static unsigned int get_apic_id(unsigned long x)
 {
@@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
	return 0;
 }

-static struct apic apic_numachip __refconst = {
+static const struct apic apic_numachip __refconst = {

	.name				= "NumaConnect system",
	.probe				= numachip_probe,
@@ -69,4 +69,7 @@ void common(void) {
	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o

 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
 endif
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif

+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg)
+		return;
+
+	tlb_flushall_shift = 5;
+
+	if (c->x86 <= 0x11)
+		tlb_flushall_shift = 4;
+}
+
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+	u32 ebx, eax, ecx, edx;
+	u16 mask = 0xfff;
+
+	if (c->x86 < 0xf)
+		return;
+
+	if (c->extended_cpuid_level < 0x80000006)
+		return;
+
+	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+	tlb_lli_4k[ENTRIES] = ebx & mask;
+
+	/*
+	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+	 * characteristics from the CPUID function 0x80000005 instead.
+	 */
+	if (c->x86 == 0xf) {
+		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+		mask = 0xff;
+	}
+
+	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!((eax >> 16) & mask)) {
+		u32 a, b, c, d;
+
+		cpuid(0x80000005, &a, &b, &c, &d);
+		tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+	} else {
+		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+	}
+
+	/* a 4M entry uses two 2M entries */
+	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!(eax & mask)) {
+		/* Erratum 658 */
+		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+			tlb_lli_2m[ENTRIES] = 1024;
+		} else {
+			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+			tlb_lli_2m[ENTRIES] = eax & 0xff;
+		}
+	} else
+		tlb_lli_2m[ENTRIES] = eax & mask;
+
+	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+	cpu_set_tlb_flushall_shift(c);
+}
+
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
	.c_vendor	= "AMD",
	.c_ident	= { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
	.c_size_cache	= amd_size_cache,
 #endif
	.c_early_init	= early_init_amd,
+	.c_detect_tlb	= cpu_detect_tlb_amd,
	.c_bsp_init	= bsp_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
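Note: cpu_detect_tlb_amd() above pulls the L2 TLB entry counts out of CPUID leaf 0x80000006 — EBX carries the 4K-page counts (iTLB in the low 12 bits, dTLB in bits 16–27) and EAX the 2M/4M counts, with a narrower 8-bit mask on K8. A rough userspace equivalent using GCC's <cpuid.h>, kept as a sketch (the 12-bit mask mirrors the kernel code; results are only meaningful on an AMD CPU that implements this leaf):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int mask = 0xfff;	/* 12-bit fields, as in cpu_detect_tlb_amd() */

	/* __get_cpuid() returns 0 if the extended leaf is unsupported. */
	if (!__get_cpuid(0x80000006, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("L2 dTLB 4K entries:    %u\n", (ebx >> 16) & mask);
	printf("L2 iTLB 4K entries:    %u\n", ebx & mask);
	printf("L2 dTLB 2M/4M entries: %u\n", (eax >> 16) & mask);
	printf("L2 iTLB 2M/4M entries: %u\n", eax & mask);
	return 0;
}

A zero in the 2M/4M field means the L2 TLB is disabled for that page size, which is why the kernel falls back to the L1 TLB counts from leaf 0x80000005 in that case.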
@@ -165,10 +165,15 @@ void __init check_bugs(void)
	print_cpu_info(&boot_cpu_data);
 #endif
	check_config();
-	check_fpu();
	check_hlt();
	check_popad();
	init_utsname()->machine[1] =
		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
	alternative_instructions();

+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif

-static int disable_smep __cpuinitdata;
 static __init int setup_disable_smep(char *arg)
 {
-	disable_smep = 1;
+	setup_clear_cpu_cap(X86_FEATURE_SMEP);
	return 1;
 }
 __setup("nosmep", setup_disable_smep);

-static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
+static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP)) {
-		if (unlikely(disable_smep)) {
-			setup_clear_cpu_cap(X86_FEATURE_SMEP);
-			clear_in_cr4(X86_CR4_SMEP);
-		} else
-			set_in_cr4(X86_CR4_SMEP);
-	}
+	if (cpu_has(c, X86_FEATURE_SMEP))
+		set_in_cr4(X86_CR4_SMEP);
+}
+
+static __init int setup_disable_smap(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_SMAP);
+	return 1;
+}
+__setup("nosmap", setup_disable_smap);
+
+static __always_inline void setup_smap(struct cpuinfo_x86 *c)
+{
+	unsigned long eflags;
+
+	/* This should have been cleared long ago */
+	raw_local_save_flags(eflags);
+	BUG_ON(eflags & X86_EFLAGS_AC);
+
+	if (cpu_has(c, X86_FEATURE_SMAP))
+		set_in_cr4(X86_CR4_SMAP);
 }

 /*
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)

	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
-		"tlb_flushall_shift is 0x%x\n",
+		"tlb_flushall_shift: %d\n",
		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
	c->cpu_index = 0;
	filter_cpuid_features(c, false);

-	setup_smep(c);
-
	if (this_cpu->c_bsp_init)
		this_cpu->c_bsp_init(c);
 }
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
		c->phys_proc_id = c->initial_apicid;
	}

-	setup_smep(c);
-
	get_model_name(c); /* Default name */

	detect_nopl(c);
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
	/* Disable the PN if appropriate */
	squash_the_stupid_serial_number(c);

+	/* Set up SMEP/SMAP */
+	setup_smep(c);
+	setup_smap(c);
+
	/*
	 * The vendor-specific functions might have changed features.
	 * Now we do "generic changes."
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void)
 #else
	vgetcpu_set_mode();
 #endif
-	if (boot_cpu_data.cpuid_level >= 2)
-		cpu_detect_tlb(&boot_cpu_data);
+	cpu_detect_tlb(&boot_cpu_data);
 }

 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
		printk(KERN_CONT "%s ", vendor);

	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		printk(KERN_CONT "%s", strim(c->x86_model_id));
	else
		printk(KERN_CONT "%d86", c->x86);

+	printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+		printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
	else
-		printk(KERN_CONT "\n");
+		printk(KERN_CONT ")\n");

	print_cpu_msr(c);
 }
@@ -1113,11 +1127,10 @@ void syscall_init(void)

	/* Flags to clear on syscall */
	wrmsrl(MSR_SYSCALL_MASK,
-	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
+	       X86_EFLAGS_IOPL|X86_EFLAGS_AC);
 }

-unsigned long kernel_eflags;
-
 /*
  * Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();

	fpu_init();
	xsave_init();

-	raw_local_save_flags(kernel_eflags);
-
	if (is_uv_system())
		uv_cpu_init();
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();

	fpu_init();
	xsave_init();
 }
 #endif
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
	int i, j, n;
	unsigned int regs[4];
	unsigned char *desc = (unsigned char *)regs;

+	if (c->cpuid_level < 2)
+		return;
+
	/* Number of times to iterate */
	n = cpuid_eax(2) & 0xFF;
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }

 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);

 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
		put_online_cpus();
	} else
 #endif
+	{
+		preempt_disable();
		raise_local();
+		preempt_enable();
+	}
 }

 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);
+
+	mutex_lock(&mce_inject_mutex);
	raise_mce(&m);
+	mutex_unlock(&mce_inject_mutex);
	return usize;
 }
@@ -28,6 +28,18 @@ extern int mce_ser;

 extern struct mce_bank *mce_banks;

+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;

 struct mce_bank *mce_banks __read_mostly;

@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);

+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
	if (mce_available(__this_cpu_ptr(&cpu_info))) {
		machine_check_poll(MCP_TIMESTAMP,
				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
	}

	/*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
	 * polling interval, otherwise increase the polling interval.
	 */
	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}

-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }

 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
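Note: mce_timer_fn() halves the polling interval whenever mce_notify_irq() reports pending work and doubles it (then lets mce_adjust_timer() override it) when the machine is quiet, clamped between HZ/100 and check_interval * HZ. The backoff itself is just a clamped multiply/divide; here is a toy model of that policy — the HZ and check_interval values are stand-ins, not tied to any kernel build:

#include <stdio.h>

#define HZ 1000UL
static unsigned long check_interval = 5 * 60;	/* seconds, as in mce.c */

/* One timer expiry: shrink the interval on work, grow it when idle. */
static unsigned long next_interval(unsigned long iv, int saw_work)
{
	if (saw_work)
		return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;	/* max(iv/2, HZ/100) */
	iv *= 2;
	return iv < check_interval * HZ ? iv : check_interval * HZ; /* min(...) */
}

int main(void)
{
	unsigned long iv = check_interval * HZ;

	iv = next_interval(iv, 1);	/* error seen: poll sooner */
	iv = next_interval(iv, 1);	/* still busy: halve again */
	iv = next_interval(iv, 0);	/* quiet: back off */
	printf("interval now %lu jiffies\n", iv);
	return 0;
}

The exponential shape means a burst of corrected errors quickly drags polling down toward 10 ms, while a quiet machine climbs back to the five-minute ceiling in a handful of expiries.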
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
@@ -1594,21 +1629,26 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
	}
 }

+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+{
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
+
+	__this_cpu_write(mce_next_interval, iv);
+
+	if (mce_ignore_ce || !iv)
+		return;
+
+	t->expires = round_jiffies(jiffies + iv);
+	add_timer_on(t, smp_processor_id());
+}
+
 static void __mcheck_cpu_init_timer(void)
 {
	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = check_interval * HZ;
+	unsigned int cpu = smp_processor_id();

-	setup_timer(t, mce_timer_fn, smp_processor_id());
-
-	if (mce_ignore_ce)
-		return;
-
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
-	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, smp_processor_id());
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
 }

 /* Handle unconfigured int18 (should never happen) */
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
		mce_ignore_ce = 1;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
	else if (isdigit(str[0])) {
		get_option(&str, &tolerant);
		if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
	&mce_cmci_disabled
 };

+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_tolerant.attr,
	&dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_dont_log_ce.attr,
	&dev_attr_ignore_ce.attr,
	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
	NULL
 };

@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
	unsigned int cpu = (unsigned long)hcpu;
	struct timer_list *t = &per_cpu(mce_timer, cpu);

-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
		mce_device_create(cpu);
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		break;
	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
		break;
	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
		break;
	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
		/* intentionally ignoring frozen here */
		cmci_rediscover(cpu);
-		break;
	}
-
+
	return NOTIFY_OK;
 }
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>

+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD		1
+#define CMCI_POLL_INTERVAL	(30 * HZ)
+#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_THRESHOLD	15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+	CMCI_STORM_NONE,
+	CMCI_STORM_ACTIVE,
+	CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;

 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
	return !!(cap & MCG_CMCI_P);
 }

+void mce_intel_cmci_poll(void)
+{
+	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+		return;
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+		atomic_dec(&cmci_storm_on_cpus);
+
+	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+	int r;
+
+	if (interval < CMCI_POLL_INTERVAL)
+		return interval;
+
+	switch (__this_cpu_read(cmci_storm_state)) {
+	case CMCI_STORM_ACTIVE:
+		/*
+		 * We switch back to interrupt mode once the poll timer has
+		 * silenced itself. That means no events recorded and the
+		 * timer interval is back to our poll interval.
+		 */
+		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+		r = atomic_sub_return(1, &cmci_storm_on_cpus);
+		if (r == 0)
+			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+		/* FALLTHROUGH */
+
+	case CMCI_STORM_SUBSIDED:
+		/*
+		 * We wait for all cpus to go back to SUBSIDED
+		 * state. When that happens we switch back to
+		 * interrupt mode.
+		 */
+		if (!atomic_read(&cmci_storm_on_cpus)) {
+			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+			cmci_reenable();
+			cmci_recheck();
+		}
+		return CMCI_POLL_INTERVAL;
+	default:
+		/*
+		 * We have shiny weather. Let the poll do whatever it
+		 * thinks.
+		 */
+		return interval;
+	}
+}
+
+static bool cmci_storm_detect(void)
+{
+	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+	unsigned long ts = __this_cpu_read(cmci_time_stamp);
+	unsigned long now = jiffies;
+	int r;
+
+	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+		return true;
+
+	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+		cnt++;
+	} else {
+		cnt = 1;
+		__this_cpu_write(cmci_time_stamp, now);
+	}
+	__this_cpu_write(cmci_storm_cnt, cnt);
+
+	if (cnt <= CMCI_STORM_THRESHOLD)
+		return false;
+
+	cmci_clear();
+	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+	r = atomic_add_return(1, &cmci_storm_on_cpus);
+	mce_timer_kick(CMCI_POLL_INTERVAL);
+
+	if (r == 1)
+		pr_notice("CMCI storm detected: switching to poll mode\n");
+	return true;
+}
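Note: cmci_storm_detect() above counts interrupts per CPU inside a one-jiffy-scaled window; once more than CMCI_STORM_THRESHOLD (15) arrive within CMCI_STORM_INTERVAL it disables CMCI and falls back to timed polling. The windowed counter can be modeled in a few lines — a sketch with plain integers standing in for jiffies and per-CPU state:

#include <stdbool.h>
#include <stdio.h>

#define STORM_INTERVAL  1	/* window length, in arbitrary time units */
#define STORM_THRESHOLD 15

static unsigned int storm_cnt;
static long time_stamp;

/* Returns true once the event rate exceeds the threshold in one window. */
static bool storm_detect(long now)
{
	if (now <= time_stamp + STORM_INTERVAL) {
		storm_cnt++;		/* still inside the current window */
	} else {
		storm_cnt = 1;		/* open a new window */
		time_stamp = now;
	}
	return storm_cnt > STORM_THRESHOLD;
}

int main(void)
{
	for (int i = 0; i < 20; i++)	/* 20 events in the same instant */
		if (storm_detect(0)) {
			printf("storm detected at event %d\n", i + 1);
			break;
		}
	return 0;
}

The point of the scheme is rate limiting: one corrected-error interrupt per event is fine, but thousands per second would starve the CPU, so above the threshold the handler trades latency for throughput and lets the 30-second poll pick up further events.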
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
  */
 static void intel_threshold_interrupt(void)
 {
+	if (cmci_storm_detect())
+		return;
	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	mce_notify_irq();
 }

 static void print_update(char *type, int *hdr, int num)
 {
	if (*hdr == 0)
		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
	*hdr = 1;
	printk(KERN_CONT " %s:%d", type, num);
 }

 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
	unsigned long flags;
	int hdr = 0;
	int i;
+	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
+		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks)

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
-			if (test_and_clear_bit(i, owned) && !boot)
-				print_update("SHD", &hdr, i);
+			clear_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			continue;
		}

-		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+		if (!mce_bios_cmci_threshold) {
+			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+			val |= CMCI_THRESHOLD;
+		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+			/*
+			 * If bios_cmci_threshold boot option was specified
+			 * but the threshold is zero, we'll try to initialize
+			 * it to 1.
+			 */
+			bios_zero_thresh = 1;
+			val |= CMCI_THRESHOLD;
+		}
+
+		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
-			if (!test_and_set_bit(i, owned) && !boot)
-				print_update("CMCI", &hdr, i);
+			set_bit(i, owned);
			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			/*
+			 * We are able to set thresholds for some banks that
+			 * had a threshold of 0. This means the BIOS has not
+			 * set the thresholds properly or does not work with
+			 * this boot option. Note down now and report later.
+			 */
+			if (mce_bios_cmci_threshold && bios_zero_thresh &&
+					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (hdr)
		printk(KERN_CONT "\n");
+	if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+		pr_info_once(
+			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+		pr_info_once(
+			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+	}
 }

 /*
@@ -156,7 +278,7 @@ void cmci_clear(void)
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+		val &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
			continue;
		/* Recheck banks in case CPUs don't all have the same */
		if (cmci_supported(&banks))
-			cmci_discover(banks, 0);
+			cmci_discover(banks);
	}

	set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
 {
	int banks;
	if (cmci_supported(&banks))
-		cmci_discover(banks, 0);
+		cmci_discover(banks);
 }

 static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
		return;

	mce_threshold_vector = intel_threshold_interrupt;
-	cmci_discover(banks, 1);
+	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
@@ -8,7 +8,10 @@
 open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
 open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";

-print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
+print OUT "#include <asm/cpufeature.h>\n";
+print OUT "#endif\n";
+print OUT "\n";
 print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";

 %features = ();
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];

 extern struct event_constraint intel_snb_pebs_event_constraints[];

+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);

 void intel_pmu_pebs_enable(struct perf_event *event);
@@ -624,6 +626,8 @@ int p4_pmu_init(void);

 int p6_pmu_init(void);

+int knc_pmu_init(void);
+
 #else /* CONFIG_CPU_SUP_INTEL */

 static inline void reserve_ds_buffers(void)
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
 };

 struct perf_ibs {
-	struct pmu	pmu;
-	unsigned int	msr;
-	u64		config_mask;
-	u64		cnt_mask;
-	u64		enable_mask;
-	u64		valid_mask;
-	u64		max_period;
-	unsigned long	offset_mask[1];
-	int		offset_max;
-	struct cpu_perf_ibs __percpu *pcpu;
-	u64		(*get_count)(u64 config);
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+
+	struct attribute		**format_attrs;
+	struct attribute_group		format_group;
+	const struct attribute_group	*attr_groups[2];
+
+	u64				(*get_count)(u64 config);
 };

 struct perf_ibs_data {
@@ -209,6 +214,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
	return -EOPNOTSUPP;
 }

+static const struct perf_event_attr ibs_notsupp = {
+	.exclude_user	= 1,
+	.exclude_kernel	= 1,
+	.exclude_hv	= 1,
+	.exclude_idle	= 1,
+	.exclude_host	= 1,
+	.exclude_guest	= 1,
+};
+
 static int perf_ibs_init(struct perf_event *event)
 {
	struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +243,9 @@ static int perf_ibs_init(struct perf_event *event)
	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

+	if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+		return -EINVAL;
+
	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

@@ -434,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)

 static void perf_ibs_read(struct perf_event *event) { }

+PMU_FORMAT_ATTR(rand_en,	"config:57");
+PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+	&format_attr_rand_en.attr,
+	NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+	NULL,
+};
+
 static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,
@@ -453,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+	.format_attrs		= ibs_fetch_format_attrs,

	.get_count		= get_ibs_fetch_count,
 };
@@ -476,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+	.format_attrs		= ibs_op_format_attrs,

	.get_count		= get_ibs_op_count,
 };
@@ -585,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)

	perf_ibs->pcpu = pcpu;

+	/* register attributes */
+	if (perf_ibs->format_attrs[0]) {
+		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+		perf_ibs->format_group.name	= "format";
+		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;
+
+		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
+		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
+	}
+
	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
@@ -596,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)

 static __init int perf_event_ibs_init(void)
 {
+	struct attribute **attr = ibs_op_format_attrs;
+
	if (!ibs_caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-	if (ibs_caps & IBS_CAPS_OPCNT)
+
+	if (ibs_caps & IBS_CAPS_OPCNT) {
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+		*attr++ = &format_attr_cnt_ctl.attr;
+	}
	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
+		case 0xb:
+			return knc_pmu_init();
		case 0xf:
			return p4_pmu_init();
		}
@@ -2008,6 +2010,7 @@ __init int intel_pmu_init(void)
		break;

	case 28: /* Atom */
+	case 54: /* Cedariew */
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

@@ -2047,7 +2050,6 @@ __init int intel_pmu_init(void)
	case 42: /* SandyBridge */
	case 45: /* SandyBridge, "Romely-EP" */
		x86_add_quirk(intel_sandybridge_quirk);
-	case 58: /* IvyBridge */
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2072,6 +2074,29 @@ __init int intel_pmu_init(void)

		pr_cont("SandyBridge events, ");
		break;
+
+	case 58: /* IvyBridge */
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_snb_event_constraints;
+		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+		pr_cont("IvyBridge events, ");
+		break;

	default:
		switch (x86_pmu.version) {
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
	EVENT_CONSTRAINT_END
 };

+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2),	/* INST_RETIRED.PRECDIST */
+	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf),	/* UOPS_RETIRED.ALL */
+	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf),	/* UOPS_RETIRED.RETIRE_SLOTS */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),	/* BR_INST_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),	/* BR_MISP_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),	/* MEM_TRANS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),	/* MEM_UOP_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),	/* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd3, 0xf),	/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
	struct event_constraint *c;
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
-	if (boot_cpu_data.x86_mask < 10) {
+	if (boot_cpu_data.x86_model == 28
+	    && boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
	}
 }

+static struct uncore_event_desc snb_uncore_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+	{ /* end: all zeroes */ },
+};
+
 static struct attribute *snb_uncore_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
	.constraints	= snb_uncore_cbox_constraints,
	.ops		= &snb_uncore_msr_ops,
	.format_group	= &snb_uncore_format_group,
+	.event_descs	= snb_uncore_events,
 };

 static struct intel_uncore_type *snb_msr_uncores[] = {
@@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
 static struct intel_uncore_box *
 uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	static struct intel_uncore_box *box;
+	struct intel_uncore_box *box;

	box = *per_cpu_ptr(pmu->box, cpu);
	if (box)
@@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
	return ret;
 }

+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+	.attrs = uncore_pmu_attrs,
+};
+
 static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
	int ret;
@@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
		free_percpu(type->pmus[i].box);
	kfree(type->pmus);
	type->pmus = NULL;
-	kfree(type->attr_groups[1]);
-	type->attr_groups[1] = NULL;
+	kfree(type->events_group);
+	type->events_group = NULL;
 }

 static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

-		type->attr_groups[1] = events_group;
+		type->events_group = events_group;
	}

+	type->pmu_group = &uncore_pmu_attr_group;
	type->pmus = pmus;
	return 0;
 fail:
@@ -369,10 +369,12 @@ struct intel_uncore_type {
	struct intel_uncore_pmu *pmus;
	struct intel_uncore_ops *ops;
	struct uncore_event_desc *event_descs;
-	const struct attribute_group *attr_groups[3];
+	const struct attribute_group *attr_groups[4];
 };

-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]

 struct intel_uncore_ops {
	void (*init_box)(struct intel_uncore_box *);
arch/x86/kernel/cpu/perf_event_knc.c (new file, 248 lines)
@@ -0,0 +1,248 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+[ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		/* On Xeon Phi event "0" is a valid DATA_READ */
+		/* (L1 Data Cache Reads) Instruction. */
+		/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
+		/* bit will always be set in x86_pmu_hw_config(). */
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ */
+		[ C(RESULT_MISS)   ] = 0x0003,	/* DATA_READ_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
+		[ C(RESULT_MISS)   ] = 0x0004,	/* DATA_WRITE_MISS */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0011,	/* L1_DATA_PF1 */
+		[ C(RESULT_MISS)   ] = 0x001c,	/* L1_DATA_PF1_MISS */
+	},
+},
+[ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
+		[ C(RESULT_MISS)   ] = 0x000e,	/* CODE_CACHE_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+},
+[ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x10cb,	/* L2_READ_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10cc,	/* L2_WRITE_HIT */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10fc,	/* L2_DATA_PF2 */
+		[ C(RESULT_MISS)   ] = 0x10fe,	/* L2_DATA_PF2_MISS */
+	},
+},
+[ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ */
+		/* see note on L1 OP_READ */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+},
+[ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
+		[ C(RESULT_MISS)   ] = 0x000d,	/* CODE_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
+[ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0012,	/* BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x002b,	/* BRANCHES_MISPREDICTED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+	return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc3, 0x1),	/* HWP_L2HIT */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0x1),	/* HWP_L2MISS */
+	INTEL_EVENT_CONSTRAINT(0xc8, 0x1),	/* L2_READ_HIT_E */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1),	/* L2_READ_HIT_M */
+	INTEL_EVENT_CONSTRAINT(0xca, 0x1),	/* L2_READ_HIT_S */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),	/* L2_READ_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcc, 0x1),	/* L2_WRITE_HIT */
+	INTEL_EVENT_CONSTRAINT(0xce, 0x1),	/* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcf, 0x1),	/* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+	INTEL_EVENT_CONSTRAINT(0xd7, 0x1),	/* L2_VICTIM_REQ_WITH_DATA */
+	INTEL_EVENT_CONSTRAINT(0xe3, 0x1),	/* SNP_HITM_BUNIT */
+	INTEL_EVENT_CONSTRAINT(0xe6, 0x1),	/* SNP_HIT_L2 */
+	INTEL_EVENT_CONSTRAINT(0xe7, 0x1),	/* SNP_HITM_L2 */
+	INTEL_EVENT_CONSTRAINT(0xf1, 0x1),	/* L2_DATA_READ_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf2, 0x1),	/* L2_DATA_WRITE_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf6, 0x1),	/* L2_DATA_READ_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0x1),	/* L2_DATA_WRITE_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xfc, 0x1),	/* L2_DATA_PF2 */
+	INTEL_EVENT_CONSTRAINT(0xfd, 0x1),	/* L2_DATA_PF2_DROP */
+	INTEL_EVENT_CONSTRAINT(0xfe, 0x1),	/* L2_DATA_PF2_MISS */
+	INTEL_EVENT_CONSTRAINT(0xff, 0x1),	/* L2_DATA_HIT_INFLIGHT_PF2 */
+	EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f
+
+#define KNC_ENABLE_COUNTER0			0x00000001
+#define KNC_ENABLE_COUNTER1			0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+PMU_FORMAT_ATTR(event,	"config:0-7"	);
+PMU_FORMAT_ATTR(umask,	"config:8-15"	);
+PMU_FORMAT_ATTR(edge,	"config:18"	);
+PMU_FORMAT_ATTR(inv,	"config:23"	);
+PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+
+static struct attribute *intel_knc_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask.attr,
+	NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+	.name			= "knc",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= knc_pmu_disable_all,
+	.enable_all		= knc_pmu_enable_all,
+	.enable			= knc_pmu_enable_event,
+	.disable		= knc_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_KNC_EVNTSEL0,
+	.perfctr		= MSR_KNC_PERFCTR0,
+	.event_map		= knc_pmu_event_map,
+	.max_events		= ARRAY_SIZE(knc_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/* in theory 40 bits, early silicon is buggy though */
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= knc_event_constraints,
+	.format_attrs		= intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+	x86_pmu = knc_pmu;
+
+	memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+		sizeof(hw_cache_event_ids));
+
+	return 0;
+}
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
return msr - MSR_P6_PERFCTR0;
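    /* family 11 is Intel Knights Corner (Xeon Phi) */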
    case 11:
        return msr - MSR_KNC_PERFCTR0;
    case 15:
        return msr - MSR_P4_BPU_PERFCTR0;
    }
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
    switch (boot_cpu_data.x86) {
    case 6:
        return msr - MSR_P6_EVNTSEL0;
    case 11:
        return msr - MSR_KNC_EVNTSEL0;
    case 15:
        return msr - MSR_P4_BSU_ESCR0;
    }

@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)

static void *c_start(struct seq_file *m, loff_t *pos)
{
    if (*pos == 0) /* just in case, cpu 0 is not the first */
        *pos = cpumask_first(cpu_online_mask);
    else
        *pos = cpumask_next(*pos - 1, cpu_online_mask);
    *pos = cpumask_next(*pos - 1, cpu_online_mask);
    if ((*pos) < nr_cpu_ids)
        return &cpu_data(*pos);
    return NULL;

@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
        goto out_chrdev;
    }
    cpuid_class->devnode = cpuid_devnode;
    get_online_cpus();
    for_each_online_cpu(i) {
        err = cpuid_device_create(i);
        if (err != 0)
            goto out_class;
    }
    register_hotcpu_notifier(&cpuid_class_cpu_notifier);
    put_online_cpus();

    err = 0;
    goto out;
@@ -214,6 +216,7 @@ out_class:
    for_each_online_cpu(i) {
        cpuid_device_destroy(i);
    }
    put_online_cpus();
    class_destroy(cpuid_class);
out_chrdev:
    __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
{
    int cpu = 0;

    get_online_cpus();
    for_each_online_cpu(cpu)
        cpuid_device_destroy(cpu);
    class_destroy(cpuid_class);
    __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
    unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
    put_online_cpus();
}

module_init(cpuid_init);

@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
    .xlate = ioapic_xlate,
};

static void dt_add_ioapic_domain(unsigned int ioapic_num,
        struct device_node *np)
{
    struct irq_domain *id;
    struct mp_ioapic_gsi *gsi_cfg;
    int ret;
    int num;

    gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
    num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;

    id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
            (void *)ioapic_num);
    BUG_ON(!id);
    if (gsi_cfg->gsi_base == 0) {
        /*
         * The first NR_IRQS_LEGACY irq descs are allocated in
         * early_irq_init() and need just a mapping. The
         * remaining irqs need both. All of them are preallocated
         * and assigned so we can keep the 1:1 mapping which the ioapic
         * is having.
         */
        ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
        if (ret)
            pr_err("Error mapping legacy IRQs: %d\n", ret);

        if (num > NR_IRQS_LEGACY) {
            ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
                    NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
            if (ret)
                pr_err("Error creating mapping for the "
                        "remaining IRQs: %d\n", ret);
        }
        irq_set_default_host(id);
    } else {
        ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
        if (ret)
            pr_err("Error creating IRQ mapping: %d\n", ret);
    }
}

static void __init ioapic_add_ofnode(struct device_node *np)
{
    struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)

    for (i = 0; i < nr_ioapics; i++) {
        if (r.start == mpc_ioapic_addr(i)) {
            struct irq_domain *id;
            struct mp_ioapic_gsi *gsi_cfg;

            gsi_cfg = mp_ioapic_gsi_routing(i);

            id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
                    &ioapic_irq_domain_ops,
                    (void*)i);
            BUG_ON(!id);
            dt_add_ioapic_domain(i, np);
            return;
        }
    }

@@ -57,6 +57,7 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -298,6 +299,21 @@ ENTRY(ret_from_fork)
    CFI_ENDPROC
END(ret_from_fork)

ENTRY(ret_from_kernel_thread)
    CFI_STARTPROC
    pushl_cfi %eax
    call schedule_tail
    GET_THREAD_INFO(%ebp)
    popl_cfi %eax
    pushl_cfi $0x0202 # Reset kernel eflags
    popfl_cfi
    movl PT_EBP(%esp),%eax
    call *PT_EBX(%esp)
    movl $0,PT_EAX(%esp)
    jmp syscall_exit
    CFI_ENDPROC
ENDPROC(ret_from_kernel_thread)

/*
 * Interrupt exit functions should be protected against kprobes
 */
@@ -322,8 +338,7 @@ ret_from_intr:
    andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
    /*
     * We can be coming here from a syscall done in the kernel space,
     * e.g. a failed kernel_execve().
     * We can be coming here from child spawned by kernel_thread().
     */
    movl PT_CS(%esp), %eax
    andl $SEGMENT_RPL_MASK, %eax
@@ -407,7 +422,9 @@ sysenter_past_esp:
     */
    cmpl $__PAGE_OFFSET-3,%ebp
    jae syscall_fault
    ASM_STAC
1:  movl (%ebp),%ebp
    ASM_CLAC
    movl %ebp,PT_EBP(%esp)
    _ASM_EXTABLE(1b,syscall_fault)

@@ -488,6 +505,7 @@ ENDPROC(ia32_sysenter_target)
    # system call handler stub
ENTRY(system_call)
    RING0_INT_FRAME # can't unwind into user space anyway
    ASM_CLAC
    pushl_cfi %eax # save orig_eax
    SAVE_ALL
    GET_THREAD_INFO(%ebp)
@@ -612,22 +630,7 @@ work_notifysig: # deal with pending signals and
    movl %esp, %eax
    jne work_notifysig_v86 # returning to kernel-space or
                           # vm86-space
    TRACE_IRQS_ON
    ENABLE_INTERRUPTS(CLBR_NONE)
    movb PT_CS(%esp), %bl
    andb $SEGMENT_RPL_MASK, %bl
    cmpb $USER_RPL, %bl
    jb resume_kernel
    xorl %edx, %edx
    call do_notify_resume
    jmp resume_userspace

    ALIGN
work_notifysig_v86:
    pushl_cfi %ecx # save ti_flags for do_notify_resume
    call save_v86_state # %eax contains pt_regs pointer
    popl_cfi %ecx
    movl %eax, %esp
1:
#else
    movl %esp, %eax
#endif
@@ -640,6 +643,16 @@ work_notifysig_v86:
    xorl %edx, %edx
    call do_notify_resume
    jmp resume_userspace

#ifdef CONFIG_VM86
    ALIGN
work_notifysig_v86:
    pushl_cfi %ecx # save ti_flags for do_notify_resume
    call save_v86_state # %eax contains pt_regs pointer
    popl_cfi %ecx
    movl %eax, %esp
    jmp 1b
#endif
END(work_pending)

    # perform syscall exit tracing
@@ -670,6 +683,7 @@ END(syscall_exit_work)

    RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
    ASM_CLAC
    GET_THREAD_INFO(%ebp)
    movl $-EFAULT,PT_EAX(%esp)
    jmp resume_userspace
@@ -727,7 +741,6 @@ ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
@@ -825,6 +838,7 @@ END(interrupt)
 */
    .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
    ASM_CLAC
    addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
    SAVE_ALL
    TRACE_IRQS_OFF
@@ -841,6 +855,7 @@ ENDPROC(common_interrupt)
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
    RING0_INT_FRAME; \
    ASM_CLAC; \
    pushl_cfi $~(nr); \
    SAVE_ALL; \
    TRACE_IRQS_OFF \
@@ -857,6 +872,7 @@ ENDPROC(name)

ENTRY(coprocessor_error)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_coprocessor_error
    jmp error_code
@@ -865,6 +881,7 @@ END(coprocessor_error)

ENTRY(simd_coprocessor_error)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
    /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
@@ -886,6 +903,7 @@ END(simd_coprocessor_error)

ENTRY(device_not_available)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $-1 # mark this as an int
    pushl_cfi $do_device_not_available
    jmp error_code
@@ -906,6 +924,7 @@ END(native_irq_enable_sysexit)

ENTRY(overflow)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_overflow
    jmp error_code
@@ -914,6 +933,7 @@ END(overflow)

ENTRY(bounds)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_bounds
    jmp error_code
@@ -922,6 +942,7 @@ END(bounds)

ENTRY(invalid_op)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_invalid_op
    jmp error_code
@@ -930,6 +951,7 @@ END(invalid_op)

ENTRY(coprocessor_segment_overrun)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_coprocessor_segment_overrun
    jmp error_code
@@ -938,6 +960,7 @@ END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_invalid_TSS
    jmp error_code
    CFI_ENDPROC
@@ -945,6 +968,7 @@ END(invalid_TSS)

ENTRY(segment_not_present)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_segment_not_present
    jmp error_code
    CFI_ENDPROC
@@ -952,6 +976,7 @@ END(segment_not_present)

ENTRY(stack_segment)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_stack_segment
    jmp error_code
    CFI_ENDPROC
@@ -959,6 +984,7 @@ END(stack_segment)

ENTRY(alignment_check)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_alignment_check
    jmp error_code
    CFI_ENDPROC
@@ -966,6 +992,7 @@ END(alignment_check)

ENTRY(divide_error)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0 # no error code
    pushl_cfi $do_divide_error
    jmp error_code
@@ -975,6 +1002,7 @@ END(divide_error)
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi machine_check_vector
    jmp error_code
@@ -984,6 +1012,7 @@ END(machine_check)

ENTRY(spurious_interrupt_bug)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $0
    pushl_cfi $do_spurious_interrupt_bug
    jmp error_code
@@ -994,16 +1023,6 @@ END(spurious_interrupt_bug)
 */
.popsection

ENTRY(kernel_thread_helper)
    pushl $0 # fake return address for unwinder
    CFI_STARTPROC
    movl %edi,%eax
    call *%esi
    call do_exit
    ud2 # padding for call trace
    CFI_ENDPROC
ENDPROC(kernel_thread_helper)

#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entrypoint expects, so fix it up before using the normal path. */
@@ -1111,17 +1130,21 @@ ENTRY(ftrace_caller)
    pushl %eax
    pushl %ecx
    pushl %edx
    movl 0xc(%esp), %eax
    pushl $0 /* Pass NULL as regs pointer */
    movl 4*4(%esp), %eax
    movl 0x4(%ebp), %edx
    leal function_trace_op, %ecx
    subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
    call ftrace_stub

    addl $4,%esp /* skip NULL pointer */
    popl %edx
    popl %ecx
    popl %eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
@@ -1133,6 +1156,71 @@ ftrace_stub:
    ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
    pushf /* push flags before compare (in cs location) */
    cmpl $0, function_trace_stop
    jne ftrace_restore_flags

    /*
     * i386 does not save SS and ESP when coming from kernel.
     * Instead, to get sp, &regs->sp is used (see ptrace.h).
     * Unfortunately, that means eflags must be at the same location
     * as the current return ip is. We move the return ip into the
     * ip location, and move flags into the return ip location.
     */
    pushl 4(%esp) /* save return ip into ip slot */

    pushl $0 /* Load 0 into orig_ax */
    pushl %gs
    pushl %fs
    pushl %es
    pushl %ds
    pushl %eax
    pushl %ebp
    pushl %edi
    pushl %esi
    pushl %edx
    pushl %ecx
    pushl %ebx

    movl 13*4(%esp), %eax /* Get the saved flags */
    movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
                          /* clobbering return ip */
    movl $__KERNEL_CS,13*4(%esp)

    movl 12*4(%esp), %eax /* Load ip (1st parameter) */
    subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
    movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
    leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
    pushl %esp /* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
    call ftrace_stub

    addl $4, %esp /* Skip pt_regs */
    movl 14*4(%esp), %eax /* Move flags back into cs */
    movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
    movl 12*4(%esp), %eax /* Get return ip from regs->ip */
    movl %eax, 14*4(%esp) /* Put return ip back for ret */

    popl %ebx
    popl %ecx
    popl %edx
    popl %esi
    popl %edi
    popl %ebp
    popl %eax
    popl %ds
    popl %es
    popl %fs
    popl %gs
    addl $8, %esp /* Skip orig_ax and ip */
    popf /* Pop flags at end (no addl to corrupt flags) */
    jmp ftrace_ret

ftrace_restore_flags:
    popf
    jmp ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
@@ -1173,9 +1261,6 @@ END(mcount)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
    cmpl $0, function_trace_stop
    jne ftrace_stub

    pushl %eax
    pushl %ecx
    pushl %edx
@@ -1209,6 +1294,7 @@ return_to_handler:

ENTRY(page_fault)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_page_fault
    ALIGN
error_code:
@@ -1281,6 +1367,7 @@ END(page_fault)

ENTRY(debug)
    RING0_INT_FRAME
    ASM_CLAC
    cmpl $ia32_sysenter_target,(%esp)
    jne debug_stack_correct
    FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
@@ -1305,6 +1392,7 @@ END(debug)
 */
ENTRY(nmi)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi %eax
    movl %ss, %eax
    cmpw $__ESPFIX_SS, %ax
@@ -1375,6 +1463,7 @@ END(nmi)

ENTRY(int3)
    RING0_INT_FRAME
    ASM_CLAC
    pushl_cfi $-1 # mark this as an int
    SAVE_ALL
    TRACE_IRQS_OFF
@@ -1395,6 +1484,7 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
    RING0_EC_FRAME
    ASM_CLAC
    pushl_cfi $do_async_page_fault
    jmp error_code
    CFI_ENDPROC

@@ -56,6 +56,8 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/rcu.h>
#include <asm/smap.h>
#include <linux/err.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -68,25 +70,51 @@
    .section .entry.text, "ax"

#ifdef CONFIG_FUNCTION_TRACER

#ifdef CC_USING_FENTRY
# define function_hook __fentry__
#else
# define function_hook mcount
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
ENTRY(function_hook)
    retq
END(mcount)
END(function_hook)

/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
    MCOUNT_SAVE_FRAME \skip

    /* Load the ftrace_ops into the 3rd parameter */
    leaq function_trace_op, %rdx

    /* Load ip into the first parameter */
    movq RIP(%rsp), %rdi
    subq $MCOUNT_INSN_SIZE, %rdi
    /* Load the parent_ip into the second parameter */
#ifdef CC_USING_FENTRY
    movq SS+16(%rsp), %rsi
#else
    movq 8(%rbp), %rsi
#endif
.endm

ENTRY(ftrace_caller)
    /* Check if tracing was disabled (quick check) */
    cmpl $0, function_trace_stop
    jne ftrace_stub

    MCOUNT_SAVE_FRAME

    movq 0x38(%rsp), %rdi
    movq 8(%rbp), %rsi
    subq $MCOUNT_INSN_SIZE, %rdi
    ftrace_caller_setup
    /* regs go into 4th parameter (but make it NULL) */
    movq $0, %rcx

GLOBAL(ftrace_call)
    call ftrace_stub

    MCOUNT_RESTORE_FRAME
ftrace_return:

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub)
    retq
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
    /* Save the current flags before compare (in SS location) */
    pushfq

    /* Check if tracing was disabled (quick check) */
    cmpl $0, function_trace_stop
    jne ftrace_restore_flags

    /* skip=8 to skip flags saved in SS */
    ftrace_caller_setup 8

    /* Save the rest of pt_regs */
    movq %r15, R15(%rsp)
    movq %r14, R14(%rsp)
    movq %r13, R13(%rsp)
    movq %r12, R12(%rsp)
    movq %r11, R11(%rsp)
    movq %r10, R10(%rsp)
    movq %rbp, RBP(%rsp)
    movq %rbx, RBX(%rsp)
    /* Copy saved flags */
    movq SS(%rsp), %rcx
    movq %rcx, EFLAGS(%rsp)
    /* Kernel segments */
    movq $__KERNEL_DS, %rcx
    movq %rcx, SS(%rsp)
    movq $__KERNEL_CS, %rcx
    movq %rcx, CS(%rsp)
    /* Stack - skipping return address */
    leaq SS+16(%rsp), %rcx
    movq %rcx, RSP(%rsp)

    /* regs go into 4th parameter */
    leaq (%rsp), %rcx

GLOBAL(ftrace_regs_call)
    call ftrace_stub

    /* Copy flags back to SS, to restore them */
    movq EFLAGS(%rsp), %rax
    movq %rax, SS(%rsp)

    /* Handlers can change the RIP */
    movq RIP(%rsp), %rax
    movq %rax, SS+8(%rsp)

    /* restore the rest of pt_regs */
    movq R15(%rsp), %r15
    movq R14(%rsp), %r14
    movq R13(%rsp), %r13
    movq R12(%rsp), %r12
    movq R10(%rsp), %r10
    movq RBP(%rsp), %rbp
    movq RBX(%rsp), %rbx

    /* skip=8 to skip flags saved in SS */
    MCOUNT_RESTORE_FRAME 8

    /* Restore flags */
    popfq

    jmp ftrace_return
ftrace_restore_flags:
    popfq
    jmp ftrace_stub

END(ftrace_regs_caller)


#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
ENTRY(function_hook)
    cmpl $0, function_trace_stop
    jne ftrace_stub

@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub)
trace:
    MCOUNT_SAVE_FRAME

    movq 0x38(%rsp), %rdi
    movq RIP(%rsp), %rdi
#ifdef CC_USING_FENTRY
    movq SS+16(%rsp), %rsi
#else
    movq 8(%rbp), %rsi
#endif
    subq $MCOUNT_INSN_SIZE, %rdi

    call *ftrace_trace_function
@@ -128,20 +230,22 @@ trace:
    MCOUNT_RESTORE_FRAME

    jmp ftrace_stub
END(mcount)
END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
    cmpl $0, function_trace_stop
    jne ftrace_stub

    MCOUNT_SAVE_FRAME

#ifdef CC_USING_FENTRY
    leaq SS+16(%rsp), %rdi
    movq $0, %rdx /* No framepointers needed */
#else
    leaq 8(%rbp), %rdi
    movq 0x38(%rsp), %rsi
    movq (%rbp), %rdx
#endif
    movq RIP(%rsp), %rsi
    subq $MCOUNT_INSN_SIZE, %rsi

    call prepare_ftrace_return
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64)
    .macro SAVE_ARGS_IRQ
    cld
    /* start from rbp in pt_regs and jump over */
    movq_cfi rdi, RDI-RBP
    movq_cfi rsi, RSI-RBP
    movq_cfi rdx, RDX-RBP
    movq_cfi rcx, RCX-RBP
    movq_cfi rax, RAX-RBP
    movq_cfi r8, R8-RBP
    movq_cfi r9, R9-RBP
    movq_cfi r10, R10-RBP
    movq_cfi r11, R11-RBP
    movq_cfi rdi, (RDI-RBP)
    movq_cfi rsi, (RSI-RBP)
    movq_cfi rdx, (RDX-RBP)
    movq_cfi rcx, (RCX-RBP)
    movq_cfi rax, (RAX-RBP)
    movq_cfi r8, (R8-RBP)
    movq_cfi r9, (R9-RBP)
    movq_cfi r10, (R10-RBP)
    movq_cfi r11, (R11-RBP)

    /* Save rbp so that we can unwind from get_irq_regs() */
    movq_cfi rbp, 0
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64)
    .endm

ENTRY(save_rest)
    PARTIAL_FRAME 1 REST_SKIP+8
    PARTIAL_FRAME 1 (REST_SKIP+8)
    movq 5*8+16(%rsp), %r11 /* save return address */
    movq_cfi rbx, RBX+16
    movq_cfi rbp, RBP+16
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork)

    LOCK ; btr $TIF_FORK,TI_flags(%r8)

    pushq_cfi kernel_eflags(%rip)
    pushq_cfi $0x0002
    popfq_cfi # reset kernel eflags

    call schedule_tail # rdi: 'prev' task parameter
@@ -450,7 +554,7 @@ ENTRY(ret_from_fork)
    RESTORE_REST

    testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
    jz retint_restore_args
    jz 1f

    testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
    jnz int_ret_from_sys_call
@@ -458,6 +562,14 @@ ENTRY(ret_from_fork)
    RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
    jmp ret_from_sys_call # go to the SYSRET fastpath

1:
    subq $REST_SKIP, %rsp # leave space for volatiles
    CFI_ADJUST_CFA_OFFSET REST_SKIP
    movq %rbp, %rdi
    call *%rbx
    movl $0, RAX(%rsp)
    RESTORE_REST
    jmp int_ret_from_sys_call
    CFI_ENDPROC
END(ret_from_fork)

@@ -465,7 +577,8 @@ END(ret_from_fork)
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 * stack pointer. However, it does mask the flags register for us, so
 * CLD and CLAC are not needed.
 */

/*
@@ -565,7 +678,7 @@ sysret_careful:
    TRACE_IRQS_ON
    ENABLE_INTERRUPTS(CLBR_NONE)
    pushq_cfi %rdi
    call schedule
    SCHEDULE_USER
    popq_cfi %rdi
    jmp sysret_check

@@ -678,7 +791,7 @@ int_careful:
    TRACE_IRQS_ON
    ENABLE_INTERRUPTS(CLBR_NONE)
    pushq_cfi %rdi
    call schedule
    SCHEDULE_USER
    popq_cfi %rdi
    DISABLE_INTERRUPTS(CLBR_NONE)
    TRACE_IRQS_OFF
@@ -757,7 +870,6 @@ ENTRY(stub_execve)
    PARTIAL_FRAME 0
    SAVE_REST
    FIXUP_TOP_OF_STACK %r11
    movq %rsp, %rcx
    call sys_execve
    RESTORE_TOP_OF_STACK %r11
    movq %rax,RAX(%rsp)
@@ -807,8 +919,7 @@ ENTRY(stub_x32_execve)
    PARTIAL_FRAME 0
    SAVE_REST
    FIXUP_TOP_OF_STACK %r11
    movq %rsp, %rcx
    call sys32_execve
    call compat_sys_execve
    RESTORE_TOP_OF_STACK %r11
    movq %rax,RAX(%rsp)
    RESTORE_REST
@@ -884,6 +995,7 @@ END(interrupt)
 */
    .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
    ASM_CLAC
    XCPT_FRAME
    addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
    interrupt do_IRQ
@@ -974,7 +1086,7 @@ retint_careful:
    TRACE_IRQS_ON
    ENABLE_INTERRUPTS(CLBR_NONE)
    pushq_cfi %rdi
    call schedule
    SCHEDULE_USER
    popq_cfi %rdi
    GET_THREAD_INFO(%rcx)
    DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1023,6 +1135,7 @@ END(common_interrupt)
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
    ASM_CLAC
    INTR_FRAME
    pushq_cfi $~(\num)
.Lcommon_\sym:
@@ -1077,6 +1190,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 */
.macro zeroentry sym do_sym
ENTRY(\sym)
    ASM_CLAC
    INTR_FRAME
    PARAVIRT_ADJUST_EXCEPTION_FRAME
    pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1094,6 +1208,7 @@ END(\sym)

.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
    ASM_CLAC
    INTR_FRAME
    PARAVIRT_ADJUST_EXCEPTION_FRAME
    pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1112,6 +1227,7 @@ END(\sym)
#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
    ASM_CLAC
    INTR_FRAME
    PARAVIRT_ADJUST_EXCEPTION_FRAME
    pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1131,6 +1247,7 @@ END(\sym)

.macro errorentry sym do_sym
ENTRY(\sym)
    ASM_CLAC
    XCPT_FRAME
    PARAVIRT_ADJUST_EXCEPTION_FRAME
    subq $ORIG_RAX-R15, %rsp
@@ -1149,6 +1266,7 @@ END(\sym)
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
    ASM_CLAC
    XCPT_FRAME
    PARAVIRT_ADJUST_EXCEPTION_FRAME
    subq $ORIG_RAX-R15, %rsp
@@ -1206,52 +1324,6 @@ bad_gs:
    jmp 2b
    .previous

ENTRY(kernel_thread_helper)
    pushq $0 # fake return address
    CFI_STARTPROC
    /*
     * Here we are in the child and the registers are set as they were
     * at kernel_thread() invocation in the parent.
     */
    call *%rsi
    # exit
    mov %eax, %edi
    call do_exit
    ud2 # padding for call trace
    CFI_ENDPROC
END(kernel_thread_helper)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *  extern long execve(const char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *  rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
 *  extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *  rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
    CFI_STARTPROC
    FAKE_STACK_FRAME $0
    SAVE_ALL
    movq %rsp,%rcx
    call sys_execve
    movq %rax, RAX(%rsp)
    RESTORE_REST
    testq %rax,%rax
    je int_ret_from_sys_call
    RESTORE_ARGS
    UNFAKE_STACK_FRAME
    ret
    CFI_ENDPROC
END(kernel_execve)

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
    CFI_STARTPROC
@@ -1449,7 +1521,7 @@ paranoid_userspace:
paranoid_schedule:
    TRACE_IRQS_ON
    ENABLE_INTERRUPTS(CLBR_ANY)
    call schedule
    SCHEDULE_USER
    DISABLE_INTERRUPTS(CLBR_ANY)
    TRACE_IRQS_OFF
    jmp paranoid_userspace

@@ -206,6 +206,21 @@ static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
        unsigned const char *new_code);

/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 *  which is called to turn mcount into nops or nops into function calls
 *  but not to convert a function from not using regs to one that uses
 *  regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
        unsigned long addr)
{
    WARN_ON(1);
    return -EINVAL;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
    unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)

    ret = ftrace_modify_code(ip, old, new);

    /* Also update the regs callback function */
    if (!ret) {
        ip = (unsigned long)(&ftrace_regs_call);
        memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = ftrace_modify_code(ip, old, new);
    }

    atomic_dec(&modifying_ftrace_code);

    return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
    return add_break(rec->ip, old);
}

/*
 * If the record has the FTRACE_FL_REGS set, that means that it
 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
 * is not set, then it wants to convert to the normal callback.
 */
static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
{
    if (rec->flags & FTRACE_FL_REGS)
        return (unsigned long)FTRACE_REGS_ADDR;
    else
        return (unsigned long)FTRACE_ADDR;
}

/*
 * The FTRACE_FL_REGS_EN is set when the record already points to
 * a function that saves all the regs. Basically the '_EN' version
 * represents the current state of the function.
 */
static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
{
    if (rec->flags & FTRACE_FL_REGS_EN)
        return (unsigned long)FTRACE_REGS_ADDR;
    else
        return (unsigned long)FTRACE_ADDR;
}

static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
    unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)

    ret = ftrace_test_record(rec, enable);

    ftrace_addr = (unsigned long)FTRACE_ADDR;
    ftrace_addr = get_ftrace_addr(rec);

    switch (ret) {
    case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
        /* converting nop to call */
        return add_brk_on_nop(rec);

    case FTRACE_UPDATE_MODIFY_CALL_REGS:
    case FTRACE_UPDATE_MODIFY_CALL:
        ftrace_addr = get_ftrace_old_addr(rec);
        /* fall through */
    case FTRACE_UPDATE_MAKE_NOP:
        /* converting a call to a nop */
        return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
     * If not, don't touch the breakpoint, we may just create
     * a disaster.
     */
    ftrace_addr = (unsigned long)FTRACE_ADDR;
    ftrace_addr = get_ftrace_addr(rec);
    nop = ftrace_call_replace(ip, ftrace_addr);

    if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
        goto update;

    /* Check both ftrace_addr and ftrace_old_addr */
    ftrace_addr = get_ftrace_old_addr(rec);
    nop = ftrace_call_replace(ip, ftrace_addr);

    if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
        return -EINVAL;
    }

update:
    return probe_kernel_write((void *)ip, &nop[0], 1);
}

@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)

    ret = ftrace_test_record(rec, enable);

    ftrace_addr = (unsigned long)FTRACE_ADDR;
    ftrace_addr = get_ftrace_addr(rec);

    switch (ret) {
    case FTRACE_UPDATE_IGNORE:
        return 0;

    case FTRACE_UPDATE_MODIFY_CALL_REGS:
    case FTRACE_UPDATE_MODIFY_CALL:
    case FTRACE_UPDATE_MAKE_CALL:
        /* converting nop to call */
        return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)

    ret = ftrace_update_record(rec, enable);

    ftrace_addr = (unsigned long)FTRACE_ADDR;
    ftrace_addr = get_ftrace_addr(rec);

    switch (ret) {
    case FTRACE_UPDATE_IGNORE:
        return 0;

    case FTRACE_UPDATE_MODIFY_CALL_REGS:
    case FTRACE_UPDATE_MODIFY_CALL:
    case FTRACE_UPDATE_MAKE_CALL:
        /* converting nop to call */
        return finish_update_call(rec, ftrace_addr);

@@ -287,27 +287,28 @@ ENTRY(startup_32_smp)
    leal -__PAGE_OFFSET(%ecx),%esp

default_entry:

/*
 * New page tables may be in 4Mbyte page mode and may
 * be using the global pages.
 *
 * NOTE! If we are on a 486 we may have no cr4 at all!
 * So we do not try to touch it unless we really have
 * some bits in it to set. This won't work if the BSP
 * implements cr4 but this AP does not -- very unlikely
 * but be warned! The same applies to the pse feature
 * if not equally supported. --macro
 *
 * NOTE! We have to correct for the fact that we're
 * not yet offset PAGE_OFFSET..
 * Specifically, cr4 exists if and only if CPUID exists,
 * which in turn exists if and only if EFLAGS.ID exists.
 */
#define cr4_bits pa(mmu_cr4_features)
    movl cr4_bits,%edx
    andl %edx,%edx
    jz 6f
    movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
    orl %edx,%eax
    movl $X86_EFLAGS_ID,%ecx
    pushl %ecx
    popfl
    pushfl
    popl %eax
    pushl $0
    popfl
    pushfl
    popl %edx
    xorl %edx,%eax
    testl %ecx,%eax
    jz 6f # No ID flag = no CPUID = no CR4

    movl pa(mmu_cr4_features),%eax
    movl %eax,%cr4

    testb $X86_CR4_PAE, %al # check if PAE is enabled

@@ -19,24 +19,17 @@
#include <asm/fpu-internal.h>
#include <asm/user.h>

#ifdef CONFIG_X86_64
# include <asm/sigcontext32.h>
# include <asm/user32.h>
#else
# define save_i387_xstate_ia32 save_i387_xstate
# define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate
# define _xstate_ia32 _xstate
# define sig_xstate_ia32_size sig_xstate_size
# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
#endif

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
 * For now, with eagerfpu we will return interrupted kernel FPU
 * state as not-idle. TBD: Ideally we can change the return value
 * to something like __thread_has_fpu(current). But we need to
 * be careful of doing __thread_clear_has_fpu() before saving
 * the FPU etc for supporting nested uses etc. For now, take
 * the simple route!
 *
 * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
 * pair does nothing at all: the thread must not have fpu (so
 * that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is
@@ -44,6 +37,9 @@
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
    if (use_eager_fpu())
        return 0;

    return !__thread_has_fpu(current) &&
        (read_cr0() & X86_CR0_TS);
}
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);

void kernel_fpu_begin(void)
void __kernel_fpu_begin(void)
{
    struct task_struct *me = current;

    WARN_ON_ONCE(!irq_fpu_usable());
    preempt_disable();
    if (__thread_has_fpu(me)) {
        __save_init_fpu(me);
        __thread_clear_has_fpu(me);
        /* We do 'stts()' in kernel_fpu_end() */
    } else {
        /* We do 'stts()' in __kernel_fpu_end() */
    } else if (!use_eager_fpu()) {
        this_cpu_write(fpu_owner_task, NULL);
        clts();
    }
}
EXPORT_SYMBOL(kernel_fpu_begin);
EXPORT_SYMBOL(__kernel_fpu_begin);

void kernel_fpu_end(void)
void __kernel_fpu_end(void)
{
    stts();
    preempt_enable();
    if (use_eager_fpu())
        math_state_restore();
    else
        stts();
}
EXPORT_SYMBOL(kernel_fpu_end);
EXPORT_SYMBOL(__kernel_fpu_end);

void unlazy_fpu(struct task_struct *tsk)
{
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
}
EXPORT_SYMBOL(unlazy_fpu);

#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
# define HAVE_HWFP 1
#endif

static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;

static void __cpuinit mxcsr_feature_mask_init(void)
{
    unsigned long mask = 0;

    clts();
    if (cpu_has_fxsr) {
        memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
        asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
        mask = 0x0000ffbf;
    }
    mxcsr_feature_mask &= mask;
    stts();
}

static void __cpuinit init_thread_xstate(void)
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
    init_thread_xstate();

    mxcsr_feature_mask_init();
    /* clean state in init */
    current_thread_info()->status = 0;
    clear_used_math();
    xsave_init();
    eager_fpu_init();
}

void fpu_finit(struct fpu *fpu)
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
    }

    if (cpu_has_fxsr) {
        struct i387_fxsave_struct *fx = &fpu->state->fxsave;

        memset(fx, 0, xstate_size);
        fx->cwd = 0x37f;
        if (cpu_has_xmm)
            fx->mxcsr = MXCSR_DEFAULT;
        fx_finit(&fpu->state->fxsave);
    } else {
        struct i387_fsave_struct *fp = &fpu->state->fsave;
        memset(fp, 0, xstate_size);
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
 * FXSR floating point environment conversions.
 */

static void
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
    struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
        memcpy(&to[i], &from[i], sizeof(to[0]));
}

static void convert_to_fxsr(struct task_struct *tsk,
        const struct user_i387_ia32_struct *env)
void convert_to_fxsr(struct task_struct *tsk,
        const struct user_i387_ia32_struct *env)

{
    struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -588,223 +569,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
    return ret;
}

/*
 * Signal frame handlers.
 */

static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
{
    struct task_struct *tsk = current;
    struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;

    fp->status = fp->swd;
    if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
        return -1;
    return 1;
}

static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
{
    struct task_struct *tsk = current;
    struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
    struct user_i387_ia32_struct env;
    int err = 0;

    convert_from_fxsr(&env, tsk);
    if (__copy_to_user(buf, &env, sizeof(env)))
        return -1;

    err |= __put_user(fx->swd, &buf->status);
    err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
    if (err)
        return -1;

    if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
        return -1;
    return 1;
}

static int save_i387_xsave(void __user *buf)
{
    struct task_struct *tsk = current;
    struct _fpstate_ia32 __user *fx = buf;
    int err = 0;

    sanitize_i387_state(tsk);

    /*
     * For legacy compatibility, we always set the FP/SSE bits in the bit
     * vector while saving the state to the user context.
     * This will enable us to capture any changes (during sigreturn) to
     * the FP/SSE bits by the legacy applications which don't touch
     * xstate_bv in the xsave header.
     *
     * xsave aware applications can change the xstate_bv in the xsave
     * header as well as change any contents in the memory layout.
     * xrestore as part of sigreturn will capture all the changes.
     */
    tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

    if (save_i387_fxsave(fx) < 0)
        return -1;

    err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
            sizeof(struct _fpx_sw_bytes));
    err |= __put_user(FP_XSTATE_MAGIC2,
            (__u32 __user *) (buf + sig_xstate_ia32_size
                - FP_XSTATE_MAGIC2_SIZE));
    if (err)
        return -1;

    return 1;
}

int save_i387_xstate_ia32(void __user *buf)
{
    struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
    struct task_struct *tsk = current;

    if (!used_math())
        return 0;

    if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
        return -EACCES;
    /*
     * This will cause a "finit" to be triggered by the next
     * attempted FPU operation by the 'current' process.
     */
    clear_used_math();

    if (!HAVE_HWFP) {
        return fpregs_soft_get(current, NULL,
            0, sizeof(struct user_i387_ia32_struct),
            NULL, fp) ? -1 : 1;
    }

    unlazy_fpu(tsk);

    if (cpu_has_xsave)
        return save_i387_xsave(fp);
    if (cpu_has_fxsr)
        return save_i387_fxsave(fp);
    else
        return save_i387_fsave(fp);
}

static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
{
    struct task_struct *tsk = current;

    return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
            sizeof(struct i387_fsave_struct));
}

static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
        unsigned int size)
{
    struct task_struct *tsk = current;
    struct user_i387_ia32_struct env;
    int err;

    err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
            size);
    /* mxcsr reserved bits must be masked to zero for security reasons */
    tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
    if (err || __copy_from_user(&env, buf, sizeof(env)))
        return 1;
    convert_to_fxsr(tsk, &env);

    return 0;
}

static int restore_i387_xsave(void __user *buf)
{
    struct _fpx_sw_bytes fx_sw_user;
    struct _fpstate_ia32 __user *fx_user =
        ((struct _fpstate_ia32 __user *) buf);
    struct i387_fxsave_struct __user *fx =
        (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
    struct xsave_hdr_struct *xsave_hdr =
¤t->thread.fpu.state->xsave.xsave_hdr;
    u64 mask;
    int err;

    if (check_for_xstate(fx, buf, &fx_sw_user))
        goto fx_only;

    mask = fx_sw_user.xstate_bv;

    err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);

    xsave_hdr->xstate_bv &= pcntxt_mask;
    /*
     * These bits must be zero.
     */
    xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;

    /*
     * Init the state that is not present in the memory layout
     * and enabled by the OS.
     */
    mask = ~(pcntxt_mask & ~mask);
    xsave_hdr->xstate_bv &= mask;

    return err;
fx_only:
    /*
     * Couldn't find the extended state information in the memory
     * layout. Restore the FP/SSE and init the other extended state
     * enabled by the OS.
     */
    xsave_hdr->xstate_bv = XSTATE_FPSSE;
    return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
}

int restore_i387_xstate_ia32(void __user *buf)
{
    int err;
    struct task_struct *tsk = current;
    struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;

    if (HAVE_HWFP)
        clear_fpu(tsk);

    if (!buf) {
        if (used_math()) {
            clear_fpu(tsk);
            clear_used_math();
        }

        return 0;
    } else
        if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
            return -EACCES;

    if (!used_math()) {
        err = init_fpu(tsk);
        if (err)
            return err;
    }

    if (HAVE_HWFP) {
        if (cpu_has_xsave)
            err = restore_i387_xsave(buf);
        else if (cpu_has_fxsr)
            err = restore_i387_fxsave(fp, sizeof(struct
                i387_fxsave_struct));
        else
            err = restore_i387_fsave(fp);
    } else {
        err = fpregs_soft_set(current, NULL,
            0, sizeof(struct user_i387_ia32_struct),
            NULL, fp) != 0;
    }
    set_used_math();

    return err;
}

/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.

@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
     * out of.
     */
    outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
    outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
    outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
}

static struct syscore_ops i8259_syscore_ops = {

@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
    seq_printf(p, " Rescheduling interrupts\n");
    seq_printf(p, "%*s: ", prec, "CAL");
    for_each_online_cpu(j)
        seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
        seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
                    irq_stats(j)->irq_tlb_count);
    seq_printf(p, " Function call interrupts\n");
    seq_printf(p, "%*s: ", prec, "TLB");
    for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_SMP
    sum += irq_stats(cpu)->irq_resched_count;
    sum += irq_stats(cpu)->irq_call_count;
    sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
    sum += irq_stats(cpu)->irq_thermal_count;

@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
    int err;
#ifdef CONFIG_DEBUG_RODATA
    char opc[BREAK_INSTR_SIZE];
#endif /* CONFIG_DEBUG_RODATA */

    bpt->type = BP_BREAKPOINT;
    err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,

@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
    return 1;
}

#ifdef KPROBES_CAN_USE_FTRACE
static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
        struct kprobe_ctlblk *kcb)
{
    /*
     * Emulate singlestep (and also recover regs->ip)
     * as if there is a 5byte nop
     */
    regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
    if (unlikely(p->post_handler)) {
        kcb->kprobe_status = KPROBE_HIT_SSDONE;
        p->post_handler(p, regs, 0);
    }
    __this_cpu_write(current_kprobe, NULL);
}
#endif

/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled throughout this function.
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
    } else if (kprobe_running()) {
        p = __this_cpu_read(current_kprobe);
        if (p->break_handler && p->break_handler(p, regs)) {
#ifdef KPROBES_CAN_USE_FTRACE
            if (kprobe_ftrace(p)) {
                skip_singlestep(p, regs, kcb);
                return 1;
            }
#endif
            setup_singlestep(p, regs, kcb, 0);
            return 1;
        }
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
    return 0;
}

#ifdef KPROBES_CAN_USE_FTRACE
/* Ftrace callback handler for kprobes */
void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
        struct ftrace_ops *ops, struct pt_regs *regs)
{
    struct kprobe *p;
    struct kprobe_ctlblk *kcb;
    unsigned long flags;

    /* Disable irq for emulating a breakpoint and avoiding preempt */
    local_irq_save(flags);

    p = get_kprobe((kprobe_opcode_t *)ip);
    if (unlikely(!p) || kprobe_disabled(p))
        goto end;

    kcb = get_kprobe_ctlblk();
    if (kprobe_running()) {
        kprobes_inc_nmissed_count(p);
    } else {
        /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
        regs->ip = ip + sizeof(kprobe_opcode_t);

        __this_cpu_write(current_kprobe, p);
        kcb->kprobe_status = KPROBE_HIT_ACTIVE;
        if (!p->pre_handler || !p->pre_handler(p, regs))
            skip_singlestep(p, regs, kcb);
        /*
         * If pre_handler returns !0, it sets regs->ip and
         * resets current kprobe.
         */
    }
end:
    local_irq_restore(flags);
}

int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
{
    p->ainsn.insn = NULL;
    p->ainsn.boostable = -1;
    return 0;
}
#endif

int __init arch_init_kprobes(void)
{
    return arch_init_optprobes();

@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
    if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
        wrmsrl(MSR_KVM_PV_EOI_EN, 0);
    kvm_pv_disable_apf();
    kvm_disable_steal_time();
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void)
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
#ifdef CONFIG_KVM_CLOCK
    WARN_ON(kvm_register_clock("primary cpu clock"));
#endif
    kvm_guest_cpu_init();
    native_smp_prepare_boot_cpu();
}

@@ -75,20 +75,113 @@ struct microcode_amd {

static struct equiv_cpu_entry *equiv_cpu_table;

/* page-sized ucode patch buffer */
void *patch;
struct ucode_patch {
    struct list_head plist;
    void *data;
    u32 patch_id;
    u16 equiv_cpu;
};

static LIST_HEAD(pcache);

static u16 find_equiv_id(unsigned int cpu)
{
    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
    int i = 0;

    if (!equiv_cpu_table)
        return 0;

    while (equiv_cpu_table[i].installed_cpu != 0) {
        if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
            return equiv_cpu_table[i].equiv_cpu;

        i++;
    }
    return 0;
}
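
/* given an equivalence ID, return the matching installed-CPU signature from the table */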
|
||||
|
||||
static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
BUG_ON(!equiv_cpu_table);
|
||||
|
||||
while (equiv_cpu_table[i].equiv_cpu != 0) {
|
||||
if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
|
||||
return equiv_cpu_table[i].installed_cpu;
|
||||
i++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* a small, trivial cache of per-family ucode patches
|
||||
*/
|
||||
static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
|
||||
{
|
||||
struct ucode_patch *p;
|
||||
|
||||
list_for_each_entry(p, &pcache, plist)
|
||||
if (p->equiv_cpu == equiv_cpu)
|
||||
return p;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void update_cache(struct ucode_patch *new_patch)
|
||||
{
|
||||
struct ucode_patch *p;
|
||||
|
||||
list_for_each_entry(p, &pcache, plist) {
|
||||
if (p->equiv_cpu == new_patch->equiv_cpu) {
|
||||
if (p->patch_id >= new_patch->patch_id)
|
||||
/* we already have the latest patch */
|
||||
return;
|
||||
|
||||
list_replace(&p->plist, &new_patch->plist);
|
||||
kfree(p->data);
|
||||
kfree(p);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* no patch found, add it */
|
||||
list_add_tail(&new_patch->plist, &pcache);
|
||||
}
|
||||
|
||||
static void free_cache(void)
|
||||
{
|
||||
struct ucode_patch *p, *tmp;
|
||||
|
||||
list_for_each_entry_safe(p, tmp, &pcache, plist) {
|
||||
__list_del(p->plist.prev, p->plist.next);
|
||||
kfree(p->data);
|
||||
kfree(p);
|
||||
}
|
||||
}
|
||||
|
||||
static struct ucode_patch *find_patch(unsigned int cpu)
|
||||
{
|
||||
u16 equiv_id;
|
||||
|
||||
equiv_id = find_equiv_id(cpu);
|
||||
if (!equiv_id)
|
||||
return NULL;
|
||||
|
||||
return cache_find_patch(equiv_id);
|
||||
}
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);

csig->sig = cpuid_eax(0x00000001);
csig->rev = c->microcode;
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);

return 0;
}

static unsigned int verify_ucode_size(int cpu, u32 patch_size,
static unsigned int verify_patch_size(int cpu, u32 patch_size,
unsigned int size)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
return patch_size;
}

static u16 find_equiv_id(void)
{
unsigned int current_cpu_id, i = 0;

BUG_ON(equiv_cpu_table == NULL);

current_cpu_id = cpuid_eax(0x00000001);

while (equiv_cpu_table[i].installed_cpu != 0) {
if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
return equiv_cpu_table[i].equiv_cpu;

i++;
}
return 0;
}

/*
* we signal a good patch is found by returning its size > 0
*/
static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
unsigned int leftover_size, int rev,
unsigned int *current_size)
{
struct microcode_header_amd *mc_hdr;
unsigned int actual_size, patch_size;
u16 equiv_cpu_id;

/* size of the current patch we're staring at */
patch_size = *(u32 *)(ucode_ptr + 4);
*current_size = patch_size + SECTION_HDR_SIZE;

equiv_cpu_id = find_equiv_id();
if (!equiv_cpu_id)
return 0;

/*
* let's look at the patch header itself now
*/
mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);

if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;

/* ucode might be chipset specific -- currently we don't support this */
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}

if (mc_hdr->patch_id <= rev)
return 0;

/*
* now that the header looks sane, verify its size
*/
actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
if (!actual_size)
return 0;

/* clear the patch buffer */
memset(patch, 0, PAGE_SIZE);

/* all looks ok, get the binary patch */
get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);

return actual_size;
}

static int apply_microcode_amd(int cpu)
{
u32 rev, dummy;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
struct microcode_amd *mc_amd = uci->mc;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
u32 rev, dummy;

/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
BUG_ON(raw_smp_processor_id() != cpu);

if (mc_amd == NULL)
uci = ucode_cpu_info + cpu;

p = find_patch(cpu);
if (!p)
return 0;

wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
/* get patch id after patching */
mc_amd = p->data;
uci->mc = p->data;

rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);

/* check current patch id and patch's id for match */
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
return 0;
}

wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);

/* verify patch application was successful */
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
if (rev != mc_amd->hdr.patch_id) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf)
return -ENOMEM;
}

get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);

/* add header length */
return size + CONTAINER_HDR_SZ;
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void)
equiv_cpu_table = NULL;
}

static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
static void cleanup(void)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_header_amd *mc_hdr = NULL;
unsigned int mc_size, leftover, current_size = 0;
int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_ERROR;
free_equiv_cpu_table();
free_cache();
}

offset = install_equiv_cpu_table(ucode_ptr);
/*
* We return the current size even if some of the checks failed so that
* we can skip over the next patch. If we return a negative value, we
* signal a grave error like a memory allocation has failed and the
* driver cannot continue functioning normally. In such cases, we tear
* down everything we've used up so far and exit.
*/
static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_header_amd *mc_hdr;
struct ucode_patch *patch;
unsigned int patch_size, crnt_size, ret;
u32 proc_fam;
u16 proc_id;

patch_size = *(u32 *)(fw + 4);
crnt_size = patch_size + SECTION_HDR_SIZE;
mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
proc_id = mc_hdr->processor_rev_id;

proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
if (!proc_fam) {
pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
return crnt_size;
}

/* check if patch is for the current family */
proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
if (proc_fam != c->x86)
return crnt_size;

if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
mc_hdr->patch_id);
return crnt_size;
}

ret = verify_patch_size(cpu, patch_size, leftover);
if (!ret) {
pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
return crnt_size;
}

patch = kzalloc(sizeof(*patch), GFP_KERNEL);
if (!patch) {
pr_err("Patch allocation failure.\n");
return -EINVAL;
}

patch->data = kzalloc(patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}

/* All looks ok, copy patch... */
memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;

/* ... and add to cache. */
update_cache(patch);

return crnt_size;
}
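
The family check above folds CPUID's base and extended family fields into a single value. A small standalone illustration of that computation; the sample signature is hypothetical but has the fam15h shape (base family nibble 0xf plus extended family 0x6):

#include <stdio.h>

int main(void)
{
	unsigned int eax = 0x00600f12;		/* CPUID(1).EAX-style signature */
	unsigned int base = (eax >> 8) & 0xf;	/* 0xf */
	unsigned int ext  = (eax >> 20) & 0xff;	/* 0x06 */

	printf("family: 0x%x\n", base + ext);	/* prints 0x15 */
	return 0;
}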
static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
{
enum ucode_state ret = UCODE_ERROR;
unsigned int leftover;
u8 *fw = (u8 *)data;
int crnt_size = 0;
int offset;

offset = install_equiv_cpu_table(data);
if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
goto out;
return ret;
}
ucode_ptr += offset;
fw += offset;
leftover = size - offset;

if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
if (*(u32 *)fw != UCODE_UCODE_TYPE) {
pr_err("invalid type field in container file section header\n");
goto free_table;
free_equiv_cpu_table();
return ret;
}

while (leftover) {
mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
new_rev, &current_size);
if (mc_size) {
mc_hdr = patch;
new_mc = patch;
new_rev = mc_hdr->patch_id;
goto out_ok;
}
crnt_size = verify_and_add_patch(cpu, fw, leftover);
if (crnt_size < 0)
return ret;

ucode_ptr += current_size;
leftover -= current_size;
fw += crnt_size;
leftover -= crnt_size;
}

if (!new_mc) {
state = UCODE_NFOUND;
goto free_table;
}

out_ok:
uci->mc = new_mc;
state = UCODE_OK;
pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
cpu, uci->cpu_sig.rev, new_rev);

free_table:
free_equiv_cpu_table();

out:
return state;
return UCODE_OK;
}

/*
@@ -315,7 +402,7 @@ out:
*
* This legacy file is always smaller than 2K in size.
*
* Starting at family 15h they are in family specific firmware files:
* Beginning with family 15h, they are in family-specific firmware files:
*
* amd-ucode/microcode_amd_fam15h.bin
* amd-ucode/microcode_amd_fam16h.bin
@@ -323,12 +410,17 @@ out:
*
* These might be larger than 2K.
*/
static enum ucode_state request_microcode_amd(int cpu, struct device *device)
static enum ucode_state request_microcode_amd(int cpu, struct device *device,
bool refresh_fw)
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
const struct firmware *fw;
enum ucode_state ret = UCODE_NFOUND;
struct cpuinfo_x86 *c = &cpu_data(cpu);
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;

/* reload ucode container only on the boot cpu */
if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
return UCODE_OK;

if (c->x86 >= 0x15)
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
goto fw_release;
}

ret = generic_load_microcode(cpu, fw->data, fw->size);
/* free old equiv table */
free_equiv_cpu_table();

fw_release:
ret = load_microcode_amd(cpu, fw->data, fw->size);
if (ret != UCODE_OK)
cleanup();

fw_release:
release_firmware(fw);

out:
out:
return ret;
}

@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
return NULL;
}

patch = (void *)get_zeroed_page(GFP_KERNEL);
if (!patch)
return NULL;

return &microcode_amd_ops;
}

void __exit exit_amd_microcode(void)
{
free_page((unsigned long)patch);
cleanup();
}

@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
if (do_microcode_update(buf, len) == 0)
ret = (ssize_t)len;

if (ret > 0)
perf_check_microcode();

mutex_unlock(&microcode_mutex);
put_online_cpus();

@@ -276,19 +279,18 @@ static struct platform_device *microcode_pdev;
static int reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
int err = 0;

if (uci->valid) {
enum ucode_state ustate;

ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
if (ustate == UCODE_OK)
apply_microcode_on_target(cpu);
else
if (ustate == UCODE_ERROR)
err = -EINVAL;
}
if (!uci->valid)
return err;

ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
if (ustate == UCODE_OK)
apply_microcode_on_target(cpu);
else
if (ustate == UCODE_ERROR)
err = -EINVAL;
return err;
}

@@ -370,18 +372,15 @@ static void microcode_fini_cpu(int cpu)

static enum ucode_state microcode_resume_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

if (!uci->mc)
return UCODE_NFOUND;

pr_debug("CPU%d updated upon resume\n", cpu);
apply_microcode_on_target(cpu);

if (apply_microcode_on_target(cpu))
return UCODE_ERROR;

return UCODE_OK;
}

static enum ucode_state microcode_init_cpu(int cpu)
static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
{
enum ucode_state ustate;

@@ -392,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;

ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
refresh_fw);

if (ustate == UCODE_OK) {
pr_debug("CPU%d updated upon init\n", cpu);
@@ -405,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
static enum ucode_state microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;

if (uci->valid)
ustate = microcode_resume_cpu(cpu);
else
ustate = microcode_init_cpu(cpu);
return microcode_resume_cpu(cpu);

return ustate;
return microcode_init_cpu(cpu, false);
}

static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -428,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
if (err)
return err;

if (microcode_init_cpu(cpu) == UCODE_ERROR)
if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
return -EINVAL;

return err;
@@ -477,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
struct device *dev;

dev = get_cpu_device(cpu);
switch (action) {

switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
microcode_update_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("CPU%d added\n", cpu);
/*
* "break" is missing on purpose here because we want to fall
* through in order to create the sysfs group.
*/

case CPU_DOWN_FAILED:
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
break;

case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&dev->kobj, &mc_attr_group);
pr_debug("CPU%d removed\n", cpu);
break;

/*
* case CPU_DEAD:
*
* When a CPU goes offline, don't free up or invalidate the copy of
* the microcode in kernel memory, so that we can reuse it when the
* CPU comes back online without unnecessarily requesting the userspace
* for it again.
*/
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
}

/* The CPU refused to come up during a system resume */
if (action == CPU_UP_CANCELED_FROZEN)
microcode_fini_cpu(cpu);

return NOTIFY_OK;
}
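
The `action & ~CPU_TASKS_FROZEN` switch above is what lets one case label cover both the normal and the suspend-time (_FROZEN) variant of each hotplug event. A sketch of the masking, using stand-in values for the kernel's notifier constants:

#include <stdio.h>

/* stand-ins for the kernel's cpu notifier constants */
enum {
	CPU_ONLINE        = 0x0002,
	CPU_TASKS_FROZEN  = 0x0010,
	CPU_ONLINE_FROZEN = CPU_ONLINE | CPU_TASKS_FROZEN,
};

int main(void)
{
	unsigned long action = CPU_ONLINE_FROZEN;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:	/* reached for both variants */
		printf("online (frozen bit stripped)\n");
		break;
	}
	return 0;
}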
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}

static enum ucode_state request_microcode_fw(int cpu, struct device *device)
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);

@@ -257,12 +257,14 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
get_online_cpus();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&msr_class_cpu_notifier);
put_online_cpus();

err = 0;
goto out;
@@ -271,6 +273,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
put_online_cpus();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
get_online_cpus();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
put_online_cpus();
}

module_init(msr_init);

105 arch/x86/kernel/perf_regs.c Normal file
@@ -0,0 +1,105 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/bug.h>
#include <linux/stddef.h>
#include <asm/perf_regs.h>
#include <asm/ptrace.h>

#ifdef CONFIG_X86_32
#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
#else
#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
#endif

#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)

static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
PT_REGS_OFFSET(PERF_REG_X86_SI, si),
PT_REGS_OFFSET(PERF_REG_X86_DI, di),
PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
#ifdef CONFIG_X86_32
PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
PT_REGS_OFFSET(PERF_REG_X86_ES, es),
PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
#else
/*
* The pt_regs struct does not store
* ds, es, fs, gs in 64 bit mode.
*/
(unsigned int) -1,
(unsigned int) -1,
(unsigned int) -1,
(unsigned int) -1,
#endif
#ifdef CONFIG_X86_64
PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
#endif
};

u64 perf_reg_value(struct pt_regs *regs, int idx)
{
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
return 0;

return regs_get_register(regs, pt_regs_offset[idx]);
}
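
perf_reg_value resolves a sampled register through a table of byte offsets into struct pt_regs, built with offsetof. The same pattern, reduced to a runnable userspace sketch with an invented three-register struct:

#include <stdio.h>
#include <stddef.h>

struct regs { unsigned long ax, bx, cx; };

enum { REG_AX, REG_BX, REG_CX, REG_MAX };

static const unsigned int reg_offset[REG_MAX] = {
	[REG_AX] = offsetof(struct regs, ax),
	[REG_BX] = offsetof(struct regs, bx),
	[REG_CX] = offsetof(struct regs, cx),
};

/* roughly what regs_get_register() boils down to */
static unsigned long reg_value(const struct regs *r, int idx)
{
	if (idx >= REG_MAX)
		return 0;
	return *(const unsigned long *)((const char *)r + reg_offset[idx]);
}

int main(void)
{
	struct regs r = { .ax = 1, .bx = 2, .cx = 3 };

	printf("%lu\n", reg_value(&r, REG_BX));	/* prints 2 */
	return 0;
}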

#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))

#ifdef CONFIG_X86_32
int perf_reg_validate(u64 mask)
{
if (!mask || mask & REG_RESERVED)
return -EINVAL;

return 0;
}

u64 perf_reg_abi(struct task_struct *task)
{
return PERF_SAMPLE_REGS_ABI_32;
}
#else /* CONFIG_X86_64 */
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))

int perf_reg_validate(u64 mask)
{
if (!mask || mask & REG_RESERVED)
return -EINVAL;

if (mask & REG_NOSUPPORT)
return -EINVAL;

return 0;
}

u64 perf_reg_abi(struct task_struct *task)
{
if (test_tsk_thread_flag(task, TIF_IA32))
return PERF_SAMPLE_REGS_ABI_32;
else
return PERF_SAMPLE_REGS_ABI_64;
}
#endif /* CONFIG_X86_32 */
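
REG_RESERVED is simply the complement of the low PERF_REG_X86_MAX bits, so validation reduces to mask tests. A standalone sketch with a stand-in register count:

#include <stdio.h>
#include <stdint.h>

#define N_REGS		9	/* stand-in for PERF_REG_X86_MAX */
#define REG_RESERVED	(~((1ULL << N_REGS) - 1ULL))

static int reg_validate(uint64_t mask)
{
	if (!mask || (mask & REG_RESERVED))
		return -1;	/* empty, or asks for a nonexistent register */
	return 0;
}

int main(void)
{
	printf("%d\n", reg_validate(1ULL << 3));	/* 0: valid */
	printf("%d\n", reg_validate(1ULL << 20));	/* -1: reserved bit set */
	return 0;
}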

@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
return oprom;
}

void *pci_map_biosrom(struct pci_dev *pdev)
void __iomem *pci_map_biosrom(struct pci_dev *pdev)
{
struct resource *oprom = find_oprom(pdev);

@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
int ret;

unlazy_fpu(src);

*dst = *src;
if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
ret = fpu_alloc(&dst->thread.fpu);
if (ret)
return ret;
fpu_copy(&dst->thread.fpu, &src->thread.fpu);
fpu_copy(dst, src);
}
return 0;
}
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
SLAB_PANIC | SLAB_NOTRACK, NULL);
}

static inline void drop_fpu(struct task_struct *tsk)
{
/*
* Forget coprocessor state..
*/
tsk->fpu_counter = 0;
clear_fpu(tsk);
clear_used_math();
}

/*
* Free current thread data structures etc..
*/
@@ -163,7 +151,13 @@ void flush_thread(void)

flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
drop_fpu(tsk);
drop_init_fpu(tsk);
/*
* Free the FPU state for non xsave platforms. They get reallocated
* lazily at the first use.
*/
if (!use_eager_fpu())
free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
@@ -298,71 +292,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
* This gets run with %si containing the
* function to call, and %di containing
* the "args".
*/
extern void kernel_thread_helper(void);

/*
* Create a kernel thread
*/
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;

memset(&regs, 0, sizeof(regs));

regs.si = (unsigned long) fn;
regs.di = (unsigned long) arg;

#ifdef CONFIG_X86_32
regs.ds = __USER_DS;
regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
#else
regs.ss = __KERNEL_DS;
#endif

regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;

/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);

/*
* sys_execve() executes a new program.
*/
long sys_execve(const char __user *name,
const char __user *const __user *argv,
const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;

filename = getname(name);
error = PTR_ERR(filename);
if (IS_ERR(filename))
return error;
error = do_execve(filename, argv, envp, regs);

#ifdef CONFIG_X86_32
if (error == 0) {
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);
}
#endif

putname(filename);
return error;
}

/*
* Idle related variables and functions
*/

@@ -57,6 +57,7 @@
#include <asm/switch_to.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");

/*
* Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
struct pt_regs *childregs;
struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;

childregs = task_pt_regs(p);
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;

p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);

p->thread.ip = (unsigned long) ret_from_fork;
if (unlikely(!regs)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
task_user_gs(p) = __KERNEL_STACK_CANARY;
childregs->ds = __USER_DS;
childregs->es = __USER_DS;
childregs->fs = __KERNEL_PERCPU;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;

p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(regs);

p->fpu_counter = 0;
@@ -190,10 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
regs->flags = X86_EFLAGS_IF;
/*
* Free the old FP and other extended state
* force it to the iret return path by making it look as if there was
* some work pending.
*/
free_thread_xstate(current);
set_thread_flag(TIF_NOTIFY_RESUME);
}
EXPORT_SYMBOL_GPL(start_thread);

@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;

childregs = ((struct pt_regs *)
(THREAD_SIZE + task_stack_page(p))) - 1;
*childregs = *regs;

childregs->ax = 0;
if (user_mode(regs))
childregs->sp = sp;
else
childregs->sp = (unsigned long)childregs;

p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;

set_tsk_thread_flag(p, TIF_FORK);

p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;

@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

if (unlikely(!regs)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
childregs->ss = __KERNEL_DS;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
return 0;
}
*childregs = *regs;

childregs->ax = 0;
childregs->sp = sp;

err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -232,10 +239,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}

void

@@ -21,6 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
#define genregs32_get genregs_get
#define genregs32_set genregs_set

#define user_i387_ia32_struct user_i387_struct
#define user32_fxsr_struct user_fxsr_struct

#endif /* CONFIG_X86_64 */

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;

rcu_user_exit();

/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs)
!test_thread_flag(TIF_SYSCALL_EMU);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);

rcu_user_enter();
}

@@ -225,7 +225,7 @@ static struct platform_device rtc_device = {
static __init int add_rtc_cmos(void)
{
#ifdef CONFIG_PNP
static const char *ids[] __initconst =
static const char * const ids[] __initconst =
{ "PNP0b00", "PNP0b01", "PNP0b02", };
struct pnp_dev *dev;
struct pnp_id *id;

@@ -68,6 +68,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <linux/jiffies.h>

#include <video/edid.h>

@@ -957,7 +958,7 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
memblock_find_dma_reserve();

#ifdef CONFIG_KVM_CLOCK
#ifdef CONFIG_KVM_GUEST
kvmclock_init();
#endif

@@ -1032,6 +1033,8 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();

arch_init_ideal_nops();

register_refined_jiffies(CLOCK_TICK_RATE);
}

#ifdef CONFIG_X86_32

@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
regs->orig_ax = -1; /* disable syscall checks */

get_user_ex(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);

get_user_ex(*pax, &sc->ax);
} get_user_catch(err);

err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));

return err;
}

@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void __user **fpstate)
{
/* Default to using normal stack */
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);

#ifdef CONFIG_X86_64
/* redzone */
sp -= 128;
#endif /* CONFIG_X86_64 */
if (config_enabled(CONFIG_X86_64))
sp -= 128;

if (!onsigstack) {
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (current->sas_ss_size)
sp = current->sas_ss_sp + current->sas_ss_size;
} else {
#ifdef CONFIG_X86_32
/* This is the legacy signal stack switching. */
if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer)
} else if (config_enabled(CONFIG_X86_32) &&
(regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
#endif /* CONFIG_X86_32 */
}
}

if (used_math()) {
sp -= sig_xstate_size;
#ifdef CONFIG_X86_64
sp = round_down(sp, 64);
#endif /* CONFIG_X86_64 */
sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
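
The #ifdef blocks give way to config_enabled() here so that both branches are always parsed and type-checked while the compiler still discards the dead one. A simplified model of that idiom; the macro below is a stand-in, not the kernel's real config_enabled():

#include <stdio.h>

#define ENABLED_X86_64 1	/* stand-in for config_enabled(CONFIG_X86_64) */

static unsigned long adjust_sp(unsigned long sp)
{
	/* compiles on every configuration; folded away when the flag is 0 */
	if (ENABLED_X86_64)
		sp -= 128;	/* skip the ABI red zone below the stack pointer */
	return sp;
}

int main(void)
{
	printf("%lu\n", adjust_sp(1024));	/* prints 896 */
	return 0;
}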

@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (onsigstack && !likely(on_sig_stack(sp)))
return (void __user *)-1L;

/* save i387 state */
if (used_math() && save_i387_xstate(*fpstate) < 0)
/* save i387 and extended state */
if (used_math() &&
save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;

return (void __user *)sp;
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sig, &frame->sig);
put_user_ex(&frame->info, &frame->pinfo);
put_user_ex(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);

/* Create the ucontext. */
if (cpu_has_xsave)
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
*/
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);

err |= copy_siginfo_to_user(&frame->info, info);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

if (err)
return -EFAULT;
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
} put_user_catch(err);

err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

if (err)
return -EFAULT;

@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
#endif /* CONFIG_X86_32 */

static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;

frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);

if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;

if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}

put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);

if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);

err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

if (err)
return -EFAULT;

/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;

/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;

loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);

regs->cs = __USER_CS;
regs->ss = __USER_DS;
#endif /* CONFIG_X86_X32_ABI */

return 0;
}
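
The three regs->di/si/dx stores line up with the first three integer-argument registers of the x86-64 SysV ABI, which is how a SA_SIGINFO handler expects its parameters. A userspace counterpart showing the receiving end:

#include <signal.h>
#include <unistd.h>

/* sig arrives in %rdi, info in %rsi, uc in %rdx -- the registers set above */
static void handler(int sig, siginfo_t *info, void *uc)
{
	(void)sig; (void)info; (void)uc;
	write(1, "caught\n", 7);	/* async-signal-safe */
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}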

#ifdef CONFIG_X86_32
/*
* Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +682,22 @@ static int signr_convert(int sig)
return sig;
}

#ifdef CONFIG_X86_32

#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame

#else /* !CONFIG_X86_32 */

#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else /* !CONFIG_IA32_EMULATION */
#define is_ia32 0
#endif /* CONFIG_IA32_EMULATION */

#ifdef CONFIG_X86_X32_ABI
#define is_x32 test_thread_flag(TIF_X32)

static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs);
#else /* !CONFIG_X86_X32_ABI */
#define is_x32 0
#endif /* CONFIG_X86_X32_ABI */

int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs);
int ia32_setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs *regs);

#endif /* CONFIG_X86_32 */

static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;

/* Set up the stack frame */
if (is_ia32) {
if (is_ia32_frame()) {
if (ka->sa.sa_flags & SA_SIGINFO)
return ia32_setup_rt_frame(usig, ka, info, set, regs);
return ia32_setup_rt_frame(usig, ka, info, cset, regs);
else
return ia32_setup_frame(usig, ka, set, regs);
#ifdef CONFIG_X86_X32_ABI
} else if (is_x32) {
return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
return ia32_setup_frame(usig, ka, cset, regs);
} else if (is_x32_frame()) {
return x32_setup_rt_frame(usig, ka, info, cset, regs);
} else {
return __setup_rt_frame(sig, ka, info, set, regs);
}
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
rcu_user_exit();

#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -801,9 +840,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();

#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
rcu_user_enter();
}

void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -824,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}

#ifdef CONFIG_X86_X32_ABI
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;

frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);

if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;

if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}

put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);

if (err)
return -EFAULT;

/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;

/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;

loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);

regs->cs = __USER_CS;
regs->ss = __USER_DS;

return 0;
}

asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;

@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;

alternatives_smp_switch(1);
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();

idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
preempt_enable();
}

void arch_disable_nonboot_cpus_begin(void)
{
/*
* Avoid the smp alternatives switch during the disable_nonboot_cpus().
* In the suspend path, we will be back in the SMP mode shortly anyways.
*/
skip_smp_alternatives = true;
}

void arch_disable_nonboot_cpus_end(void)
{
skip_smp_alternatives = false;
}

void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
if (system_state == SYSTEM_RUNNING)
pr_info("CPU %u is now offline\n", cpu);

if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);

@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
return 1;
}

void set_task_blockstep(struct task_struct *task, bool on)
{
unsigned long debugctl;

/*
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
* FIXME: this means that set/clear TIF_BLOCKSTEP is simply
* wrong if task != current, SIGKILL can wakeup the stopped
* tracee and set/clear can play with the running task, this
* can confuse the next __switch_to_xtra().
*/
local_irq_disable();
debugctl = get_debugctlmsr();
if (on) {
debugctl |= DEBUGCTLMSR_BTF;
set_tsk_thread_flag(task, TIF_BLOCKSTEP);
} else {
debugctl &= ~DEBUGCTLMSR_BTF;
clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
}
if (task == current)
update_debugctlmsr(debugctl);
local_irq_enable();
}
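
set_task_blockstep pairs a software flag with a hardware MSR bit under local_irq_disable so the two cannot diverge mid-update. The bit itself is just a mask; a standalone illustration of the toggle follows. DEBUGCTLMSR_BTF really is bit 1 of IA32_DEBUGCTL, but the "MSR" below is only a variable standing in for the real register:

#include <stdio.h>

#define DEBUGCTLMSR_BTF (1UL << 1)	/* branch-trap flag: trap on branches */

static unsigned long debugctl;	/* stand-in for the real MSR */

static void set_blockstep(int on)
{
	/* in the kernel this read-modify-write runs with irqs off */
	if (on)
		debugctl |= DEBUGCTLMSR_BTF;
	else
		debugctl &= ~DEBUGCTLMSR_BTF;
}

int main(void)
{
	set_blockstep(1);
	printf("0x%lx\n", debugctl);	/* 0x2 */
	set_blockstep(0);
	printf("0x%lx\n", debugctl);	/* 0x0 */
	return 0;
}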

/*
* Enable single or block step.
*/
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
* So no one should try to use debugger block stepping in a program
* that uses user-mode single stepping itself.
*/
if (enable_single_step(child) && block) {
unsigned long debugctl = get_debugctlmsr();

debugctl |= DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
set_tsk_thread_flag(child, TIF_BLOCKSTEP);
} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();

debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (enable_single_step(child) && block)
set_task_blockstep(child, true);
else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);
}

void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
/*
* Make sure block stepping (BTF) is disabled.
*/
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();

debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);

/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);

@@ -1,40 +0,0 @@
/*
* This file contains various random system calls that
* have a non-standard calling sequence on the Linux/i386
* platform.
*/

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/stat.h>
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/ipc.h>

#include <linux/uaccess.h>
#include <linux/unistd.h>

#include <asm/syscalls.h>

/*
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
int kernel_execve(const char *filename,
const char *const argv[],
const char *const envp[])
{
long __res;
asm volatile ("int $0x80"
: "=a" (__res)
: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
return __res;
}

@@ -55,6 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/rcu.h>

#include <asm/mach_traps.h>

@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}

static int __kprobes
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
*/
if (trapnr < X86_TRAP_UD) {
if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
return 0;
}
return -1;
}
#endif
if (!user_mode(regs)) {
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
}
return 0;
}

return -1;
}

static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;

#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
* traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
*/
if (trapnr < X86_TRAP_UD)
goto vm86_trap;
goto trap_signal;
}
#endif

if (!user_mode(regs))
goto kernel_trap;

#ifdef CONFIG_X86_32
trap_signal:
#endif
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
return;
/*
* We want error_code and trap_nr set for userspace faults and
* kernelspace faults which result in die(), but not
@@ -158,33 +174,20 @@ trap_signal:
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
return;

kernel_trap:
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
}
return;

#ifdef CONFIG_X86_32
vm86_trap:
if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
goto trap_signal;
return;
#endif
}

#define DO_ERROR(trapnr, signr, str, name) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
exception_enter(regs); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(regs); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
exception_exit(regs); \
}
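
For reference, after this change an instantiation such as DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) expands to roughly the following, with exception_enter/exception_exit bracketing every path out of the handler (hand-expanded sketch, not generated output):

dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
	exception_enter(regs);
	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
		       X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) {
		exception_exit(regs);
		return;
	}
	conditional_sti(regs);
	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
	exception_exit(regs);
}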
|
||||
|
||||
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
||||
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
|
||||
info.si_errno = 0; \
|
||||
info.si_code = sicode; \
|
||||
info.si_addr = (void __user *)siaddr; \
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
||||
== NOTIFY_STOP) \
|
||||
exception_enter(regs); \
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, \
|
||||
trapnr, signr) == NOTIFY_STOP) { \
|
||||
exception_exit(regs); \
|
||||
return; \
|
||||
} \
|
||||
conditional_sti(regs); \
|
||||
do_trap(trapnr, signr, str, regs, error_code, &info); \
|
||||
exception_exit(regs); \
|
||||
}
|
||||
|
||||
DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
|
||||
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
|
||||
/* Runs on IST stack */
|
||||
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
exception_enter(regs);
|
||||
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
|
||||
X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
|
||||
return;
|
||||
preempt_conditional_sti(regs);
|
||||
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
|
||||
preempt_conditional_cli(regs);
|
||||
X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
|
||||
preempt_conditional_sti(regs);
|
||||
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
|
||||
preempt_conditional_cli(regs);
|
||||
}
|
||||
exception_exit(regs);
|
||||
}
|
||||
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
static const char str[] = "double fault";
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
exception_enter(regs);
|
||||
/* Return not checked because double check cannot be ignored */
|
||||
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
|
||||
|
||||
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;

exception_enter(regs);
conditional_sti(regs);

#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
if (regs->flags & X86_VM_MASK) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit;
}
#endif

tsk = current;
if (!user_mode(regs))
goto gp_in_kernel;
if (!user_mode(regs)) {
if (fixup_exception(regs))
goto exit;

tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die("general protection fault", regs, error_code);
goto exit;
}

tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
}

force_sig(SIGSEGV, tsk);
return;

#ifdef CONFIG_X86_32
gp_in_vm86:
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
return;
#endif

gp_in_kernel:
if (fixup_exception(regs))
return;

tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
return;
die("general protection fault", regs, error_code);
exit:
exception_exit(regs);
}

/* May run on IST stack. */
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
exception_enter(regs);
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */

if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;

/*
* Let others (NMI) know that the debug stack is in use
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
exit:
exception_exit(regs);
}

#ifdef CONFIG_X86_64
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
unsigned long dr6;
int si_code;

exception_enter(regs);

get_debugreg(dr6, 6);

/* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)

/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
goto exit;

/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)

if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;

/*
* Let others (NMI) know that the debug stack is in use
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
X86_TRAP_DB);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
return;
goto exit;
}

/*
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
debug_stack_usage_dec();

return;
exit:
exception_exit(regs);
}

/*
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
ignore_fpu_irq = 1;
#endif

exception_enter(regs);
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(regs);
}

dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
math_error(regs, error_code, X86_TRAP_XF);
exception_exit(regs);
}

dotraplinkage void
@@ -613,11 +628,12 @@ void math_state_restore(void)
}

__thread_fpu_begin(tsk);

/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
__thread_fpu_end(tsk);
drop_init_fpu(tsk);
force_sig(SIGSEGV, tsk);
return;
}
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
BUG_ON(use_eager_fpu());

#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)

info.regs = regs;
math_emulate(&info);
exception_exit(regs);
return;
}
#endif
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
exception_exit(regs);
}

#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;

exception_enter(regs);
local_irq_enable();

info.si_signo = SIGILL;
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
info.si_code = ILL_BADSTK;
info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
return;
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
}
exception_exit(regs);
}
#endif

@@ -41,6 +41,9 @@
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2

/* Instruction will modify TF, don't change it */
#define UPROBE_FIX_SETF 0x4

#define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000

@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
insn_get_opcode(insn); /* should be a nop */

switch (OPCODE1(insn)) {
case 0x9d:
/* popf */
auprobe->fixups |= UPROBE_FIX_SETF;
break;
case 0xc3: /* ret/lret */
case 0xcb:
case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* Skip these instructions as per the currently known x86 ISA.
* 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
*/
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;

@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
return false;
}

bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
bool ret = __skip_sstep(auprobe, regs);
if (ret && (regs->flags & X86_EFLAGS_TF))
send_sig(SIGTRAP, current, 0);
return ret;
}

void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
struct pt_regs *regs = task_pt_regs(task);

autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);

regs->flags |= X86_EFLAGS_TF;
if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
set_task_blockstep(task, false);
}

void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
struct pt_regs *regs = task_pt_regs(task);
/*
* The state of TIF_BLOCKSTEP was not saved so we can get an extra
* SIGTRAP if we do not clear TF. We need to examine the opcode to
* make it right.
*/
if (unlikely(trapped)) {
if (!autask->saved_tf)
regs->flags &= ~X86_EFLAGS_TF;
} else {
if (autask->saved_tf)
send_sig(SIGTRAP, task, 0);
else if (!(auprobe->fixups & UPROBE_FIX_SETF))
regs->flags &= ~X86_EFLAGS_TF;
}
}

@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
if ((trapno == 3) || (trapno == 1)) {
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
/* setting this flag forces the code in entry_32.S to
call save_v86_state() and change the stack pointer
to KVM86->regs32 */
set_thread_flag(TIF_IRET);
the path where we call save_v86_state() and change
the stack pointer to KVM86->regs32 */
set_thread_flag(TIF_NOTIFY_RESUME);
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));

@@ -28,7 +28,7 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/topology.h>
#include <linux/clocksource.h>
#include <linux/timekeeper_internal.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
vsyscall_gtod_data.sys_tz = sys_tz;
}

void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
void update_vsyscall(struct timekeeper *tk)
{
struct timespec monotonic;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;

write_seqcount_begin(&vsyscall_gtod_data.seq);
write_seqcount_begin(&vdata->seq);

/* copy vsyscall data */
vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask;
vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->clock->cycle_last;
vdata->clock.mask = tk->clock->mask;
vdata->clock.mult = tk->mult;
vdata->clock.shift = tk->shift;

vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->xtime_nsec;

monotonic = timespec_add(*wall_time, *wtm);
vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->xtime_nsec
+ (tk->wall_to_monotonic.tv_nsec
<< tk->shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->shift)) {
vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->shift;
vdata->monotonic_time_sec++;
}

vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
vsyscall_gtod_data.monotonic_time_coarse =
timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);

write_seqcount_end(&vsyscall_gtod_data.seq);
vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
tk->wall_to_monotonic);

write_seqcount_end(&vdata->seq);
}

static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,

@@ -13,9 +13,13 @@
#include <asm/ftrace.h>

#ifdef CONFIG_FUNCTION_TRACER
/* mcount is defined in assembly */
/* mcount and __fentry__ are defined in assembly */
#ifdef CC_USING_FENTRY
EXPORT_SYMBOL(__fentry__);
#else
EXPORT_SYMBOL(mcount);
#endif
#endif

EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);

@@ -10,9 +10,7 @@
#include <linux/compat.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/sigcontext32.h>
#endif
#include <asm/sigframe.h>
#include <asm/xcr.h>

/*
@@ -23,13 +21,9 @@ u64 pcntxt_mask;
/*
* Represents init state for the supported extended state.
*/
static struct xsave_struct *init_xstate_buf;

struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION
struct _fpx_sw_bytes fx_sw_reserved_ia32;
#endif
struct xsave_struct *init_xstate_buf;

static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;

/*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
*/
void __sanitize_i387_state(struct task_struct *tsk)
{
u64 xstate_bv;
int feature_bit = 0x2;
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
int feature_bit = 0x2;
u64 xstate_bv;

if (!fx)
return;
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *fx_sw_user)
static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *fx_sw)
{
int min_xstate_size = sizeof(struct i387_fxsave_struct) +
sizeof(struct xsave_hdr_struct);
unsigned int magic2;
int err;

err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
sizeof(struct _fpx_sw_bytes));
if (err)
return -EFAULT;
if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
return -1;

/*
* First Magic check failed.
*/
if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
return -EINVAL;
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
fx_sw->xstate_size > xstate_size ||
fx_sw->xstate_size > fx_sw->extended_size)
return -1;

/*
* Check for error scenarios.
*/
if (fx_sw_user->xstate_size < min_xstate_size ||
fx_sw_user->xstate_size > xstate_size ||
fx_sw_user->xstate_size > fx_sw_user->extended_size)
return -EINVAL;

err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
fx_sw_user->extended_size -
FP_XSTATE_MAGIC2_SIZE));
if (err)
return err;
/*
* Check for the presence of second magic word at the end of memory
* layout. This detects the case where the user just copied the legacy
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
if (magic2 != FP_XSTATE_MAGIC2)
return -EFAULT;
if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
|| magic2 != FP_XSTATE_MAGIC2)
return -1;

return 0;
}

#ifdef CONFIG_X86_64
/*
* Signal frame handlers.
*/

int save_i387_xstate(void __user *buf)
static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
{
struct task_struct *tsk = current;
int err = 0;
if (use_fxsr()) {
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
struct _fpstate_ia32 __user *fp = buf;

if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
return -EACCES;
convert_from_fxsr(&env, tsk);

BUG_ON(sig_xstate_size < xstate_size);

if ((unsigned long)buf % 64)
pr_err("%s: bad fpstate %p\n", __func__, buf);

if (!used_math())
return 0;

if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
err = fxsave_user(buf);

if (err)
return err;
user_fpu_end();
if (__copy_to_user(buf, &env, sizeof(env)) ||
__put_user(xsave->i387.swd, &fp->status) ||
__put_user(X86_FXSR_MAGIC, &fp->magic))
return -1;
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
xstate_size))
struct i387_fsave_struct __user *fp = buf;
u32 swd;
if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
return -1;
}

clear_used_math(); /* trigger finit */

if (use_xsave()) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
u64 xstate_bv;

err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));

err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));

/*
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers and
* set the FP/SSE bits.
*/
err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);

/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;

err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);

if (err)
return err;
}

return 1;
return 0;
}

/*
* Restore the extended state if present. Otherwise, restore the FP/SSE
* state.
*/
static int restore_user_xstate(void __user *buf)
static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
{
struct _fpx_sw_bytes fx_sw_user;
u64 mask;
struct xsave_struct __user *x = buf;
struct _fpx_sw_bytes *sw_bytes;
u32 xstate_bv;
int err;

if (((unsigned long)buf % 64) ||
check_for_xstate(buf, buf, &fx_sw_user))
goto fx_only;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));

mask = fx_sw_user.xstate_bv;

/*
* restore the state passed by the user.
*/
err = xrestore_user(buf, mask);
if (err)
if (!use_xsave())
return err;

/*
* init the state skipped by the user.
*/
mask = pcntxt_mask & ~mask;
if (unlikely(mask))
xrstor_state(init_xstate_buf, mask);
err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));

return 0;

fx_only:
/*
* couldn't find the extended state information in the
* memory layout. Restore just the FP/SSE and init all
* the other extended state.
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers.
*/
xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);

/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;

err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);

return err;
}

static inline int save_user_xstate(struct xsave_struct __user *buf)
{
int err;

if (use_xsave())
err = xsave_user(buf);
else if (use_fxsr())
err = fxsave_user((struct i387_fxsave_struct __user *) buf);
else
err = fsave_user((struct i387_fsave_struct __user *) buf);

if (unlikely(err) && __clear_user(buf, xstate_size))
err = -EFAULT;
return err;
}

/*
* This restores directly out of user space. Exceptions are handled.
* Save the fpu, extended register state to the user signal frame.
*
* 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
* state is copied.
* 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
*
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
* If the fpu, extended register state is live, save the state directly
* to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
* copy the thread's fpu state to the user frame starting at 'buf_fx'.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
*
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
* indicating the absence/presence of the extended state to the user.
*/
int restore_i387_xstate(void __user *buf)
int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
{
struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
struct task_struct *tsk = current;
int err = 0;
int ia32_fxstate = (buf != buf_fx);

ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
config_enabled(CONFIG_IA32_EMULATION));

if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;

if (!HAVE_HWFP)
return fpregs_soft_get(current, NULL, 0,
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_ia32 __user *) buf) ? -1 : 1;

if (user_has_fpu()) {
/* Save the live register state to the user directly. */
if (save_user_xstate(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
fpu_fxsave(&tsk->thread.fpu);
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf_fx, xsave, xstate_size))
return -1;
}

/* Save the fsave header for the 32-bit frames. */
if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
return -1;

if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
return -1;

drop_init_fpu(tsk); /* trigger finit */

return 0;
}

static inline void
sanitize_restored_xstate(struct task_struct *tsk,
struct user_i387_ia32_struct *ia32_env,
u64 xstate_bv, int fx_only)
{
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;

if (use_xsave()) {
/* These bits must be zero. */
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;

/*
* Init the state that is not present in the memory
* layout and not enabled by the OS.
*/
if (fx_only)
xsave_hdr->xstate_bv = XSTATE_FPSSE;
else
xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
}

if (use_fxsr()) {
/*
* mscsr reserved bits must be masked to zero for security
* reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;

convert_to_fxsr(tsk, ia32_env);
}
}

/*
* Restore the extended state if present. Otherwise, restore the FP/SSE state.
*/
static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
{
if (use_xsave()) {
if ((unsigned long)buf % 64 || fx_only) {
u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
xrstor_state(init_xstate_buf, init_bv);
return fxrstor_user(buf);
} else {
u64 init_bv = pcntxt_mask & ~xbv;
if (unlikely(init_bv))
xrstor_state(init_xstate_buf, init_bv);
return xrestore_user(buf, xbv);
}
} else if (use_fxsr()) {
return fxrstor_user(buf);
} else
return frstor_user(buf);
}

int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
{
int ia32_fxstate = (buf != buf_fx);
struct task_struct *tsk = current;
int state_size = xstate_size;
u64 xstate_bv = 0;
int fx_only = 0;

ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
config_enabled(CONFIG_IA32_EMULATION));

if (!buf) {
if (used_math())
goto clear;
drop_init_fpu(tsk);
return 0;
} else
if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
return -EACCES;

if (!used_math()) {
err = init_fpu(tsk);
if (err)
return err;
}

user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
err = fxrstor_checking((__force struct i387_fxsave_struct *)
buf);
if (unlikely(err)) {
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;

if (!used_math() && init_fpu(tsk))
return -1;

if (!HAVE_HWFP) {
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0;
}

if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
/*
* Couldn't find the extended state information in the
* memory layout. Restore just the FP/SSE and init all
* the other extended state.
*/
state_size = sizeof(struct i387_fxsave_struct);
fx_only = 1;
} else {
state_size = fx_sw_user.xstate_size;
xstate_bv = fx_sw_user.xstate_bv;
}
}

if (ia32_fxstate) {
/*
* Encountered an error while doing the restore from the
* user buffer, clear the fpu state.
* For 32-bit frames with fxstate, copy the user state to the
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
clear:
clear_fpu(tsk);
clear_used_math();
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
int err = 0;

/*
* Drop the current fpu which clears used_math(). This ensures
* that any context-switch during the copy of the new state,
* avoids the intermediate state from getting restored/saved.
* Thus avoiding the new restored state from getting corrupted.
* We will be ready to restore/save the state only after
* set_used_math() is again set.
*/
drop_fpu(tsk);

if (__copy_from_user(xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
set_used_math();
}

if (use_eager_fpu())
math_state_restore();

return err;
} else {
/*
* For 64-bit frames and 32-bit fsave frames, restore the user
* state to the registers directly (with exceptions handled).
*/
user_fpu_begin();
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
drop_init_fpu(tsk);
return -1;
}
}
return err;

return 0;
}
#endif

/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -321,31 +428,22 @@ clear:
*/
static void prepare_fx_sw_frame(void)
{
int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
FP_XSTATE_MAGIC2_SIZE;
int fsave_header_size = sizeof(struct i387_fsave_struct);
int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;

sig_xstate_size = sizeof(struct _fpstate) + size_extended;

#ifdef CONFIG_IA32_EMULATION
sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
#endif

memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
if (config_enabled(CONFIG_X86_32))
size += fsave_header_size;

fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = sig_xstate_size;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
fx_sw_reserved.xstate_size = xstate_size;
#ifdef CONFIG_IA32_EMULATION
memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
#endif
}

#ifdef CONFIG_X86_64
unsigned int sig_xstate_size = sizeof(struct _fpstate);
#endif
if (config_enabled(CONFIG_IA32_EMULATION)) {
fx_sw_reserved_ia32 = fx_sw_reserved;
fx_sw_reserved_ia32.extended_size += fsave_header_size;
}
}

/*
* Enable the extended processor state save/restore feature
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
/*
* setup the xstate image representing the init state
*/
static void __init setup_xstate_init(void)
static void __init setup_init_fpu_buf(void)
{
setup_xstate_features();

/*
* Setup init_xstate_buf to represent the init state of
* all the features managed by the xsave
*/
init_xstate_buf = alloc_bootmem_align(xstate_size,
__alignof__(struct xsave_struct));
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
fx_finit(&init_xstate_buf->i387);

if (!cpu_has_xsave)
return;

setup_xstate_features();

clts();
/*
* Init all the features state with header_bv being 0x0
*/
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
* of any feature which is not represented by all zero's.
*/
xsave_state(init_xstate_buf, -1);
stts();
}

static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
static int __init eager_fpu_setup(char *s)
{
if (!strcmp(s, "on"))
eagerfpu = ENABLE;
else if (!strcmp(s, "off"))
eagerfpu = DISABLE;
else if (!strcmp(s, "auto"))
eagerfpu = AUTO;
return 1;
}
__setup("eagerfpu=", eager_fpu_setup);

/*
* Enable and initialize the xsave feature.
*/
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)

update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
setup_init_fpu_buf();

setup_xstate_init();
/* Auto enable eagerfpu for xsaveopt */
if (cpu_has_xsaveopt && eagerfpu != DISABLE)
eagerfpu = ENABLE;

pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
next_func = xstate_enable;
this_func();
}

static inline void __init eager_fpu_init_bp(void)
{
current->thread.fpu.state =
alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
if (!init_xstate_buf)
setup_init_fpu_buf();
}

void __cpuinit eager_fpu_init(void)
{
static __refdata void (*boot_func)(void) = eager_fpu_init_bp;

clear_used_math();
current_thread_info()->status = 0;

if (eagerfpu == ENABLE)
setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);

if (!cpu_has_eager_fpu) {
stts();
return;
}

if (boot_func) {
boot_func();
boot_func = NULL;
}

/*
* This is same as math_state_restore(). But use_xsave() is
* not yet patched to use math_state_restore().
*/
init_fpu(current);
__thread_fpu_begin(current);
if (cpu_has_xsave)
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}