Merge commit 'v3.7-rc1' into stable/for-linus-3.7

* commit 'v3.7-rc1': (10892 commits)
  Linux 3.7-rc1
  x86, boot: Explicitly include autoconf.h for hostprogs
  perf: Fix UAPI fallout
  ARM: config: make sure that platforms are ordered by option string
  ARM: config: sort select statements alphanumerically
  UAPI: (Scripted) Disintegrate include/linux/byteorder
  UAPI: (Scripted) Disintegrate include/linux
  UAPI: Unexport linux/blk_types.h
  UAPI: Unexport part of linux/ppp-comp.h
  perf: Handle new rbtree implementation
  procfs: don't need a PATH_MAX allocation to hold a string representation of an int
  vfs: embed struct filename inside of names_cache allocation if possible
  audit: make audit_inode take struct filename
  vfs: make path_openat take a struct filename pointer
  vfs: turn do_path_lookup into wrapper around struct filename variant
  audit: allow audit code to satisfy getname requests from its names_list
  vfs: define struct filename and have getname() return it
  btrfs: Fix compilation with user namespace support enabled
  userns: Fix posix_acl_file_xattr_userns gid conversion
  userns: Properly print bluetooth socket uids
  ...
This commit is contained in:
Konrad Rzeszutek Wilk
2012-10-19 15:19:19 -04:00
13183 changed files with 691970 additions and 373846 deletions

View File

@@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-y += syscall_$(BITS).o
obj-$(CONFIG_X86_64) += vsyscall_64.o
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
@@ -100,6 +99,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)

View File

@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
acpi_register_lapic(physid, ACPI_MADT_ENABLED);
/*
* If mp_register_lapic successfully generates a new logical cpu
* If acpi_register_lapic successfully generates a new logical cpu
* number, then the following will get us exactly what was mapped
*/
cpumask_andnot(new_map, cpu_present_mask, tmp_map);

View File

@@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void)
header->video_mode = saved_video_mode;
header->pmode_behavior = 0;
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
&header->pmode_efer_high))
header->pmode_efer_low = header->pmode_efer_high = 0;
if (!rdmsr_safe(MSR_EFER,
&header->pmode_efer_low,
&header->pmode_efer_high))
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
header->pmode_cr4 = read_cr4_safe();
header->pmode_behavior = 0;
if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
header->pmode_cr4 = read_cr4();
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
&header->pmode_misc_en_low,
&header->pmode_misc_en_high))

View File

@@ -23,19 +23,6 @@
#define MAX_PATCH_LEN (255-1)
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;
static int __init bootonly(char *str)
{
smp_alt_once = 1;
return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif
static int __initdata_or_module debug_alternative;
static int __init debug_alt(char *str)
@@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
/* turn DS segment override prefix into lock prefix */
if (*ptr == 0x3e)
text_poke(ptr, ((unsigned char []){0xf0}), 1);
};
}
mutex_unlock(&text_mutex);
}
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
{
const s32 *poff;
if (noreplace_smp)
return;
mutex_lock(&text_mutex);
for (poff = start; poff < end; poff++) {
u8 *ptr = (u8 *)poff + *poff;
@@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
/* turn lock prefix into DS segment override prefix */
if (*ptr == 0xf0)
text_poke(ptr, ((unsigned char []){0x3E}), 1);
};
}
mutex_unlock(&text_mutex);
}
@@ -359,7 +343,7 @@ struct smp_alt_module {
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
static bool uniproc_patched = false; /* protected by smp_alt */
void __init_or_module alternatives_smp_module_add(struct module *mod,
char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
{
struct smp_alt_module *smp;
if (noreplace_smp)
return;
mutex_lock(&smp_alt);
if (!uniproc_patched)
goto unlock;
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
text, text_end);
return;
}
if (num_possible_cpus() == 1)
/* Don't bother remembering, we'll never have to undo it. */
goto smp_unlock;
smp = kzalloc(sizeof(*smp), GFP_KERNEL);
if (NULL == smp)
return; /* we'll run the (safe but slow) SMP code then ... */
/* we'll run the (safe but slow) SMP code then ... */
goto unlock;
smp->mod = mod;
smp->name = name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
smp_unlock:
alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
mutex_unlock(&smp_alt);
}
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
{
struct smp_alt_module *item;
if (smp_alt_once || noreplace_smp)
return;
mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
mutex_unlock(&smp_alt);
DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
break;
}
mutex_unlock(&smp_alt);
}
bool skip_smp_alternatives;
void alternatives_smp_switch(int smp)
void alternatives_enable_smp(void)
{
struct smp_alt_module *mod;
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
pr_info("lockdep: fixing up alternatives\n");
#endif
if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
/* Why bother if there are no other CPUs? */
BUG_ON(num_possible_cpus() == 1);
mutex_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
* throw away all cached translations, which can be quite costly.
*/
if (smp == smp_mode) {
/* nothing */
} else if (smp) {
if (uniproc_patched) {
pr_info("switching to SMP code\n");
BUG_ON(num_online_cpus() != 1);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
} else {
pr_info("switching to UP code\n");
set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_unlock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
uniproc_patched = false;
}
smp_mode = smp;
mutex_unlock(&smp_alt);
}
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
* tables in case we know the number of CPUs will never ever
* change */
#ifdef CONFIG_HOTPLUG_CPU
if (num_possible_cpus() < 2)
smp_alt_once = 1;
#endif
#ifdef CONFIG_SMP
if (smp_alt_once) {
if (1 == num_possible_cpus()) {
pr_info("switching to UP code\n");
set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
alternatives_smp_unlock(__smp_locks, __smp_locks_end,
_text, _etext);
}
} else {
/* Patch to UP if other cpus not imminent. */
if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
uniproc_patched = true;
alternatives_smp_module_add(NULL, "core kernel",
__smp_locks, __smp_locks_end,
_text, _etext);
/* Only switch to UP mode if we don't immediately boot others */
if (num_present_cpus() == 1 || setup_max_cpus <= 1)
alternatives_smp_switch(0);
}
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
if (smp_alt_once)
if (!uniproc_patched || num_possible_cpus() == 1)
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
restart_nmi();
}

View File

@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
i++;
v1 >>= 1;
};
}
apic_printk(APIC_DEBUG, KERN_CONT "\n");

View File

@@ -30,7 +30,7 @@
static int numachip_system __read_mostly;
static struct apic apic_numachip __read_mostly;
static const struct apic apic_numachip __read_mostly;
static unsigned int get_apic_id(unsigned long x)
{
@@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
static struct apic apic_numachip __refconst = {
static const struct apic apic_numachip __refconst = {
.name = "NumaConnect system",
.probe = numachip_probe,

View File

@@ -69,4 +69,7 @@ void common(void) {
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
}

View File

@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif

View File

@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
}
#endif
static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
if (!cpu_has_invlpg)
return;
tlb_flushall_shift = 5;
if (c->x86 <= 0x11)
tlb_flushall_shift = 4;
}
static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
{
u32 ebx, eax, ecx, edx;
u16 mask = 0xfff;
if (c->x86 < 0xf)
return;
if (c->extended_cpuid_level < 0x80000006)
return;
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
tlb_lli_4k[ENTRIES] = ebx & mask;
/*
* K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
* characteristics from the CPUID function 0x80000005 instead.
*/
if (c->x86 == 0xf) {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
mask = 0xff;
}
/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!((eax >> 16) & mask)) {
u32 a, b, c, d;
cpuid(0x80000005, &a, &b, &c, &d);
tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
} else {
tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
}
/* a 4M entry uses two 2M entries */
tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
if (!(eax & mask)) {
/* Erratum 658 */
if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
tlb_lli_2m[ENTRIES] = 1024;
} else {
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
tlb_lli_2m[ENTRIES] = eax & 0xff;
}
} else
tlb_lli_2m[ENTRIES] = eax & mask;
tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
cpu_set_tlb_flushall_shift(c);
}
static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
.c_size_cache = amd_size_cache,
#endif
.c_early_init = early_init_amd,
.c_detect_tlb = cpu_detect_tlb_amd,
.c_bsp_init = bsp_init_amd,
.c_init = init_amd,
.c_x86_vendor = X86_VENDOR_AMD,

View File

@@ -165,10 +165,15 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
#endif
check_config();
check_fpu();
check_hlt();
check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
/*
* kernel_fpu_begin/end() in check_fpu() relies on the patched
* alternative instructions.
*/
check_fpu();
}

View File

@@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
}
#endif
static int disable_smep __cpuinitdata;
static __init int setup_disable_smep(char *arg)
{
disable_smep = 1;
setup_clear_cpu_cap(X86_FEATURE_SMEP);
return 1;
}
__setup("nosmep", setup_disable_smep);
static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP)) {
if (unlikely(disable_smep)) {
setup_clear_cpu_cap(X86_FEATURE_SMEP);
clear_in_cr4(X86_CR4_SMEP);
} else
set_in_cr4(X86_CR4_SMEP);
}
if (cpu_has(c, X86_FEATURE_SMEP))
set_in_cr4(X86_CR4_SMEP);
}
static __init int setup_disable_smap(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMAP);
return 1;
}
__setup("nosmap", setup_disable_smap);
static __always_inline void setup_smap(struct cpuinfo_x86 *c)
{
unsigned long eflags;
/* This should have been cleared long ago */
raw_local_save_flags(eflags);
BUG_ON(eflags & X86_EFLAGS_AC);
if (cpu_has(c, X86_FEATURE_SMAP))
set_in_cr4(X86_CR4_SMAP);
}
/*
@@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
"tlb_flushall_shift is 0x%x\n",
"tlb_flushall_shift: %d\n",
tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
c->cpu_index = 0;
filter_cpuid_features(c, false);
setup_smep(c);
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
}
@@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid;
}
setup_smep(c);
get_model_name(c); /* Default name */
detect_nopl(c);
@@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
/* Set up SMEP/SMAP */
setup_smep(c);
setup_smap(c);
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -942,8 +955,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
if (boot_cpu_data.cpuid_level >= 2)
cpu_detect_tlb(&boot_cpu_data);
cpu_detect_tlb(&boot_cpu_data);
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT "%s ", vendor);
if (c->x86_model_id[0])
printk(KERN_CONT "%s", c->x86_model_id);
printk(KERN_CONT "%s", strim(c->x86_model_id));
else
printk(KERN_CONT "%d86", c->x86);
printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
else
printk(KERN_CONT "\n");
printk(KERN_CONT ")\n");
print_cpu_msr(c);
}
@@ -1113,11 +1127,10 @@ void syscall_init(void)
/* Flags to clear on syscall */
wrmsrl(MSR_SYSCALL_MASK,
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
X86_EFLAGS_IOPL|X86_EFLAGS_AC);
}
unsigned long kernel_eflags;
/*
* Copies of the original ist values from the tss are only accessed during
* debugging, no special alignment required.
@@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs();
fpu_init();
xsave_init();
raw_local_save_flags(kernel_eflags);
if (is_uv_system())
uv_cpu_init();
@@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs();
fpu_init();
xsave_init();
}
#endif

View File

@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
int i, j, n;
unsigned int regs[4];
unsigned char *desc = (unsigned char *)regs;
if (c->cpuid_level < 2)
return;
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;

View File

@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
}
static cpumask_var_t mce_inject_cpumask;
static DEFINE_MUTEX(mce_inject_mutex);
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
put_online_cpus();
} else
#endif
{
preempt_disable();
raise_local();
preempt_enable();
}
}
/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
* so do it a jiffie or two later everywhere.
*/
schedule_timeout(2);
mutex_lock(&mce_inject_mutex);
raise_mce(&m);
mutex_unlock(&mce_inject_mutex);
return usize;
}

View File

@@ -28,6 +28,18 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
#endif
void mce_timer_kick(unsigned long interval);
#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);

View File

@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
int mce_bios_cmci_threshold __read_mostly;
struct mce_bank *mce_banks __read_mostly;
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static unsigned long mce_adjust_timer_default(unsigned long interval)
{
return interval;
}
static unsigned long (*mce_adjust_timer)(unsigned long interval) =
mce_adjust_timer_default;
static void mce_timer_fn(unsigned long data)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
mce_intel_cmci_poll();
}
/*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
* polling interval, otherwise increase the polling interval.
*/
iv = __this_cpu_read(mce_next_interval);
if (mce_notify_irq())
if (mce_notify_irq()) {
iv = max(iv / 2, (unsigned long) HZ/100);
else
} else {
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
iv = mce_adjust_timer(iv);
}
__this_cpu_write(mce_next_interval, iv);
/* Might have become 0 after CMCI storm subsided */
if (iv) {
t->expires = jiffies + iv;
add_timer_on(t, smp_processor_id());
}
}
t->expires = jiffies + iv;
add_timer_on(t, smp_processor_id());
/*
* Ensure that the timer is firing in @interval from now.
*/
void mce_timer_kick(unsigned long interval)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
unsigned long when = jiffies + interval;
unsigned long iv = __this_cpu_read(mce_next_interval);
if (timer_pending(t)) {
if (time_before(when, t->expires))
mod_timer_pinned(t, when);
} else {
t->expires = round_jiffies(when);
add_timer_on(t, smp_processor_id());
}
if (interval < iv)
__this_cpu_write(mce_next_interval, interval);
}
/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
mce_adjust_timer = mce_intel_adjust_timer;
break;
case X86_VENDOR_AMD:
mce_amd_feature_init(c);
@@ -1594,21 +1629,26 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
}
}
static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
unsigned long iv = mce_adjust_timer(check_interval * HZ);
__this_cpu_write(mce_next_interval, iv);
if (mce_ignore_ce || !iv)
return;
t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
}
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
unsigned long iv = check_interval * HZ;
unsigned int cpu = smp_processor_id();
setup_timer(t, mce_timer_fn, smp_processor_id());
if (mce_ignore_ce)
return;
__this_cpu_write(mce_next_interval, iv);
if (!iv)
return;
t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
setup_timer(t, mce_timer_fn, cpu);
mce_start_timer(cpu, t);
}
/* Handle unconfigured int18 (should never happen) */
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
* check, or 0 to not wait
* mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
*/
static int __init mcheck_enable(char *str)
{
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
mce_ignore_ce = 1;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
mce_bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
mce_bios_cmci_threshold = 1;
else if (isdigit(str[0])) {
get_option(&str, &tolerant);
if (*str == ',') {
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
&mce_cmci_disabled
};
static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
&mce_bios_cmci_threshold
};
static struct device_attribute *mce_device_attrs[] = {
&dev_attr_tolerant.attr,
&dev_attr_check_interval.attr,
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
&dev_attr_dont_log_ce.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
&dev_attr_bios_cmci_threshold.attr,
NULL
};
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
unsigned int cpu = (unsigned long)hcpu;
struct timer_list *t = &per_cpu(mce_timer, cpu);
switch (action) {
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
mce_device_create(cpu);
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
mce_device_remove(cpu);
mce_intel_hcpu_update(cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
del_timer_sync(t);
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
del_timer_sync(t);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
if (!mce_ignore_ce && check_interval) {
t->expires = round_jiffies(jiffies +
per_cpu(mce_next_interval, cpu));
add_timer_on(t, cpu);
}
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
case CPU_POST_DEAD:
/* intentionally ignoring frozen here */
cmci_rediscover(cpu);
mce_start_timer(cpu, t);
break;
}
if (action == CPU_POST_DEAD) {
/* intentionally ignoring frozen here */
cmci_rediscover(cpu);
}
return NOTIFY_OK;
}

View File

@@ -15,6 +15,8 @@
#include <asm/msr.h>
#include <asm/mce.h>
#include "mce-internal.h"
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
*/
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
#define CMCI_THRESHOLD 1
#define CMCI_POLL_INTERVAL (30 * HZ)
#define CMCI_STORM_INTERVAL (1 * HZ)
#define CMCI_STORM_THRESHOLD 15
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
enum {
CMCI_STORM_NONE,
CMCI_STORM_ACTIVE,
CMCI_STORM_SUBSIDED,
};
static atomic_t cmci_storm_on_cpus;
static int cmci_supported(int *banks)
{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}
void mce_intel_cmci_poll(void)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
return;
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
}
void mce_intel_hcpu_update(unsigned long cpu)
{
if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
atomic_dec(&cmci_storm_on_cpus);
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}
unsigned long mce_intel_adjust_timer(unsigned long interval)
{
int r;
if (interval < CMCI_POLL_INTERVAL)
return interval;
switch (__this_cpu_read(cmci_storm_state)) {
case CMCI_STORM_ACTIVE:
/*
* We switch back to interrupt mode once the poll timer has
* silenced itself. That means no events recorded and the
* timer interval is back to our poll interval.
*/
__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
r = atomic_sub_return(1, &cmci_storm_on_cpus);
if (r == 0)
pr_notice("CMCI storm subsided: switching to interrupt mode\n");
/* FALLTHROUGH */
case CMCI_STORM_SUBSIDED:
/*
* We wait for all cpus to go back to SUBSIDED
* state. When that happens we switch back to
* interrupt mode.
*/
if (!atomic_read(&cmci_storm_on_cpus)) {
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
cmci_reenable();
cmci_recheck();
}
return CMCI_POLL_INTERVAL;
default:
/*
* We have shiny weather. Let the poll do whatever it
* thinks.
*/
return interval;
}
}
static bool cmci_storm_detect(void)
{
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
unsigned long ts = __this_cpu_read(cmci_time_stamp);
unsigned long now = jiffies;
int r;
if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
return true;
if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
cnt++;
} else {
cnt = 1;
__this_cpu_write(cmci_time_stamp, now);
}
__this_cpu_write(cmci_storm_cnt, cnt);
if (cnt <= CMCI_STORM_THRESHOLD)
return false;
cmci_clear();
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
r = atomic_add_return(1, &cmci_storm_on_cpus);
mce_timer_kick(CMCI_POLL_INTERVAL);
if (r == 1)
pr_notice("CMCI storm detected: switching to poll mode\n");
return true;
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
*/
static void intel_threshold_interrupt(void)
{
if (cmci_storm_detect())
return;
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
mce_notify_irq();
}
static void print_update(char *type, int *hdr, int num)
{
if (*hdr == 0)
printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
*hdr = 1;
printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static void cmci_discover(int banks, int boot)
static void cmci_discover(int banks)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
unsigned long flags;
int hdr = 0;
int i;
int bios_wrong_thresh = 0;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
int bios_zero_thresh = 0;
if (test_bit(i, owned))
continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
/* Already owned by someone else? */
if (val & MCI_CTL2_CMCI_EN) {
if (test_and_clear_bit(i, owned) && !boot)
print_update("SHD", &hdr, i);
clear_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
}
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
if (!mce_bios_cmci_threshold) {
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= CMCI_THRESHOLD;
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
/*
* If bios_cmci_threshold boot option was specified
* but the threshold is zero, we'll try to initialize
* it to 1.
*/
bios_zero_thresh = 1;
val |= CMCI_THRESHOLD;
}
val |= MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & MCI_CTL2_CMCI_EN) {
if (!test_and_set_bit(i, owned) && !boot)
print_update("CMCI", &hdr, i);
set_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
/*
* We are able to set thresholds for some banks that
* had a threshold of 0. This means the BIOS has not
* set the thresholds properly or does not work with
* this boot option. Note down now and report later.
*/
if (mce_bios_cmci_threshold && bios_zero_thresh &&
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
if (mce_bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
pr_info_once(
"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
}
}
/*
@@ -156,7 +278,7 @@ void cmci_clear(void)
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
cmci_discover(banks, 0);
cmci_discover(banks);
}
set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
{
int banks;
if (cmci_supported(&banks))
cmci_discover(banks, 0);
cmci_discover(banks);
}
static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
return;
mce_threshold_vector = intel_threshold_interrupt;
cmci_discover(banks, 1);
cmci_discover(banks);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another

View File

@@ -8,7 +8,10 @@
open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
print OUT "#include <asm/cpufeature.h>\n\n";
print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
print OUT "#include <asm/cpufeature.h>\n";
print OUT "#endif\n";
print OUT "\n";
print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
%features = ();

View File

@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];
extern struct event_constraint intel_snb_pebs_event_constraints[];
extern struct event_constraint intel_ivb_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
@@ -624,6 +626,8 @@ int p4_pmu_init(void);
int p6_pmu_init(void);
int knc_pmu_init(void);
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)

View File

@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
};
struct perf_ibs {
struct pmu pmu;
unsigned int msr;
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
struct cpu_perf_ibs __percpu *pcpu;
u64 (*get_count)(u64 config);
struct pmu pmu;
unsigned int msr;
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
struct attribute_group format_group;
const struct attribute_group *attr_groups[2];
u64 (*get_count)(u64 config);
};
struct perf_ibs_data {
@@ -209,6 +214,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}
static const struct perf_event_attr ibs_notsupp = {
.exclude_user = 1,
.exclude_kernel = 1,
.exclude_hv = 1,
.exclude_idle = 1,
.exclude_host = 1,
.exclude_guest = 1,
};
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +243,9 @@ static int perf_ibs_init(struct perf_event *event)
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
return -EINVAL;
if (config & ~perf_ibs->config_mask)
return -EINVAL;
@@ -434,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
static struct attribute *ibs_fetch_format_attrs[] = {
&format_attr_rand_en.attr,
NULL,
};
static struct attribute *ibs_op_format_attrs[] = {
NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
NULL,
};
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
@@ -453,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
.format_attrs = ibs_fetch_format_attrs,
.get_count = get_ibs_fetch_count,
};
@@ -476,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
.format_attrs = ibs_op_format_attrs,
.get_count = get_ibs_op_count,
};
@@ -585,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
perf_ibs->pcpu = pcpu;
/* register attributes */
if (perf_ibs->format_attrs[0]) {
memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
perf_ibs->format_group.name = "format";
perf_ibs->format_group.attrs = perf_ibs->format_attrs;
memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
perf_ibs->attr_groups[0] = &perf_ibs->format_group;
perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
}
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
if (ret) {
perf_ibs->pcpu = NULL;
@@ -596,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
static __init int perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT)
if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr;
}
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);

View File

@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
switch (boot_cpu_data.x86) {
case 0x6:
return p6_pmu_init();
case 0xb:
return knc_pmu_init();
case 0xf:
return p4_pmu_init();
}
@@ -2008,6 +2010,7 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
case 54: /* Cedariew */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2047,7 +2050,6 @@ __init int intel_pmu_init(void)
case 42: /* SandyBridge */
case 45: /* SandyBridge, "Romely-EP" */
x86_add_quirk(intel_sandybridge_quirk);
case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2072,6 +2074,29 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
pr_cont("IvyBridge events, ");
break;
default:
switch (x86_pmu.version) {

View File

@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;

View File

@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
* to have an operational LBR which can freeze
* on PMU interrupt
*/
if (boot_cpu_data.x86_mask < 10) {
if (boot_cpu_data.x86_model == 28
&& boot_cpu_data.x86_mask < 10) {
pr_cont("LBR disabled due to erratum");
return;
}

View File

@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
static struct uncore_event_desc snb_uncore_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
{ /* end: all zeroes */ },
};
static struct attribute *snb_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
.constraints = snb_uncore_cbox_constraints,
.ops = &snb_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
.event_descs = snb_uncore_events,
};
static struct intel_uncore_type *snb_msr_uncores[] = {
@@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
static struct intel_uncore_box *box;
struct intel_uncore_box *box;
box = *per_cpu_ptr(pmu->box, cpu);
if (box)
@@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
buf[n++] = '\n';
buf[n] = '\0';
return n;
}
static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
static struct attribute *uncore_pmu_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};
static struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
int ret;
@@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
free_percpu(type->pmus[i].box);
kfree(type->pmus);
type->pmus = NULL;
kfree(type->attr_groups[1]);
type->attr_groups[1] = NULL;
kfree(type->events_group);
type->events_group = NULL;
}
static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
for (j = 0; j < i; j++)
attrs[j] = &type->event_descs[j].attr.attr;
type->attr_groups[1] = events_group;
type->events_group = events_group;
}
type->pmu_group = &uncore_pmu_attr_group;
type->pmus = pmus;
return 0;
fail:

View File

@@ -369,10 +369,12 @@ struct intel_uncore_type {
struct intel_uncore_pmu *pmus;
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
const struct attribute_group *attr_groups[3];
const struct attribute_group *attr_groups[4];
};
#define format_group attr_groups[0]
#define pmu_group attr_groups[0]
#define format_group attr_groups[1]
#define events_group attr_groups[2]
struct intel_uncore_ops {
void (*init_box)(struct intel_uncore_box *);

View File

@@ -0,0 +1,248 @@
/* Driver for Intel Xeon Phi "Knights Corner" PMU */
#include <linux/perf_event.h>
#include <linux/types.h>
#include "perf_event.h"
static const u64 knc_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
};
static __initconst u64 knc_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
/* On Xeon Phi event "0" is a valid DATA_READ */
/* (L1 Data Cache Reads) Instruction. */
/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
/* bit will always be set in x86_pmu_hw_config(). */
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
/* DATA_READ */
[ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
[ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
[ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
[ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
[ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
/* DATA_READ */
/* see note on L1 OP_READ */
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
[ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
[ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};
static u64 knc_pmu_event_map(int hw_event)
{
return knc_perfmon_event_map[hw_event];
}
static struct event_constraint knc_event_constraints[] =
{
INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
EVENT_CONSTRAINT_END
};
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
#define KNC_ENABLE_COUNTER0 0x00000001
#define KNC_ENABLE_COUNTER1 0x00000002
static void knc_pmu_disable_all(void)
{
u64 val;
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
}
static void knc_pmu_enable_all(int added)
{
u64 val;
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
}
static inline void
knc_pmu_disable_event(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
if (cpuc->enabled)
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
static void knc_pmu_enable_event(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
static struct attribute *intel_knc_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};
static __initconst struct x86_pmu knc_pmu = {
.name = "knc",
.handle_irq = x86_pmu_handle_irq,
.disable_all = knc_pmu_disable_all,
.enable_all = knc_pmu_enable_all,
.enable = knc_pmu_enable_event,
.disable = knc_pmu_disable_event,
.hw_config = x86_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_KNC_EVNTSEL0,
.perfctr = MSR_KNC_PERFCTR0,
.event_map = knc_pmu_event_map,
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
/* in theory 40 bits, early silicon is buggy though */
.cntval_bits = 32,
.cntval_mask = (1ULL << 32) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = knc_event_constraints,
.format_attrs = intel_knc_formats_attr,
};
__init int knc_pmu_init(void)
{
x86_pmu = knc_pmu;
memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
return 0;
}

View File

@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_PERFCTR0;
case 11:
return msr - MSR_KNC_PERFCTR0;
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_EVNTSEL0;
case 11:
return msr - MSR_KNC_EVNTSEL0;
case 15:
return msr - MSR_P4_BSU_ESCR0;
}

View File

@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
static void *c_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0) /* just in case, cpu 0 is not the first */
*pos = cpumask_first(cpu_online_mask);
else
*pos = cpumask_next(*pos - 1, cpu_online_mask);
*pos = cpumask_next(*pos - 1, cpu_online_mask);
if ((*pos) < nr_cpu_ids)
return &cpu_data(*pos);
return NULL;

View File

@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
goto out_chrdev;
}
cpuid_class->devnode = cpuid_devnode;
get_online_cpus();
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
put_online_cpus();
err = 0;
goto out;
@@ -214,6 +216,7 @@ out_class:
for_each_online_cpu(i) {
cpuid_device_destroy(i);
}
put_online_cpus();
class_destroy(cpuid_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
{
int cpu = 0;
get_online_cpus();
for_each_online_cpu(cpu)
cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
put_online_cpus();
}
module_init(cpuid_init);

View File

@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
.xlate = ioapic_xlate,
};
static void dt_add_ioapic_domain(unsigned int ioapic_num,
struct device_node *np)
{
struct irq_domain *id;
struct mp_ioapic_gsi *gsi_cfg;
int ret;
int num;
gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
(void *)ioapic_num);
BUG_ON(!id);
if (gsi_cfg->gsi_base == 0) {
/*
* The first NR_IRQS_LEGACY irq descs are allocated in
* early_irq_init() and need just a mapping. The
* remaining irqs need both. All of them are preallocated
* and assigned so we can keep the 1:1 mapping which the ioapic
* is having.
*/
ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
if (ret)
pr_err("Error mapping legacy IRQs: %d\n", ret);
if (num > NR_IRQS_LEGACY) {
ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
if (ret)
pr_err("Error creating mapping for the "
"remaining IRQs: %d\n", ret);
}
irq_set_default_host(id);
} else {
ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
if (ret)
pr_err("Error creating IRQ mapping: %d\n", ret);
}
}
static void __init ioapic_add_ofnode(struct device_node *np)
{
struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
for (i = 0; i < nr_ioapics; i++) {
if (r.start == mpc_ioapic_addr(i)) {
struct irq_domain *id;
struct mp_ioapic_gsi *gsi_cfg;
gsi_cfg = mp_ioapic_gsi_routing(i);
id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
&ioapic_irq_domain_ops,
(void*)i);
BUG_ON(!id);
dt_add_ioapic_domain(i, np);
return;
}
}

View File

@@ -57,6 +57,7 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -298,6 +299,21 @@ ENTRY(ret_from_fork)
CFI_ENDPROC
END(ret_from_fork)
ENTRY(ret_from_kernel_thread)
CFI_STARTPROC
pushl_cfi %eax
call schedule_tail
GET_THREAD_INFO(%ebp)
popl_cfi %eax
pushl_cfi $0x0202 # Reset kernel eflags
popfl_cfi
movl PT_EBP(%esp),%eax
call *PT_EBX(%esp)
movl $0,PT_EAX(%esp)
jmp syscall_exit
CFI_ENDPROC
ENDPROC(ret_from_kernel_thread)
/*
* Interrupt exit functions should be protected against kprobes
*/
@@ -322,8 +338,7 @@ ret_from_intr:
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
/*
* We can be coming here from a syscall done in the kernel space,
* e.g. a failed kernel_execve().
* We can be coming here from child spawned by kernel_thread().
*/
movl PT_CS(%esp), %eax
andl $SEGMENT_RPL_MASK, %eax
@@ -407,7 +422,9 @@ sysenter_past_esp:
*/
cmpl $__PAGE_OFFSET-3,%ebp
jae syscall_fault
ASM_STAC
1: movl (%ebp),%ebp
ASM_CLAC
movl %ebp,PT_EBP(%esp)
_ASM_EXTABLE(1b,syscall_fault)
@@ -488,6 +505,7 @@ ENDPROC(ia32_sysenter_target)
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
ASM_CLAC
pushl_cfi %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
@@ -612,22 +630,7 @@ work_notifysig: # deal with pending signals and
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
movb PT_CS(%esp), %bl
andb $SEGMENT_RPL_MASK, %bl
cmpb $USER_RPL, %bl
jb resume_kernel
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace
ALIGN
work_notifysig_v86:
pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl_cfi %ecx
movl %eax, %esp
1:
#else
movl %esp, %eax
#endif
@@ -640,6 +643,16 @@ work_notifysig_v86:
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace
#ifdef CONFIG_VM86
ALIGN
work_notifysig_v86:
pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl_cfi %ecx
movl %eax, %esp
jmp 1b
#endif
END(work_pending)
# perform syscall exit tracing
@@ -670,6 +683,7 @@ END(syscall_exit_work)
RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
ASM_CLAC
GET_THREAD_INFO(%ebp)
movl $-EFAULT,PT_EAX(%esp)
jmp resume_userspace
@@ -727,7 +741,6 @@ ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
@@ -825,6 +838,7 @@ END(interrupt)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
ASM_CLAC
addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
@@ -841,6 +855,7 @@ ENDPROC(common_interrupt)
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
ASM_CLAC; \
pushl_cfi $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
@@ -857,6 +872,7 @@ ENDPROC(name)
ENTRY(coprocessor_error)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_coprocessor_error
jmp error_code
@@ -865,6 +881,7 @@ END(coprocessor_error)
ENTRY(simd_coprocessor_error)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
@@ -886,6 +903,7 @@ END(simd_coprocessor_error)
ENTRY(device_not_available)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $-1 # mark this as an int
pushl_cfi $do_device_not_available
jmp error_code
@@ -906,6 +924,7 @@ END(native_irq_enable_sysexit)
ENTRY(overflow)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_overflow
jmp error_code
@@ -914,6 +933,7 @@ END(overflow)
ENTRY(bounds)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_bounds
jmp error_code
@@ -922,6 +942,7 @@ END(bounds)
ENTRY(invalid_op)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_invalid_op
jmp error_code
@@ -930,6 +951,7 @@ END(invalid_op)
ENTRY(coprocessor_segment_overrun)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_coprocessor_segment_overrun
jmp error_code
@@ -938,6 +960,7 @@ END(coprocessor_segment_overrun)
ENTRY(invalid_TSS)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_invalid_TSS
jmp error_code
CFI_ENDPROC
@@ -945,6 +968,7 @@ END(invalid_TSS)
ENTRY(segment_not_present)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_segment_not_present
jmp error_code
CFI_ENDPROC
@@ -952,6 +976,7 @@ END(segment_not_present)
ENTRY(stack_segment)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_stack_segment
jmp error_code
CFI_ENDPROC
@@ -959,6 +984,7 @@ END(stack_segment)
ENTRY(alignment_check)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_alignment_check
jmp error_code
CFI_ENDPROC
@@ -966,6 +992,7 @@ END(alignment_check)
ENTRY(divide_error)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0 # no error code
pushl_cfi $do_divide_error
jmp error_code
@@ -975,6 +1002,7 @@ END(divide_error)
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi machine_check_vector
jmp error_code
@@ -984,6 +1012,7 @@ END(machine_check)
ENTRY(spurious_interrupt_bug)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $0
pushl_cfi $do_spurious_interrupt_bug
jmp error_code
@@ -994,16 +1023,6 @@ END(spurious_interrupt_bug)
*/
.popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
CFI_STARTPROC
movl %edi,%eax
call *%esi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
entrypoint expects, so fix it up before using the normal path. */
@@ -1111,17 +1130,21 @@ ENTRY(ftrace_caller)
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
pushl $0 /* Pass NULL as regs pointer */
movl 4*4(%esp), %eax
movl 0x4(%ebp), %edx
leal function_trace_op, %ecx
subl $MCOUNT_INSN_SIZE, %eax
.globl ftrace_call
ftrace_call:
call ftrace_stub
addl $4,%esp /* skip NULL pointer */
popl %edx
popl %ecx
popl %eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
@@ -1133,6 +1156,71 @@ ftrace_stub:
ret
END(ftrace_caller)
ENTRY(ftrace_regs_caller)
pushf /* push flags before compare (in cs location) */
cmpl $0, function_trace_stop
jne ftrace_restore_flags
/*
* i386 does not save SS and ESP when coming from kernel.
* Instead, to get sp, &regs->sp is used (see ptrace.h).
* Unfortunately, that means eflags must be at the same location
* as the current return ip is. We move the return ip into the
* ip location, and move flags into the return ip location.
*/
pushl 4(%esp) /* save return ip into ip slot */
pushl $0 /* Load 0 into orig_ax */
pushl %gs
pushl %fs
pushl %es
pushl %ds
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
movl 13*4(%esp), %eax /* Get the saved flags */
movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
/* clobbering return ip */
movl $__KERNEL_CS,13*4(%esp)
movl 12*4(%esp), %eax /* Load ip (1st parameter) */
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
pushl %esp /* Save pt_regs as 4th parameter */
GLOBAL(ftrace_regs_call)
call ftrace_stub
addl $4, %esp /* Skip pt_regs */
movl 14*4(%esp), %eax /* Move flags back into cs */
movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
movl 12*4(%esp), %eax /* Get return ip from regs->ip */
movl %eax, 14*4(%esp) /* Put return ip back for ret */
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
popl %ds
popl %es
popl %fs
popl %gs
addl $8, %esp /* Skip orig_ax and ip */
popf /* Pop flags at end (no addl to corrupt flags) */
jmp ftrace_ret
ftrace_restore_flags:
popf
jmp ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
@@ -1173,9 +1261,6 @@ END(mcount)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
pushl %eax
pushl %ecx
pushl %edx
@@ -1209,6 +1294,7 @@ return_to_handler:
ENTRY(page_fault)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_page_fault
ALIGN
error_code:
@@ -1281,6 +1367,7 @@ END(page_fault)
ENTRY(debug)
RING0_INT_FRAME
ASM_CLAC
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
@@ -1305,6 +1392,7 @@ END(debug)
*/
ENTRY(nmi)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
@@ -1375,6 +1463,7 @@ END(nmi)
ENTRY(int3)
RING0_INT_FRAME
ASM_CLAC
pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
@@ -1395,6 +1484,7 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
ASM_CLAC
pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC

View File

@@ -56,6 +56,8 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/rcu.h>
#include <asm/smap.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -68,25 +70,51 @@
.section .entry.text, "ax"
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CC_USING_FENTRY
# define function_hook __fentry__
#else
# define function_hook mcount
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
ENTRY(function_hook)
retq
END(mcount)
END(function_hook)
/* skip is set if stack has been adjusted */
.macro ftrace_caller_setup skip=0
MCOUNT_SAVE_FRAME \skip
/* Load the ftrace_ops into the 3rd parameter */
leaq function_trace_op, %rdx
/* Load ip into the first parameter */
movq RIP(%rsp), %rdi
subq $MCOUNT_INSN_SIZE, %rdi
/* Load the parent_ip into the second parameter */
#ifdef CC_USING_FENTRY
movq SS+16(%rsp), %rsi
#else
movq 8(%rbp), %rsi
#endif
.endm
ENTRY(ftrace_caller)
/* Check if tracing was disabled (quick check) */
cmpl $0, function_trace_stop
jne ftrace_stub
MCOUNT_SAVE_FRAME
movq 0x38(%rsp), %rdi
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
ftrace_caller_setup
/* regs go into 4th parameter (but make it NULL) */
movq $0, %rcx
GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
ftrace_return:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -97,8 +125,78 @@ GLOBAL(ftrace_stub)
retq
END(ftrace_caller)
ENTRY(ftrace_regs_caller)
/* Save the current flags before compare (in SS location)*/
pushfq
/* Check if tracing was disabled (quick check) */
cmpl $0, function_trace_stop
jne ftrace_restore_flags
/* skip=8 to skip flags saved in SS */
ftrace_caller_setup 8
/* Save the rest of pt_regs */
movq %r15, R15(%rsp)
movq %r14, R14(%rsp)
movq %r13, R13(%rsp)
movq %r12, R12(%rsp)
movq %r11, R11(%rsp)
movq %r10, R10(%rsp)
movq %rbp, RBP(%rsp)
movq %rbx, RBX(%rsp)
/* Copy saved flags */
movq SS(%rsp), %rcx
movq %rcx, EFLAGS(%rsp)
/* Kernel segments */
movq $__KERNEL_DS, %rcx
movq %rcx, SS(%rsp)
movq $__KERNEL_CS, %rcx
movq %rcx, CS(%rsp)
/* Stack - skipping return address */
leaq SS+16(%rsp), %rcx
movq %rcx, RSP(%rsp)
/* regs go into 4th parameter */
leaq (%rsp), %rcx
GLOBAL(ftrace_regs_call)
call ftrace_stub
/* Copy flags back to SS, to restore them */
movq EFLAGS(%rsp), %rax
movq %rax, SS(%rsp)
/* Handlers can change the RIP */
movq RIP(%rsp), %rax
movq %rax, SS+8(%rsp)
/* restore the rest of pt_regs */
movq R15(%rsp), %r15
movq R14(%rsp), %r14
movq R13(%rsp), %r13
movq R12(%rsp), %r12
movq R10(%rsp), %r10
movq RBP(%rsp), %rbp
movq RBX(%rsp), %rbx
/* skip=8 to skip flags saved in SS */
MCOUNT_RESTORE_FRAME 8
/* Restore flags */
popfq
jmp ftrace_return
ftrace_restore_flags:
popfq
jmp ftrace_stub
END(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub
@@ -119,8 +217,12 @@ GLOBAL(ftrace_stub)
trace:
MCOUNT_SAVE_FRAME
movq 0x38(%rsp), %rdi
movq RIP(%rsp), %rdi
#ifdef CC_USING_FENTRY
movq SS+16(%rsp), %rsi
#else
movq 8(%rbp), %rsi
#endif
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
@@ -128,20 +230,22 @@ trace:
MCOUNT_RESTORE_FRAME
jmp ftrace_stub
END(mcount)
END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
cmpl $0, function_trace_stop
jne ftrace_stub
MCOUNT_SAVE_FRAME
#ifdef CC_USING_FENTRY
leaq SS+16(%rsp), %rdi
movq $0, %rdx /* No framepointers needed */
#else
leaq 8(%rbp), %rdi
movq 0x38(%rsp), %rsi
movq (%rbp), %rdx
#endif
movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
@@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64)
.macro SAVE_ARGS_IRQ
cld
/* start from rbp in pt_regs and jump over */
movq_cfi rdi, RDI-RBP
movq_cfi rsi, RSI-RBP
movq_cfi rdx, RDX-RBP
movq_cfi rcx, RCX-RBP
movq_cfi rax, RAX-RBP
movq_cfi r8, R8-RBP
movq_cfi r9, R9-RBP
movq_cfi r10, R10-RBP
movq_cfi r11, R11-RBP
movq_cfi rdi, (RDI-RBP)
movq_cfi rsi, (RSI-RBP)
movq_cfi rdx, (RDX-RBP)
movq_cfi rcx, (RCX-RBP)
movq_cfi rax, (RAX-RBP)
movq_cfi r8, (R8-RBP)
movq_cfi r9, (R9-RBP)
movq_cfi r10, (R10-RBP)
movq_cfi r11, (R11-RBP)
/* Save rbp so that we can unwind from get_irq_regs() */
movq_cfi rbp, 0
@@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64)
.endm
ENTRY(save_rest)
PARTIAL_FRAME 1 REST_SKIP+8
PARTIAL_FRAME 1 (REST_SKIP+8)
movq 5*8+16(%rsp), %r11 /* save return address */
movq_cfi rbx, RBX+16
movq_cfi rbp, RBP+16
@@ -440,7 +544,7 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK,TI_flags(%r8)
pushq_cfi kernel_eflags(%rip)
pushq_cfi $0x0002
popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
@@ -450,7 +554,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
jz retint_restore_args
jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -458,6 +562,14 @@ ENTRY(ret_from_fork)
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
1:
subq $REST_SKIP, %rsp # leave space for volatiles
CFI_ADJUST_CFA_OFFSET REST_SKIP
movq %rbp, %rdi
call *%rbx
movl $0, RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC
END(ret_from_fork)
@@ -465,7 +577,8 @@ END(ret_from_fork)
* System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
* stack pointer. However, it does mask the flags register for us, so
* CLD and CLAC are not needed.
*/
/*
@@ -565,7 +678,7 @@ sysret_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
call schedule
SCHEDULE_USER
popq_cfi %rdi
jmp sysret_check
@@ -678,7 +791,7 @@ int_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
call schedule
SCHEDULE_USER
popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -757,7 +870,6 @@ ENTRY(stub_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -807,8 +919,7 @@ ENTRY(stub_x32_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
call sys32_execve
call compat_sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
@@ -884,6 +995,7 @@ END(interrupt)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
ASM_CLAC
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
@@ -974,7 +1086,7 @@ retint_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
call schedule
SCHEDULE_USER
popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1023,6 +1135,7 @@ END(common_interrupt)
*/
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
pushq_cfi $~(\num)
.Lcommon_\sym:
@@ -1077,6 +1190,7 @@ apicinterrupt IRQ_WORK_VECTOR \
*/
.macro zeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1094,6 +1208,7 @@ END(\sym)
.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1112,6 +1227,7 @@ END(\sym)
#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
@@ -1131,6 +1247,7 @@ END(\sym)
.macro errorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
@@ -1149,6 +1266,7 @@ END(\sym)
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
ASM_CLAC
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
@@ -1206,52 +1324,6 @@ bad_gs:
jmp 2b
.previous
ENTRY(kernel_thread_helper)
pushq $0 # fake return address
CFI_STARTPROC
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
call *%rsi
# exit
mov %eax, %edi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
END(kernel_thread_helper)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
* C extern interface:
* extern long execve(const char *name, char **argv, char **envp)
*
* asm input arguments:
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
* extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
RESTORE_ARGS
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
END(kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1449,7 +1521,7 @@ paranoid_userspace:
paranoid_schedule:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
SCHEDULE_USER
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
jmp paranoid_userspace

View File

@@ -206,6 +206,21 @@ static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code);
/*
* Should never be called:
* As it is only called by __ftrace_replace_code() which is called by
* ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
* which is called to turn mcount into nops or nops into function calls
* but not to convert a function from not using regs to one that uses
* regs, which ftrace_modify_call() is for.
*/
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
WARN_ON(1);
return -EINVAL;
}
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
ret = ftrace_modify_code(ip, old, new);
/* Also update the regs callback function */
if (!ret) {
ip = (unsigned long)(&ftrace_regs_call);
memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
ret = ftrace_modify_code(ip, old, new);
}
atomic_dec(&modifying_ftrace_code);
return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
return add_break(rec->ip, old);
}
/*
* If the record has the FTRACE_FL_REGS set, that means that it
* wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
* is not not set, then it wants to convert to the normal callback.
*/
static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
{
if (rec->flags & FTRACE_FL_REGS)
return (unsigned long)FTRACE_REGS_ADDR;
else
return (unsigned long)FTRACE_ADDR;
}
/*
* The FTRACE_FL_REGS_EN is set when the record already points to
* a function that saves all the regs. Basically the '_EN' version
* represents the current state of the function.
*/
static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
{
if (rec->flags & FTRACE_FL_REGS_EN)
return (unsigned long)FTRACE_REGS_ADDR;
else
return (unsigned long)FTRACE_ADDR;
}
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
ret = ftrace_test_record(rec, enable);
ftrace_addr = (unsigned long)FTRACE_ADDR;
ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
/* converting nop to call */
return add_brk_on_nop(rec);
case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
ftrace_addr = get_ftrace_old_addr(rec);
/* fall through */
case FTRACE_UPDATE_MAKE_NOP:
/* converting a call to a nop */
return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
* If not, don't touch the breakpoint, we make just create
* a disaster.
*/
ftrace_addr = (unsigned long)FTRACE_ADDR;
ftrace_addr = get_ftrace_addr(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
goto update;
/* Check both ftrace_addr and ftrace_old_addr */
ftrace_addr = get_ftrace_old_addr(rec);
nop = ftrace_call_replace(ip, ftrace_addr);
if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
return -EINVAL;
}
update:
return probe_kernel_write((void *)ip, &nop[0], 1);
}
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_test_record(rec, enable);
ftrace_addr = (unsigned long)FTRACE_ADDR;
ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
ret = ftrace_update_record(rec, enable);
ftrace_addr = (unsigned long)FTRACE_ADDR;
ftrace_addr = get_ftrace_addr(rec);
switch (ret) {
case FTRACE_UPDATE_IGNORE:
return 0;
case FTRACE_UPDATE_MODIFY_CALL_REGS:
case FTRACE_UPDATE_MODIFY_CALL:
case FTRACE_UPDATE_MAKE_CALL:
/* converting nop to call */
return finish_update_call(rec, ftrace_addr);

View File

@@ -287,27 +287,28 @@ ENTRY(startup_32_smp)
leal -__PAGE_OFFSET(%ecx),%esp
default_entry:
/*
* New page tables may be in 4Mbyte page mode and may
* be using the global pages.
*
* NOTE! If we are on a 486 we may have no cr4 at all!
* So we do not try to touch it unless we really have
* some bits in it to set. This won't work if the BSP
* implements cr4 but this AP does not -- very unlikely
* but be warned! The same applies to the pse feature
* if not equally supported. --macro
*
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
* Specifically, cr4 exists if and only if CPUID exists,
* which in turn exists if and only if EFLAGS.ID exists.
*/
#define cr4_bits pa(mmu_cr4_features)
movl cr4_bits,%edx
andl %edx,%edx
jz 6f
movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
orl %edx,%eax
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
popfl
pushfl
popl %eax
pushl $0
popfl
pushfl
popl %edx
xorl %edx,%eax
testl %ecx,%eax
jz 6f # No ID flag = no CPUID = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled

View File

@@ -19,24 +19,17 @@
#include <asm/fpu-internal.h>
#include <asm/user.h>
#ifdef CONFIG_X86_64
# include <asm/sigcontext32.h>
# include <asm/user32.h>
#else
# define save_i387_xstate_ia32 save_i387_xstate
# define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate
# define _xstate_ia32 _xstate
# define sig_xstate_ia32_size sig_xstate_size
# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
#endif
/*
* Were we in an interrupt that interrupted kernel mode?
*
* We can do a kernel_fpu_begin/end() pair *ONLY* if that
* For now, with eagerfpu we will return interrupted kernel FPU
* state as not-idle. TBD: Ideally we can change the return value
* to something like __thread_has_fpu(current). But we need to
* be careful of doing __thread_clear_has_fpu() before saving
* the FPU etc for supporting nested uses etc. For now, take
* the simple route!
*
* On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
* pair does nothing at all: the thread must not have fpu (so
* that we don't try to save the FPU state), and TS must
* be set (so that the clts/stts pair does nothing that is
@@ -44,6 +37,9 @@
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
if (use_eager_fpu())
return 0;
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
}
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);
void kernel_fpu_begin(void)
void __kernel_fpu_begin(void)
{
struct task_struct *me = current;
WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
if (__thread_has_fpu(me)) {
__save_init_fpu(me);
__thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */
} else {
/* We do 'stts()' in __kernel_fpu_end() */
} else if (!use_eager_fpu()) {
this_cpu_write(fpu_owner_task, NULL);
clts();
}
}
EXPORT_SYMBOL(kernel_fpu_begin);
EXPORT_SYMBOL(__kernel_fpu_begin);
void kernel_fpu_end(void)
void __kernel_fpu_end(void)
{
stts();
preempt_enable();
if (use_eager_fpu())
math_state_restore();
else
stts();
}
EXPORT_SYMBOL(kernel_fpu_end);
EXPORT_SYMBOL(__kernel_fpu_end);
void unlazy_fpu(struct task_struct *tsk)
{
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk)
}
EXPORT_SYMBOL(unlazy_fpu);
#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
# define HAVE_HWFP 1
#endif
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
static void __cpuinit mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
clts();
if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
mask = 0x0000ffbf;
}
mxcsr_feature_mask &= mask;
stts();
}
static void __cpuinit init_thread_xstate(void)
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void)
init_thread_xstate();
mxcsr_feature_mask_init();
/* clean state in init */
current_thread_info()->status = 0;
clear_used_math();
xsave_init();
eager_fpu_init();
}
void fpu_finit(struct fpu *fpu)
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu)
}
if (cpu_has_fxsr) {
struct i387_fxsave_struct *fx = &fpu->state->fxsave;
memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
if (cpu_has_xmm)
fx->mxcsr = MXCSR_DEFAULT;
fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
memset(fp, 0, xstate_size);
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
* FXSR floating point environment conversions.
*/
static void
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0]));
}
static void convert_to_fxsr(struct task_struct *tsk,
const struct user_i387_ia32_struct *env)
void convert_to_fxsr(struct task_struct *tsk,
const struct user_i387_ia32_struct *env)
{
struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
@@ -588,223 +569,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
/*
* Signal frame handlers.
*/
static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave;
fp->status = fp->swd;
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
return -1;
return 1;
}
static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
struct user_i387_ia32_struct env;
int err = 0;
convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env)))
return -1;
err |= __put_user(fx->swd, &buf->status);
err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
if (err)
return -1;
if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
return -1;
return 1;
}
static int save_i387_xsave(void __user *buf)
{
struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fx = buf;
int err = 0;
sanitize_i387_state(tsk);
/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context.
* This will enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware applications can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
if (save_i387_fxsave(fx) < 0)
return -1;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
sizeof(struct _fpx_sw_bytes));
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_ia32_size
- FP_XSTATE_MAGIC2_SIZE));
if (err)
return -1;
return 1;
}
int save_i387_xstate_ia32(void __user *buf)
{
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
struct task_struct *tsk = current;
if (!used_math())
return 0;
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
return -EACCES;
/*
* This will cause a "finit" to be triggered by the next
* attempted FPU operation by the 'current' process.
*/
clear_used_math();
if (!HAVE_HWFP) {
return fpregs_soft_get(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, fp) ? -1 : 1;
}
unlazy_fpu(tsk);
if (cpu_has_xsave)
return save_i387_xsave(fp);
if (cpu_has_fxsr)
return save_i387_fxsave(fp);
else
return save_i387_fsave(fp);
}
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
return __copy_from_user(&tsk->thread.fpu.state->fsave, buf,
sizeof(struct i387_fsave_struct));
}
static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
unsigned int size)
{
struct task_struct *tsk = current;
struct user_i387_ia32_struct env;
int err;
err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0],
size);
/* mxcsr reserved bits must be masked to zero for security reasons */
tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
if (err || __copy_from_user(&env, buf, sizeof(env)))
return 1;
convert_to_fxsr(tsk, &env);
return 0;
}
static int restore_i387_xsave(void __user *buf)
{
struct _fpx_sw_bytes fx_sw_user;
struct _fpstate_ia32 __user *fx_user =
((struct _fpstate_ia32 __user *) buf);
struct i387_fxsave_struct __user *fx =
(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
struct xsave_hdr_struct *xsave_hdr =
&current->thread.fpu.state->xsave.xsave_hdr;
u64 mask;
int err;
if (check_for_xstate(fx, buf, &fx_sw_user))
goto fx_only;
mask = fx_sw_user.xstate_bv;
err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
xsave_hdr->xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
/*
* Init the state that is not present in the memory layout
* and enabled by the OS.
*/
mask = ~(pcntxt_mask & ~mask);
xsave_hdr->xstate_bv &= mask;
return err;
fx_only:
/*
* Couldn't find the extended state information in the memory
* layout. Restore the FP/SSE and init the other extended state
* enabled by the OS.
*/
xsave_hdr->xstate_bv = XSTATE_FPSSE;
return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
}
int restore_i387_xstate_ia32(void __user *buf)
{
int err;
struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
if (HAVE_HWFP)
clear_fpu(tsk);
if (!buf) {
if (used_math()) {
clear_fpu(tsk);
clear_used_math();
}
return 0;
} else
if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
return -EACCES;
if (!used_math()) {
err = init_fpu(tsk);
if (err)
return err;
}
if (HAVE_HWFP) {
if (cpu_has_xsave)
err = restore_i387_xsave(buf);
else if (cpu_has_fxsr)
err = restore_i387_fxsave(fp, sizeof(struct
i387_fxsave_struct));
else
err = restore_i387_fsave(fp);
} else {
err = fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, fp) != 0;
}
set_used_math();
return err;
}
/*
* FPU state for core dumps.
* This is only used for a.out dumps now.

View File

@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
* out of.
*/
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
}
static struct syscore_ops i8259_syscore_ops = {

View File

@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
irq_stats(j)->irq_tlb_count);
seq_printf(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;

View File

@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,

View File

@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
#ifdef KPROBES_CAN_USE_FTRACE
static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
/*
* Emulate singlestep (and also recover regs->ip)
* as if there is a 5byte nop
*/
regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
__this_cpu_write(current_kprobe, NULL);
}
#endif
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
#ifdef KPROBES_CAN_USE_FTRACE
if (kprobe_ftrace(p)) {
skip_singlestep(p, regs, kcb);
return 1;
}
#endif
setup_singlestep(p, regs, kcb, 0);
return 1;
}
@@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
#ifdef KPROBES_CAN_USE_FTRACE
/* Ftrace callback handler for kprobes */
void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
unsigned long flags;
/* Disable irq for emulating a breakpoint and avoiding preempt */
local_irq_save(flags);
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto end;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs))
skip_singlestep(p, regs, kcb);
/*
* If pre_handler returns !0, it sets regs->ip and
* resets current kprobe.
*/
}
end:
local_irq_restore(flags);
}
int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
{
p->ainsn.insn = NULL;
p->ainsn.boostable = -1;
return 0;
}
#endif
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();

View File

@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
kvm_disable_steal_time();
}
static int kvm_pv_reboot_notify(struct notifier_block *nb,
@@ -396,9 +397,7 @@ void kvm_disable_steal_time(void)
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
#ifdef CONFIG_KVM_CLOCK
WARN_ON(kvm_register_clock("primary cpu clock"));
#endif
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
}

View File

@@ -75,20 +75,113 @@ struct microcode_amd {
static struct equiv_cpu_entry *equiv_cpu_table;
/* page-sized ucode patch buffer */
void *patch;
struct ucode_patch {
struct list_head plist;
void *data;
u32 patch_id;
u16 equiv_cpu;
};
static LIST_HEAD(pcache);
static u16 find_equiv_id(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int i = 0;
if (!equiv_cpu_table)
return 0;
while (equiv_cpu_table[i].installed_cpu != 0) {
if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
return equiv_cpu_table[i].equiv_cpu;
i++;
}
return 0;
}
static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
{
int i = 0;
BUG_ON(!equiv_cpu_table);
while (equiv_cpu_table[i].equiv_cpu != 0) {
if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
return equiv_cpu_table[i].installed_cpu;
i++;
}
return 0;
}
/*
* a small, trivial cache of per-family ucode patches
*/
static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
{
struct ucode_patch *p;
list_for_each_entry(p, &pcache, plist)
if (p->equiv_cpu == equiv_cpu)
return p;
return NULL;
}
static void update_cache(struct ucode_patch *new_patch)
{
struct ucode_patch *p;
list_for_each_entry(p, &pcache, plist) {
if (p->equiv_cpu == new_patch->equiv_cpu) {
if (p->patch_id >= new_patch->patch_id)
/* we already have the latest patch */
return;
list_replace(&p->plist, &new_patch->plist);
kfree(p->data);
kfree(p);
return;
}
}
/* no patch found, add it */
list_add_tail(&new_patch->plist, &pcache);
}
static void free_cache(void)
{
struct ucode_patch *p, *tmp;
list_for_each_entry_safe(p, tmp, &pcache, plist) {
__list_del(p->plist.prev, p->plist.next);
kfree(p->data);
kfree(p);
}
}
static struct ucode_patch *find_patch(unsigned int cpu)
{
u16 equiv_id;
equiv_id = find_equiv_id(cpu);
if (!equiv_id)
return NULL;
return cache_find_patch(equiv_id);
}
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
csig->sig = cpuid_eax(0x00000001);
csig->rev = c->microcode;
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
return 0;
}
static unsigned int verify_ucode_size(int cpu, u32 patch_size,
static unsigned int verify_patch_size(int cpu, u32 patch_size,
unsigned int size)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
return patch_size;
}
static u16 find_equiv_id(void)
{
unsigned int current_cpu_id, i = 0;
BUG_ON(equiv_cpu_table == NULL);
current_cpu_id = cpuid_eax(0x00000001);
while (equiv_cpu_table[i].installed_cpu != 0) {
if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
return equiv_cpu_table[i].equiv_cpu;
i++;
}
return 0;
}
/*
* we signal a good patch is found by returning its size > 0
*/
static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
unsigned int leftover_size, int rev,
unsigned int *current_size)
{
struct microcode_header_amd *mc_hdr;
unsigned int actual_size, patch_size;
u16 equiv_cpu_id;
/* size of the current patch we're staring at */
patch_size = *(u32 *)(ucode_ptr + 4);
*current_size = patch_size + SECTION_HDR_SIZE;
equiv_cpu_id = find_equiv_id();
if (!equiv_cpu_id)
return 0;
/*
* let's look at the patch header itself now
*/
mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
/* ucode might be chipset specific -- currently we don't support this */
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}
if (mc_hdr->patch_id <= rev)
return 0;
/*
* now that the header looks sane, verify its size
*/
actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
if (!actual_size)
return 0;
/* clear the patch buffer */
memset(patch, 0, PAGE_SIZE);
/* all looks ok, get the binary patch */
get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
return actual_size;
}
static int apply_microcode_amd(int cpu)
{
u32 rev, dummy;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
struct microcode_amd *mc_amd = uci->mc;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
u32 rev, dummy;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
BUG_ON(raw_smp_processor_id() != cpu);
if (mc_amd == NULL)
uci = ucode_cpu_info + cpu;
p = find_patch(cpu);
if (!p)
return 0;
wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
/* get patch id after patching */
mc_amd = p->data;
uci->mc = p->data;
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
/* check current patch id and patch's id for match */
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
return 0;
}
wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
/* verify patch application was successful */
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
if (rev != mc_amd->hdr.patch_id) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
@@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf)
return -ENOMEM;
}
get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
/* add header length */
return size + CONTAINER_HDR_SZ;
@@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void)
equiv_cpu_table = NULL;
}
static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
static void cleanup(void)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_header_amd *mc_hdr = NULL;
unsigned int mc_size, leftover, current_size = 0;
int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_ERROR;
free_equiv_cpu_table();
free_cache();
}
offset = install_equiv_cpu_table(ucode_ptr);
/*
* We return the current size even if some of the checks failed so that
* we can skip over the next patch. If we return a negative value, we
* signal a grave error like a memory allocation has failed and the
* driver cannot continue functioning normally. In such cases, we tear
* down everything we've used up so far and exit.
*/
static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_header_amd *mc_hdr;
struct ucode_patch *patch;
unsigned int patch_size, crnt_size, ret;
u32 proc_fam;
u16 proc_id;
patch_size = *(u32 *)(fw + 4);
crnt_size = patch_size + SECTION_HDR_SIZE;
mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
proc_id = mc_hdr->processor_rev_id;
proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
if (!proc_fam) {
pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
return crnt_size;
}
/* check if patch is for the current family */
proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
if (proc_fam != c->x86)
return crnt_size;
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
mc_hdr->patch_id);
return crnt_size;
}
ret = verify_patch_size(cpu, patch_size, leftover);
if (!ret) {
pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
return crnt_size;
}
patch = kzalloc(sizeof(*patch), GFP_KERNEL);
if (!patch) {
pr_err("Patch allocation failure.\n");
return -EINVAL;
}
patch->data = kzalloc(patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}
/* All looks ok, copy patch... */
memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
/* ... and add to cache. */
update_cache(patch);
return crnt_size;
}
static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
{
enum ucode_state ret = UCODE_ERROR;
unsigned int leftover;
u8 *fw = (u8 *)data;
int crnt_size = 0;
int offset;
offset = install_equiv_cpu_table(data);
if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
goto out;
return ret;
}
ucode_ptr += offset;
fw += offset;
leftover = size - offset;
if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
if (*(u32 *)fw != UCODE_UCODE_TYPE) {
pr_err("invalid type field in container file section header\n");
goto free_table;
free_equiv_cpu_table();
return ret;
}
while (leftover) {
mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
new_rev, &current_size);
if (mc_size) {
mc_hdr = patch;
new_mc = patch;
new_rev = mc_hdr->patch_id;
goto out_ok;
}
crnt_size = verify_and_add_patch(cpu, fw, leftover);
if (crnt_size < 0)
return ret;
ucode_ptr += current_size;
leftover -= current_size;
fw += crnt_size;
leftover -= crnt_size;
}
if (!new_mc) {
state = UCODE_NFOUND;
goto free_table;
}
out_ok:
uci->mc = new_mc;
state = UCODE_OK;
pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
cpu, uci->cpu_sig.rev, new_rev);
free_table:
free_equiv_cpu_table();
out:
return state;
return UCODE_OK;
}
/*
@@ -315,7 +402,7 @@ out:
*
* This legacy file is always smaller than 2K in size.
*
* Starting at family 15h they are in family specific firmware files:
* Beginning with family 15h, they are in family-specific firmware files:
*
* amd-ucode/microcode_amd_fam15h.bin
* amd-ucode/microcode_amd_fam16h.bin
@@ -323,12 +410,17 @@ out:
*
* These might be larger than 2K.
*/
static enum ucode_state request_microcode_amd(int cpu, struct device *device)
static enum ucode_state request_microcode_amd(int cpu, struct device *device,
bool refresh_fw)
{
char fw_name[36] = "amd-ucode/microcode_amd.bin";
const struct firmware *fw;
enum ucode_state ret = UCODE_NFOUND;
struct cpuinfo_x86 *c = &cpu_data(cpu);
enum ucode_state ret = UCODE_NFOUND;
const struct firmware *fw;
/* reload ucode container only on the boot cpu */
if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
return UCODE_OK;
if (c->x86 >= 0x15)
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
goto fw_release;
}
ret = generic_load_microcode(cpu, fw->data, fw->size);
/* free old equiv table */
free_equiv_cpu_table();
fw_release:
ret = load_microcode_amd(cpu, fw->data, fw->size);
if (ret != UCODE_OK)
cleanup();
fw_release:
release_firmware(fw);
out:
out:
return ret;
}
@@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
return NULL;
}
patch = (void *)get_zeroed_page(GFP_KERNEL);
if (!patch)
return NULL;
return &microcode_amd_ops;
}
void __exit exit_amd_microcode(void)
{
free_page((unsigned long)patch);
cleanup();
}

View File

@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
if (do_microcode_update(buf, len) == 0)
ret = (ssize_t)len;
if (ret > 0)
perf_check_microcode();
mutex_unlock(&microcode_mutex);
put_online_cpus();
@@ -276,19 +279,18 @@ static struct platform_device *microcode_pdev;
static int reload_for_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
int err = 0;
if (uci->valid) {
enum ucode_state ustate;
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
if (ustate == UCODE_OK)
apply_microcode_on_target(cpu);
else
if (ustate == UCODE_ERROR)
err = -EINVAL;
}
if (!uci->valid)
return err;
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
if (ustate == UCODE_OK)
apply_microcode_on_target(cpu);
else
if (ustate == UCODE_ERROR)
err = -EINVAL;
return err;
}
@@ -370,18 +372,15 @@ static void microcode_fini_cpu(int cpu)
static enum ucode_state microcode_resume_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->mc)
return UCODE_NFOUND;
pr_debug("CPU%d updated upon resume\n", cpu);
apply_microcode_on_target(cpu);
if (apply_microcode_on_target(cpu))
return UCODE_ERROR;
return UCODE_OK;
}
static enum ucode_state microcode_init_cpu(int cpu)
static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
{
enum ucode_state ustate;
@@ -392,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
refresh_fw);
if (ustate == UCODE_OK) {
pr_debug("CPU%d updated upon init\n", cpu);
@@ -405,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
static enum ucode_state microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
if (uci->valid)
ustate = microcode_resume_cpu(cpu);
else
ustate = microcode_init_cpu(cpu);
return microcode_resume_cpu(cpu);
return ustate;
return microcode_init_cpu(cpu, false);
}
static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -428,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
if (err)
return err;
if (microcode_init_cpu(cpu) == UCODE_ERROR)
if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
return -EINVAL;
return err;
@@ -477,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
struct device *dev;
dev = get_cpu_device(cpu);
switch (action) {
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
microcode_update_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("CPU%d added\n", cpu);
/*
* "break" is missing on purpose here because we want to fall
* through in order to create the sysfs group.
*/
case CPU_DOWN_FAILED:
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&dev->kobj, &mc_attr_group);
pr_debug("CPU%d removed\n", cpu);
break;
/*
* case CPU_DEAD:
*
* When a CPU goes offline, don't free up or invalidate the copy of
* the microcode in kernel memory, so that we can reuse it when the
* CPU comes back online without unnecessarily requesting the userspace
* for it again.
*/
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
}
/* The CPU refused to come up during a system resume */
if (action == CPU_UP_CANCELED_FROZEN)
microcode_fini_cpu(cpu);
return NOTIFY_OK;
}

View File

@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}
static enum ucode_state request_microcode_fw(int cpu, struct device *device)
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);

View File

@@ -257,12 +257,14 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
get_online_cpus();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&msr_class_cpu_notifier);
put_online_cpus();
err = 0;
goto out;
@@ -271,6 +273,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
put_online_cpus();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
get_online_cpus();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
put_online_cpus();
}
module_init(msr_init);

105
arch/x86/kernel/perf_regs.c Normal file
View File

@@ -0,0 +1,105 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/bug.h>
#include <linux/stddef.h>
#include <asm/perf_regs.h>
#include <asm/ptrace.h>
#ifdef CONFIG_X86_32
#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
#else
#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
#endif
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
PT_REGS_OFFSET(PERF_REG_X86_SI, si),
PT_REGS_OFFSET(PERF_REG_X86_DI, di),
PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
#ifdef CONFIG_X86_32
PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
PT_REGS_OFFSET(PERF_REG_X86_ES, es),
PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
#else
/*
* The pt_regs struct does not store
* ds, es, fs, gs in 64 bit mode.
*/
(unsigned int) -1,
(unsigned int) -1,
(unsigned int) -1,
(unsigned int) -1,
#endif
#ifdef CONFIG_X86_64
PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
#endif
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}
#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
#ifdef CONFIG_X86_32
int perf_reg_validate(u64 mask)
{
if (!mask || mask & REG_RESERVED)
return -EINVAL;
return 0;
}
u64 perf_reg_abi(struct task_struct *task)
{
return PERF_SAMPLE_REGS_ABI_32;
}
#else /* CONFIG_X86_64 */
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))
int perf_reg_validate(u64 mask)
{
if (!mask || mask & REG_RESERVED)
return -EINVAL;
if (mask & REG_NOSUPPORT)
return -EINVAL;
return 0;
}
u64 perf_reg_abi(struct task_struct *task)
{
if (test_tsk_thread_flag(task, TIF_IA32))
return PERF_SAMPLE_REGS_ABI_32;
else
return PERF_SAMPLE_REGS_ABI_64;
}
#endif /* CONFIG_X86_32 */

View File

@@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev)
return oprom;
}
void *pci_map_biosrom(struct pci_dev *pdev)
void __iomem *pci_map_biosrom(struct pci_dev *pdev)
{
struct resource *oprom = find_oprom(pdev);

View File

@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
int ret;
unlazy_fpu(src);
*dst = *src;
if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
ret = fpu_alloc(&dst->thread.fpu);
if (ret)
return ret;
fpu_copy(&dst->thread.fpu, &src->thread.fpu);
fpu_copy(dst, src);
}
return 0;
}
@@ -97,16 +95,6 @@ void arch_task_cache_init(void)
SLAB_PANIC | SLAB_NOTRACK, NULL);
}
static inline void drop_fpu(struct task_struct *tsk)
{
/*
* Forget coprocessor state..
*/
tsk->fpu_counter = 0;
clear_fpu(tsk);
clear_used_math();
}
/*
* Free current thread data structures etc..
*/
@@ -163,7 +151,13 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
drop_fpu(tsk);
drop_init_fpu(tsk);
/*
* Free the FPU state for non xsave platforms. They get reallocated
* lazily at the first use.
*/
if (!use_eager_fpu())
free_thread_xstate(tsk);
}
static void hard_disable_TSC(void)
@@ -298,71 +292,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
* This gets run with %si containing the
* function to call, and %di containing
* the "args".
*/
extern void kernel_thread_helper(void);
/*
* Create a kernel thread
*/
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
memset(&regs, 0, sizeof(regs));
regs.si = (unsigned long) fn;
regs.di = (unsigned long) arg;
#ifdef CONFIG_X86_32
regs.ds = __USER_DS;
regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
#else
regs.ss = __KERNEL_DS;
#endif
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
/*
* sys_execve() executes a new program.
*/
long sys_execve(const char __user *name,
const char __user *const __user *argv,
const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
filename = getname(name);
error = PTR_ERR(filename);
if (IS_ERR(filename))
return error;
error = do_execve(filename, argv, envp, regs);
#ifdef CONFIG_X86_32
if (error == 0) {
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);
}
#endif
putname(filename);
return error;
}
/*
* Idle related variables and functions
*/

View File

@@ -57,6 +57,7 @@
#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
struct pt_regs *childregs;
struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;
childregs = task_pt_regs(p);
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.ip = (unsigned long) ret_from_fork;
if (unlikely(!regs)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
task_user_gs(p) = __KERNEL_STACK_CANARY;
childregs->ds = __USER_DS;
childregs->es = __USER_DS;
childregs->fs = __KERNEL_PERCPU;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(regs);
p->fpu_counter = 0;
@@ -190,10 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
regs->flags = X86_EFLAGS_IF;
/*
* Free the old FP and other extended state
* force it to the iret return path by making it look as if there was
* some work pending.
*/
free_thread_xstate(current);
set_thread_flag(TIF_NOTIFY_RESUME);
}
EXPORT_SYMBOL_GPL(start_thread);

View File

@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
childregs = ((struct pt_regs *)
(THREAD_SIZE + task_stack_page(p))) - 1;
*childregs = *regs;
childregs->ax = 0;
if (user_mode(regs))
childregs->sp = sp;
else
childregs->sp = (unsigned long)childregs;
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(!regs)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
childregs->ss = __KERNEL_DS;
childregs->bx = sp; /* function */
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
return 0;
}
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -232,10 +239,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
void

View File

@@ -21,6 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = {
#define genregs32_get genregs_get
#define genregs32_set genregs_set
#define user_i387_ia32_struct user_i387_struct
#define user32_fxsr_struct user_fxsr_struct
#endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
rcu_user_exit();
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs)
!test_thread_flag(TIF_SYSCALL_EMU);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
rcu_user_enter();
}

View File

@@ -225,7 +225,7 @@ static struct platform_device rtc_device = {
static __init int add_rtc_cmos(void)
{
#ifdef CONFIG_PNP
static const char *ids[] __initconst =
static const char * const const ids[] __initconst =
{ "PNP0b00", "PNP0b01", "PNP0b02", };
struct pnp_dev *dev;
struct pnp_id *id;

View File

@@ -68,6 +68,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <linux/jiffies.h>
#include <video/edid.h>
@@ -957,7 +958,7 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
memblock_find_dma_reserve();
#ifdef CONFIG_KVM_CLOCK
#ifdef CONFIG_KVM_GUEST
kvmclock_init();
#endif
@@ -1032,6 +1033,8 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();
arch_init_ideal_nops();
register_refined_jiffies(CLOCK_TICK_RATE);
}
#ifdef CONFIG_X86_32

View File

@@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
return err;
}
@@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void __user **fpstate)
{
/* Default to using normal stack */
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
#ifdef CONFIG_X86_64
/* redzone */
sp -= 128;
#endif /* CONFIG_X86_64 */
if (config_enabled(CONFIG_X86_64))
sp -= 128;
if (!onsigstack) {
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (current->sas_ss_size)
sp = current->sas_ss_sp + current->sas_ss_size;
} else {
#ifdef CONFIG_X86_32
/* This is the legacy signal stack switching. */
if ((regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer)
} else if (config_enabled(CONFIG_X86_32) &&
(regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
#endif /* CONFIG_X86_32 */
}
}
if (used_math()) {
sp -= sig_xstate_size;
#ifdef CONFIG_X86_64
sp = round_down(sp, 64);
#endif /* CONFIG_X86_64 */
sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
@@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (onsigstack && !likely(on_sig_stack(sp)))
return (void __user *)-1L;
/* save i387 state */
if (used_math() && save_i387_xstate(*fpstate) < 0)
/* save i387 and extended state */
if (used_math() &&
save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
return (void __user *)sp;
@@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sig, &frame->sig);
put_user_ex(&frame->info, &frame->pinfo);
put_user_ex(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
/* Create the ucontext. */
if (cpu_has_xsave)
@@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
*/
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);
err |= copy_siginfo_to_user(&frame->info, info);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
@@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
} put_user_catch(err);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
@@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
#endif /* CONFIG_X86_32 */
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);
if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
#endif /* CONFIG_X86_X32_ABI */
return 0;
}
#ifdef CONFIG_X86_32
/*
* Atomically swap in the new signal mask, and wait for a signal.
@@ -612,55 +682,22 @@ static int signr_convert(int sig)
return sig;
}
#ifdef CONFIG_X86_32
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
#else /* !CONFIG_X86_32 */
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else /* !CONFIG_IA32_EMULATION */
#define is_ia32 0
#endif /* CONFIG_IA32_EMULATION */
#ifdef CONFIG_X86_X32_ABI
#define is_x32 test_thread_flag(TIF_X32)
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs);
#else /* !CONFIG_X86_X32_ABI */
#define is_x32 0
#endif /* CONFIG_X86_X32_ABI */
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs);
int ia32_setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
if (is_ia32) {
if (is_ia32_frame()) {
if (ka->sa.sa_flags & SA_SIGINFO)
return ia32_setup_rt_frame(usig, ka, info, set, regs);
return ia32_setup_rt_frame(usig, ka, info, cset, regs);
else
return ia32_setup_frame(usig, ka, set, regs);
#ifdef CONFIG_X86_X32_ABI
} else if (is_x32) {
return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
return ia32_setup_frame(usig, ka, cset, regs);
} else if (is_x32_frame()) {
return x32_setup_rt_frame(usig, ka, info, cset, regs);
} else {
return __setup_rt_frame(sig, ka, info, set, regs);
}
@@ -779,6 +816,8 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
rcu_user_exit();
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -801,9 +840,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
rcu_user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -824,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
siginfo_t *info, compat_sigset_t *set,
struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, info))
return -EFAULT;
}
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
put_user_ex(0, &frame->uc.uc__pad0);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (ka->sa.sa_flags & SA_RESTORER) {
restorer = ka->sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
err |= -EFAULT;
}
put_user_ex(restorer, &frame->pretcode);
} put_user_catch(err);
if (err)
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
regs->ip = (unsigned long) ka->sa.sa_handler;
/* We use the x32 calling convention here... */
regs->di = sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
return 0;
}
asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;

View File

@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;
alternatives_smp_switch(1);
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
preempt_enable();
}
void arch_disable_nonboot_cpus_begin(void)
{
/*
* Avoid the smp alternatives switch during the disable_nonboot_cpus().
* In the suspend path, we will be back in the SMP mode shortly anyways.
*/
skip_smp_alternatives = true;
}
void arch_disable_nonboot_cpus_end(void)
{
skip_smp_alternatives = false;
}
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
if (system_state == SYSTEM_RUNNING)
pr_info("CPU %u is now offline\n", cpu);
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);

View File

@@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child)
return 1;
}
void set_task_blockstep(struct task_struct *task, bool on)
{
unsigned long debugctl;
/*
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
* FIXME: this means that set/clear TIF_BLOCKSTEP is simply
* wrong if task != current, SIGKILL can wakeup the stopped
* tracee and set/clear can play with the running task, this
* can confuse the next __switch_to_xtra().
*/
local_irq_disable();
debugctl = get_debugctlmsr();
if (on) {
debugctl |= DEBUGCTLMSR_BTF;
set_tsk_thread_flag(task, TIF_BLOCKSTEP);
} else {
debugctl &= ~DEBUGCTLMSR_BTF;
clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
}
if (task == current)
update_debugctlmsr(debugctl);
local_irq_enable();
}
/*
* Enable single or block step.
*/
@@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block)
* So no one should try to use debugger block stepping in a program
* that uses user-mode single stepping itself.
*/
if (enable_single_step(child) && block) {
unsigned long debugctl = get_debugctlmsr();
debugctl |= DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
set_tsk_thread_flag(child, TIF_BLOCKSTEP);
} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (enable_single_step(child) && block)
set_task_blockstep(child, true);
else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);
}
void user_enable_single_step(struct task_struct *child)
@@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child)
/*
* Make sure block stepping (BTF) is disabled.
*/
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
unsigned long debugctl = get_debugctlmsr();
debugctl &= ~DEBUGCTLMSR_BTF;
update_debugctlmsr(debugctl);
clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
set_task_blockstep(child, false);
/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);

View File

@@ -1,40 +0,0 @@
/*
* This file contains various random system calls that
* have a non-standard calling sequence on the Linux/i386
* platform.
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/stat.h>
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/ipc.h>
#include <linux/uaccess.h>
#include <linux/unistd.h>
#include <asm/syscalls.h>
/*
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
int kernel_execve(const char *filename,
const char *const argv[],
const char *const envp[])
{
long __res;
asm volatile ("int $0x80"
: "=a" (__res)
: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
return __res;
}

View File

@@ -55,6 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/rcu.h>
#include <asm/mach_traps.h>
@@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}
static int __kprobes
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
*/
if (trapnr < X86_TRAP_UD) {
if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
return 0;
}
return -1;
}
#endif
if (!user_mode(regs)) {
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
}
return 0;
}
return -1;
}
static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
* traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
*/
if (trapnr < X86_TRAP_UD)
goto vm86_trap;
goto trap_signal;
}
#endif
if (!user_mode(regs))
goto kernel_trap;
#ifdef CONFIG_X86_32
trap_signal:
#endif
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
return;
/*
* We want error_code and trap_nr set for userspace faults and
* kernelspace faults which result in die(), but not
@@ -158,33 +174,20 @@ trap_signal:
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
return;
kernel_trap:
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
}
return;
#ifdef CONFIG_X86_32
vm86_trap:
if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
goto trap_signal;
return;
#endif
}
#define DO_ERROR(trapnr, signr, str, name) \
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
exception_enter(regs); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(regs); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
exception_exit(regs); \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
@@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void __user *)siaddr; \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
exception_enter(regs); \
if (notify_die(DIE_TRAP, str, regs, error_code, \
trapnr, signr) == NOTIFY_STOP) { \
exception_exit(regs); \
return; \
} \
conditional_sti(regs); \
do_trap(trapnr, signr, str, regs, error_code, &info); \
exception_exit(regs); \
}
DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
/* Runs on IST stack */
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
return;
preempt_conditional_sti(regs);
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
preempt_conditional_cli(regs);
X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
preempt_conditional_sti(regs);
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
preempt_conditional_cli(regs);
}
exception_exit(regs);
}
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
static const char str[] = "double fault";
struct task_struct *tsk = current;
exception_enter(regs);
/* Return not checked because double check cannot be ignored */
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
exception_enter(regs);
conditional_sti(regs);
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
if (regs->flags & X86_VM_MASK) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit;
}
#endif
tsk = current;
if (!user_mode(regs))
goto gp_in_kernel;
if (!user_mode(regs)) {
if (fixup_exception(regs))
goto exit;
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die("general protection fault", regs, error_code);
goto exit;
}
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
force_sig(SIGSEGV, tsk);
return;
#ifdef CONFIG_X86_32
gp_in_vm86:
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
return;
#endif
gp_in_kernel:
if (fixup_exception(regs))
return;
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
return;
die("general protection fault", regs, error_code);
exit:
exception_exit(regs);
}
/* May run on IST stack. */
@@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
exception_enter(regs);
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;
/*
* Let others (NMI) know that the debug stack is in use
@@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
exit:
exception_exit(regs);
}
#ifdef CONFIG_X86_64
@@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
unsigned long dr6;
int si_code;
exception_enter(regs);
get_debugreg(dr6, 6);
/* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
goto exit;
/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
@@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
goto exit;
/*
* Let others (NMI) know that the debug stack is in use
@@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
X86_TRAP_DB);
preempt_conditional_cli(regs);
debug_stack_usage_dec();
return;
goto exit;
}
/*
@@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
debug_stack_usage_dec();
return;
exit:
exception_exit(regs);
}
/*
@@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
ignore_fpu_irq = 1;
#endif
exception_enter(regs);
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(regs);
}
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
math_error(regs, error_code, X86_TRAP_XF);
exception_exit(regs);
}
dotraplinkage void
@@ -613,11 +628,12 @@ void math_state_restore(void)
}
__thread_fpu_begin(tsk);
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
__thread_fpu_end(tsk);
drop_init_fpu(tsk);
force_sig(SIGSEGV, tsk);
return;
}
@@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
exception_enter(regs);
BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
info.regs = regs;
math_emulate(&info);
exception_exit(regs);
return;
}
#endif
@@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
exception_exit(regs);
}
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;
exception_enter(regs);
local_irq_enable();
info.si_signo = SIGILL;
@@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
info.si_code = ILL_BADSTK;
info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
return;
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
}
exception_exit(regs);
}
#endif

View File

@@ -41,6 +41,9 @@
/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2
/* Instruction will modify TF, don't change it */
#define UPROBE_FIX_SETF 0x4
#define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
insn_get_opcode(insn); /* should be a nop */
switch (OPCODE1(insn)) {
case 0x9d:
/* popf */
auprobe->fixups |= UPROBE_FIX_SETF;
break;
case 0xc3: /* ret/lret */
case 0xcb:
case 0xc2:
@@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* Skip these instructions as per the currently known x86 ISA.
* 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
*/
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
@@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
}
return false;
}
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
bool ret = __skip_sstep(auprobe, regs);
if (ret && (regs->flags & X86_EFLAGS_TF))
send_sig(SIGTRAP, current, 0);
return ret;
}
void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
struct pt_regs *regs = task_pt_regs(task);
autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
regs->flags |= X86_EFLAGS_TF;
if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
set_task_blockstep(task, false);
}
void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
{
struct task_struct *task = current;
struct arch_uprobe_task *autask = &task->utask->autask;
bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
struct pt_regs *regs = task_pt_regs(task);
/*
* The state of TIF_BLOCKSTEP was not saved so we can get an extra
* SIGTRAP if we do not clear TF. We need to examine the opcode to
* make it right.
*/
if (unlikely(trapped)) {
if (!autask->saved_tf)
regs->flags &= ~X86_EFLAGS_TF;
} else {
if (autask->saved_tf)
send_sig(SIGTRAP, task, 0);
else if (!(auprobe->fixups & UPROBE_FIX_SETF))
regs->flags &= ~X86_EFLAGS_TF;
}
}

View File

@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
if ((trapno == 3) || (trapno == 1)) {
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
/* setting this flag forces the code in entry_32.S to
call save_v86_state() and change the stack pointer
to KVM86->regs32 */
set_thread_flag(TIF_IRET);
the path where we call save_v86_state() and change
the stack pointer to KVM86->regs32 */
set_thread_flag(TIF_NOTIFY_RESUME);
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));

View File

@@ -28,7 +28,7 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/topology.h>
#include <linux/clocksource.h>
#include <linux/timekeeper_internal.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
vsyscall_gtod_data.sys_tz = sys_tz;
}
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
void update_vsyscall(struct timekeeper *tk)
{
struct timespec monotonic;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
write_seqcount_begin(&vsyscall_gtod_data.seq);
write_seqcount_begin(&vdata->seq);
/* copy vsyscall data */
vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask;
vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->clock->cycle_last;
vdata->clock.mask = tk->clock->mask;
vdata->clock.mult = tk->mult;
vdata->clock.shift = tk->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->xtime_nsec;
monotonic = timespec_add(*wall_time, *wtm);
vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->xtime_nsec
+ (tk->wall_to_monotonic.tv_nsec
<< tk->shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->shift)) {
vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->shift;
vdata->monotonic_time_sec++;
}
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
vsyscall_gtod_data.monotonic_time_coarse =
timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
write_seqcount_end(&vsyscall_gtod_data.seq);
vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
tk->wall_to_monotonic);
write_seqcount_end(&vdata->seq);
}
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,

View File

@@ -13,9 +13,13 @@
#include <asm/ftrace.h>
#ifdef CONFIG_FUNCTION_TRACER
/* mcount is defined in assembly */
/* mcount and __fentry__ are defined in assembly */
#ifdef CC_USING_FENTRY
EXPORT_SYMBOL(__fentry__);
#else
EXPORT_SYMBOL(mcount);
#endif
#endif
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);

View File

@@ -10,9 +10,7 @@
#include <linux/compat.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/sigcontext32.h>
#endif
#include <asm/sigframe.h>
#include <asm/xcr.h>
/*
@@ -23,13 +21,9 @@ u64 pcntxt_mask;
/*
* Represents init state for the supported extended state.
*/
static struct xsave_struct *init_xstate_buf;
struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION
struct _fpx_sw_bytes fx_sw_reserved_ia32;
#endif
struct xsave_struct *init_xstate_buf;
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
/*
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
*/
void __sanitize_i387_state(struct task_struct *tsk)
{
u64 xstate_bv;
int feature_bit = 0x2;
struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
int feature_bit = 0x2;
u64 xstate_bv;
if (!fx)
return;
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk)
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *fx_sw_user)
static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *fx_sw)
{
int min_xstate_size = sizeof(struct i387_fxsave_struct) +
sizeof(struct xsave_hdr_struct);
unsigned int magic2;
int err;
err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
sizeof(struct _fpx_sw_bytes));
if (err)
return -EFAULT;
if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
return -1;
/*
* First Magic check failed.
*/
if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
return -EINVAL;
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
fx_sw->xstate_size > xstate_size ||
fx_sw->xstate_size > fx_sw->extended_size)
return -1;
/*
* Check for error scenarios.
*/
if (fx_sw_user->xstate_size < min_xstate_size ||
fx_sw_user->xstate_size > xstate_size ||
fx_sw_user->xstate_size > fx_sw_user->extended_size)
return -EINVAL;
err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
fx_sw_user->extended_size -
FP_XSTATE_MAGIC2_SIZE));
if (err)
return err;
/*
* Check for the presence of second magic word at the end of memory
* layout. This detects the case where the user just copied the legacy
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
if (magic2 != FP_XSTATE_MAGIC2)
return -EFAULT;
if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
|| magic2 != FP_XSTATE_MAGIC2)
return -1;
return 0;
}
#ifdef CONFIG_X86_64
/*
* Signal frame handlers.
*/
int save_i387_xstate(void __user *buf)
static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
{
struct task_struct *tsk = current;
int err = 0;
if (use_fxsr()) {
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
struct _fpstate_ia32 __user *fp = buf;
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
return -EACCES;
convert_from_fxsr(&env, tsk);
BUG_ON(sig_xstate_size < xstate_size);
if ((unsigned long)buf % 64)
pr_err("%s: bad fpstate %p\n", __func__, buf);
if (!used_math())
return 0;
if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
err = fxsave_user(buf);
if (err)
return err;
user_fpu_end();
if (__copy_to_user(buf, &env, sizeof(env)) ||
__put_user(xsave->i387.swd, &fp->status) ||
__put_user(X86_FXSR_MAGIC, &fp->magic))
return -1;
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
xstate_size))
struct i387_fsave_struct __user *fp = buf;
u32 swd;
if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
return -1;
}
clear_used_math(); /* trigger finit */
if (use_xsave()) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
u64 xstate_bv;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));
/*
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers and
* set the FP/SSE bits.
*/
err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;
err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
if (err)
return err;
}
return 1;
return 0;
}
/*
* Restore the extended state if present. Otherwise, restore the FP/SSE
* state.
*/
static int restore_user_xstate(void __user *buf)
static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
{
struct _fpx_sw_bytes fx_sw_user;
u64 mask;
struct xsave_struct __user *x = buf;
struct _fpx_sw_bytes *sw_bytes;
u32 xstate_bv;
int err;
if (((unsigned long)buf % 64) ||
check_for_xstate(buf, buf, &fx_sw_user))
goto fx_only;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
mask = fx_sw_user.xstate_bv;
/*
* restore the state passed by the user.
*/
err = xrestore_user(buf, mask);
if (err)
if (!use_xsave())
return err;
/*
* init the state skipped by the user.
*/
mask = pcntxt_mask & ~mask;
if (unlikely(mask))
xrstor_state(init_xstate_buf, mask);
err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
return 0;
fx_only:
/*
* couldn't find the extended state information in the
* memory layout. Restore just the FP/SSE and init all
* the other extended state.
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers.
*/
xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
/*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;
err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
return err;
}
static inline int save_user_xstate(struct xsave_struct __user *buf)
{
int err;
if (use_xsave())
err = xsave_user(buf);
else if (use_fxsr())
err = fxsave_user((struct i387_fxsave_struct __user *) buf);
else
err = fsave_user((struct i387_fsave_struct __user *) buf);
if (unlikely(err) && __clear_user(buf, xstate_size))
err = -EFAULT;
return err;
}
/*
* This restores directly out of user space. Exceptions are handled.
* Save the fpu, extended register state to the user signal frame.
*
* 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
* state is copied.
* 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
*
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
* If the fpu, extended register state is live, save the state directly
* to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
* copy the thread's fpu state to the user frame starting at 'buf_fx'.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
*
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
* indicating the absence/presence of the extended state to the user.
*/
int restore_i387_xstate(void __user *buf)
int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
{
struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
struct task_struct *tsk = current;
int err = 0;
int ia32_fxstate = (buf != buf_fx);
ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
config_enabled(CONFIG_IA32_EMULATION));
if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;
if (!HAVE_HWFP)
return fpregs_soft_get(current, NULL, 0,
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
if (user_has_fpu()) {
/* Save the live register state to the user directly. */
if (save_user_xstate(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
fpu_fxsave(&tsk->thread.fpu);
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf_fx, xsave, xstate_size))
return -1;
}
/* Save the fsave header for the 32-bit frames. */
if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
return -1;
if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
return -1;
drop_init_fpu(tsk); /* trigger finit */
return 0;
}
static inline void
sanitize_restored_xstate(struct task_struct *tsk,
struct user_i387_ia32_struct *ia32_env,
u64 xstate_bv, int fx_only)
{
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
if (use_xsave()) {
/* These bits must be zero. */
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
/*
* Init the state that is not present in the memory
* layout and not enabled by the OS.
*/
if (fx_only)
xsave_hdr->xstate_bv = XSTATE_FPSSE;
else
xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
}
if (use_fxsr()) {
/*
* mscsr reserved bits must be masked to zero for security
* reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
convert_to_fxsr(tsk, ia32_env);
}
}
/*
* Restore the extended state if present. Otherwise, restore the FP/SSE state.
*/
static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
{
if (use_xsave()) {
if ((unsigned long)buf % 64 || fx_only) {
u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
xrstor_state(init_xstate_buf, init_bv);
return fxrstor_user(buf);
} else {
u64 init_bv = pcntxt_mask & ~xbv;
if (unlikely(init_bv))
xrstor_state(init_xstate_buf, init_bv);
return xrestore_user(buf, xbv);
}
} else if (use_fxsr()) {
return fxrstor_user(buf);
} else
return frstor_user(buf);
}
int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
{
int ia32_fxstate = (buf != buf_fx);
struct task_struct *tsk = current;
int state_size = xstate_size;
u64 xstate_bv = 0;
int fx_only = 0;
ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
config_enabled(CONFIG_IA32_EMULATION));
if (!buf) {
if (used_math())
goto clear;
drop_init_fpu(tsk);
return 0;
} else
if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
return -EACCES;
if (!used_math()) {
err = init_fpu(tsk);
if (err)
return err;
}
user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
err = fxrstor_checking((__force struct i387_fxsave_struct *)
buf);
if (unlikely(err)) {
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;
if (!used_math() && init_fpu(tsk))
return -1;
if (!HAVE_HWFP) {
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0;
}
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
/*
* Couldn't find the extended state information in the
* memory layout. Restore just the FP/SSE and init all
* the other extended state.
*/
state_size = sizeof(struct i387_fxsave_struct);
fx_only = 1;
} else {
state_size = fx_sw_user.xstate_size;
xstate_bv = fx_sw_user.xstate_bv;
}
}
if (ia32_fxstate) {
/*
* Encountered an error while doing the restore from the
* user buffer, clear the fpu state.
* For 32-bit frames with fxstate, copy the user state to the
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
clear:
clear_fpu(tsk);
clear_used_math();
struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
int err = 0;
/*
* Drop the current fpu which clears used_math(). This ensures
* that any context-switch during the copy of the new state,
* avoids the intermediate state from getting restored/saved.
* Thus avoiding the new restored state from getting corrupted.
* We will be ready to restore/save the state only after
* set_used_math() is again set.
*/
drop_fpu(tsk);
if (__copy_from_user(xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
set_used_math();
}
if (use_eager_fpu())
math_state_restore();
return err;
} else {
/*
* For 64-bit frames and 32-bit fsave frames, restore the user
* state to the registers directly (with exceptions handled).
*/
user_fpu_begin();
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
drop_init_fpu(tsk);
return -1;
}
}
return err;
return 0;
}
#endif
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
@@ -321,31 +428,22 @@ clear:
*/
static void prepare_fx_sw_frame(void)
{
int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
FP_XSTATE_MAGIC2_SIZE;
int fsave_header_size = sizeof(struct i387_fsave_struct);
int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
sig_xstate_size = sizeof(struct _fpstate) + size_extended;
#ifdef CONFIG_IA32_EMULATION
sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
#endif
memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
if (config_enabled(CONFIG_X86_32))
size += fsave_header_size;
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = sig_xstate_size;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
fx_sw_reserved.xstate_size = xstate_size;
#ifdef CONFIG_IA32_EMULATION
memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
#endif
}
#ifdef CONFIG_X86_64
unsigned int sig_xstate_size = sizeof(struct _fpstate);
#endif
if (config_enabled(CONFIG_IA32_EMULATION)) {
fx_sw_reserved_ia32 = fx_sw_reserved;
fx_sw_reserved_ia32.extended_size += fsave_header_size;
}
}
/*
* Enable the extended processor state save/restore feature
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void)
/*
* setup the xstate image representing the init state
*/
static void __init setup_xstate_init(void)
static void __init setup_init_fpu_buf(void)
{
setup_xstate_features();
/*
* Setup init_xstate_buf to represent the init state of
* all the features managed by the xsave
*/
init_xstate_buf = alloc_bootmem_align(xstate_size,
__alignof__(struct xsave_struct));
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
fx_finit(&init_xstate_buf->i387);
if (!cpu_has_xsave)
return;
setup_xstate_features();
clts();
/*
* Init all the features state with header_bv being 0x0
*/
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void)
* of any feature which is not represented by all zero's.
*/
xsave_state(init_xstate_buf, -1);
stts();
}
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
static int __init eager_fpu_setup(char *s)
{
if (!strcmp(s, "on"))
eagerfpu = ENABLE;
else if (!strcmp(s, "off"))
eagerfpu = DISABLE;
else if (!strcmp(s, "auto"))
eagerfpu = AUTO;
return 1;
}
__setup("eagerfpu=", eager_fpu_setup);
/*
* Enable and initialize the xsave feature.
*/
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void)
update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_init();
/* Auto enable eagerfpu for xsaveopt */
if (cpu_has_xsaveopt && eagerfpu != DISABLE)
eagerfpu = ENABLE;
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void)
next_func = xstate_enable;
this_func();
}
static inline void __init eager_fpu_init_bp(void)
{
current->thread.fpu.state =
alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
if (!init_xstate_buf)
setup_init_fpu_buf();
}
void __cpuinit eager_fpu_init(void)
{
static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
clear_used_math();
current_thread_info()->status = 0;
if (eagerfpu == ENABLE)
setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
if (!cpu_has_eager_fpu) {
stts();
return;
}
if (boot_func) {
boot_func();
boot_func = NULL;
}
/*
* This is same as math_state_restore(). But use_xsave() is
* not yet patched to use math_state_restore().
*/
init_fpu(current);
__thread_fpu_begin(current);
if (cpu_has_xsave)
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}