Merge commit '5bc66170dc486556a1e36fd384463536573f4b82' into x86/urgent
From Borislav Petkov <bp@amd64.org>:

Below is a RAS fix which reverts the addition of a sysfs attribute
that we agreed, post factum, is not needed. It should go in now
because that sysfs attribute would otherwise ship in 3.7 and be
exposed to userspace; removing it after that would be a lot harder.

This is done as a merge rather than a simple patch/cherry-pick since
the baseline for this patch was not in the previous x86/urgent branch.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)   += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
 endif
arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+    if (!cpu_has_invlpg)
+        return;
+
+    tlb_flushall_shift = 5;
+
+    if (c->x86 <= 0x11)
+        tlb_flushall_shift = 4;
+}
+
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+    u32 ebx, eax, ecx, edx;
+    u16 mask = 0xfff;
+
+    if (c->x86 < 0xf)
+        return;
+
+    if (c->extended_cpuid_level < 0x80000006)
+        return;
+
+    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+    tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+    tlb_lli_4k[ENTRIES] = ebx & mask;
+
+    /*
+     * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+     * characteristics from the CPUID function 0x80000005 instead.
+     */
+    if (c->x86 == 0xf) {
+        cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+        mask = 0xff;
+    }
+
+    /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+    if (!((eax >> 16) & mask)) {
+        u32 a, b, c, d;
+
+        cpuid(0x80000005, &a, &b, &c, &d);
+        tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+    } else {
+        tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+    }
+
+    /* a 4M entry uses two 2M entries */
+    tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+    /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+    if (!(eax & mask)) {
+        /* Erratum 658 */
+        if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+            tlb_lli_2m[ENTRIES] = 1024;
+        } else {
+            cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+            tlb_lli_2m[ENTRIES] = eax & 0xff;
+        }
+    } else
+        tlb_lli_2m[ENTRIES] = eax & mask;
+
+    tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+    cpu_set_tlb_flushall_shift(c);
+}
+
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
     .c_vendor   = "AMD",
     .c_ident    = { "AuthenticAMD" },
@@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
     .c_size_cache   = amd_size_cache,
 #endif
     .c_early_init   = early_init_amd,
+    .c_detect_tlb   = cpu_detect_tlb_amd,
     .c_bsp_init = bsp_init_amd,
     .c_init     = init_amd,
     .c_x86_vendor   = X86_VENDOR_AMD,
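Aside (illustration, not part of the diff): cpu_detect_tlb_amd() above decodes CPUID leaf 0x80000006 — EBX carries the L2 4K TLB entry counts in 12-bit fields (8-bit L1 fields are used on K8, which lacks 2M/4M L2 TLB data), EAX the 2M/4M counts, and one 4M entry occupies two 2M slots. A minimal userspace sketch of that decoding; the register values are hard-coded samples, not a real CPUID query.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t eax = 0x18001414; /* sample: L2 2M/4M dTLB/iTLB fields */
    uint32_t ebx = 0x14001400; /* sample: L2 4K dTLB/iTLB fields */
    uint16_t mask = 0xfff;     /* 12-bit entry counts on family >= 0x10 */

    printf("L2 dTLB 4K entries: %u\n", (ebx >> 16) & mask);
    printf("L2 iTLB 4K entries: %u\n", ebx & mask);
    printf("L2 dTLB 2M entries: %u\n", (eax >> 16) & mask);
    /* a 4M entry uses two 2M entries, as the patch comments note */
    printf("L2 dTLB 4M entries: %u\n", ((eax >> 16) & mask) >> 1);
    return 0;
}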
arch/x86/kernel/cpu/common.c
@@ -489,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
 
     printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
         "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"          \
-        "tlb_flushall_shift is 0x%x\n",
+        "tlb_flushall_shift: %d\n",
         tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
         tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
         tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
@@ -955,8 +955,7 @@ void __init identify_boot_cpu(void)
 #else
     vgetcpu_set_mode();
 #endif
-    if (boot_cpu_data.cpuid_level >= 2)
-        cpu_detect_tlb(&boot_cpu_data);
+    cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1036,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
         printk(KERN_CONT "%s ", vendor);
 
     if (c->x86_model_id[0])
-        printk(KERN_CONT "%s", c->x86_model_id);
+        printk(KERN_CONT "%s", strim(c->x86_model_id));
     else
         printk(KERN_CONT "%d86", c->x86);
 
+    printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
     if (c->x86_mask || c->cpuid_level >= 0)
-        printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+        printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
     else
-        printk(KERN_CONT "\n");
+        printk(KERN_CONT ")\n");
 
     print_cpu_msr(c);
 }
@@ -1130,8 +1131,6 @@ void syscall_init(void)
            X86_EFLAGS_IOPL|X86_EFLAGS_AC);
 }
 
-unsigned long kernel_eflags;
-
 /*
  * Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
@@ -1312,8 +1311,6 @@ void __cpuinit cpu_init(void)
 
     fpu_init();
 
-    raw_local_save_flags(kernel_eflags);
-
     if (is_uv_system())
         uv_cpu_init();
 }
arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
     int i, j, n;
     unsigned int regs[4];
     unsigned char *desc = (unsigned char *)regs;
 
+    if (c->cpuid_level < 2)
+        return;
+
     /* Number of times to iterate */
     n = cpuid_eax(2) & 0xFF;
arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
         put_online_cpus();
     } else
 #endif
+    {
+        preempt_disable();
         raise_local();
+        preempt_enable();
+    }
 }
 
 /* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
      * so do it a jiffie or two later everywhere.
      */
     schedule_timeout(2);
+
+    mutex_lock(&mce_inject_mutex);
     raise_mce(&m);
+    mutex_unlock(&mce_inject_mutex);
     return usize;
 }
arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
 
 struct mce_bank *mce_banks __read_mostly;
 
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+    return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+    mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
     struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
     if (mce_available(__this_cpu_ptr(&cpu_info))) {
         machine_check_poll(MCP_TIMESTAMP,
                 &__get_cpu_var(mce_poll_banks));
+        mce_intel_cmci_poll();
     }
 
     /*
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
      * polling interval, otherwise increase the polling interval.
      */
     iv = __this_cpu_read(mce_next_interval);
-    if (mce_notify_irq())
+    if (mce_notify_irq()) {
         iv = max(iv / 2, (unsigned long) HZ/100);
-    else
+    } else {
         iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+        iv = mce_adjust_timer(iv);
+    }
     __this_cpu_write(mce_next_interval, iv);
+    /* Might have become 0 after CMCI storm subsided */
+    if (iv) {
+        t->expires = jiffies + iv;
+        add_timer_on(t, smp_processor_id());
+    }
+}
 
-    t->expires = jiffies + iv;
-    add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+    struct timer_list *t = &__get_cpu_var(mce_timer);
+    unsigned long when = jiffies + interval;
+    unsigned long iv = __this_cpu_read(mce_next_interval);
+
+    if (timer_pending(t)) {
+        if (time_before(when, t->expires))
+            mod_timer_pinned(t, when);
+    } else {
+        t->expires = round_jiffies(when);
+        add_timer_on(t, smp_processor_id());
+    }
+    if (interval < iv)
+        __this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
     switch (c->x86_vendor) {
     case X86_VENDOR_INTEL:
         mce_intel_feature_init(c);
+        mce_adjust_timer = mce_intel_adjust_timer;
         break;
     case X86_VENDOR_AMD:
         mce_amd_feature_init(c);
@@ -1594,21 +1629,26 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
     }
 }
 
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+{
+    unsigned long iv = mce_adjust_timer(check_interval * HZ);
+
+    __this_cpu_write(mce_next_interval, iv);
+
+    if (mce_ignore_ce || !iv)
+        return;
+
+    t->expires = round_jiffies(jiffies + iv);
+    add_timer_on(t, smp_processor_id());
+}
+
 static void __mcheck_cpu_init_timer(void)
 {
     struct timer_list *t = &__get_cpu_var(mce_timer);
-    unsigned long iv = check_interval * HZ;
+    unsigned int cpu = smp_processor_id();
 
-    setup_timer(t, mce_timer_fn, smp_processor_id());
-
-    if (mce_ignore_ce)
-        return;
-
-    __this_cpu_write(mce_next_interval, iv);
-    if (!iv)
-        return;
-    t->expires = round_jiffies(jiffies + iv);
-    add_timer_on(t, smp_processor_id());
+    setup_timer(t, mce_timer_fn, cpu);
+    mce_start_timer(cpu, t);
 }
 
 /* Handle unconfigured int18 (should never happen) */
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
         mce_ignore_ce = 1;
     else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
         mce_bootlog = (str[0] == 'b');
+    else if (!strcmp(str, "bios_cmci_threshold"))
+        mce_bios_cmci_threshold = 1;
     else if (isdigit(str[0])) {
         get_option(&str, &tolerant);
         if (*str == ',') {
@@ -2294,38 +2337,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
     unsigned int cpu = (unsigned long)hcpu;
     struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-    switch (action) {
+    switch (action & ~CPU_TASKS_FROZEN) {
     case CPU_ONLINE:
-    case CPU_ONLINE_FROZEN:
         mce_device_create(cpu);
         if (threshold_cpu_callback)
             threshold_cpu_callback(action, cpu);
         break;
     case CPU_DEAD:
-    case CPU_DEAD_FROZEN:
         if (threshold_cpu_callback)
             threshold_cpu_callback(action, cpu);
         mce_device_remove(cpu);
+        mce_intel_hcpu_update(cpu);
         break;
     case CPU_DOWN_PREPARE:
-    case CPU_DOWN_PREPARE_FROZEN:
-        del_timer_sync(t);
         smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+        del_timer_sync(t);
         break;
     case CPU_DOWN_FAILED:
-    case CPU_DOWN_FAILED_FROZEN:
-        if (!mce_ignore_ce && check_interval) {
-            t->expires = round_jiffies(jiffies +
-                    per_cpu(mce_next_interval, cpu));
-            add_timer_on(t, cpu);
-        }
         smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
-        break;
-    case CPU_POST_DEAD:
-        /* intentionally ignoring frozen here */
-        cmci_rediscover(cpu);
+        mce_start_timer(cpu, t);
         break;
     }
 
+    if (action == CPU_POST_DEAD) {
+        /* intentionally ignoring frozen here */
+        cmci_rediscover(cpu);
+    }
+
     return NOTIFY_OK;
 }
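Aside (illustration, not part of the diff): the mce_timer_fn() rework above keeps the adaptive polling rhythm — halve the interval (floor HZ/100) when mce_notify_irq() reported work, double it (ceiling check_interval * HZ) when idle — and then lets mce_adjust_timer() override it during a CMCI storm. A standalone model of that halve/double step; HZ = 250 is assumed for illustration.

#include <stdio.h>

#define HZ     250              /* assumed for illustration */
#define MAX_IV (5 * 60 * HZ)    /* check_interval * HZ */

/* one step of mce_timer_fn()'s interval adaptation */
static unsigned long next_interval(unsigned long iv, int saw_work)
{
    if (saw_work)
        return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;
    return iv * 2 < MAX_IV ? iv * 2 : MAX_IV;
}

int main(void)
{
    unsigned long iv = MAX_IV;
    int i;

    for (i = 0; i < 6; i++) {
        iv = next_interval(iv, 1);  /* errors keep arriving */
        printf("poll %d: next interval %lu jiffies\n", i, iv);
    }
    return 0;
}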
arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD      1
+#define CMCI_POLL_INTERVAL  (30 * HZ)
+#define CMCI_STORM_INTERVAL (1 * HZ)
+#define CMCI_STORM_THRESHOLD    15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+    CMCI_STORM_NONE,
+    CMCI_STORM_ACTIVE,
+    CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
     return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+    if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+        return;
+    machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+    if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+        atomic_dec(&cmci_storm_on_cpus);
+
+    per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+    int r;
+
+    if (interval < CMCI_POLL_INTERVAL)
+        return interval;
+
+    switch (__this_cpu_read(cmci_storm_state)) {
+    case CMCI_STORM_ACTIVE:
+        /*
+         * We switch back to interrupt mode once the poll timer has
+         * silenced itself. That means no events recorded and the
+         * timer interval is back to our poll interval.
+         */
+        __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+        r = atomic_sub_return(1, &cmci_storm_on_cpus);
+        if (r == 0)
+            pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+        /* FALLTHROUGH */
+
+    case CMCI_STORM_SUBSIDED:
+        /*
+         * We wait for all cpus to go back to SUBSIDED
+         * state. When that happens we switch back to
+         * interrupt mode.
+         */
+        if (!atomic_read(&cmci_storm_on_cpus)) {
+            __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+            cmci_reenable();
+            cmci_recheck();
+        }
+        return CMCI_POLL_INTERVAL;
+    default:
+        /*
+         * We have shiny weather. Let the poll do whatever it
+         * thinks.
+         */
+        return interval;
+    }
+}
+
+static bool cmci_storm_detect(void)
+{
+    unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+    unsigned long ts = __this_cpu_read(cmci_time_stamp);
+    unsigned long now = jiffies;
+    int r;
+
+    if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+        return true;
+
+    if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+        cnt++;
+    } else {
+        cnt = 1;
+        __this_cpu_write(cmci_time_stamp, now);
+    }
+    __this_cpu_write(cmci_storm_cnt, cnt);
+
+    if (cnt <= CMCI_STORM_THRESHOLD)
+        return false;
+
+    cmci_clear();
+    __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+    r = atomic_add_return(1, &cmci_storm_on_cpus);
+    mce_timer_kick(CMCI_POLL_INTERVAL);
+
+    if (r == 1)
+        pr_notice("CMCI storm detected: switching to poll mode\n");
+    return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
@@ -61,33 +165,28 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+    if (cmci_storm_detect())
+        return;
     machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
     mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-    if (*hdr == 0)
-        printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-    *hdr = 1;
-    printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
     unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
     unsigned long flags;
-    int hdr = 0;
     int i;
+    int bios_wrong_thresh = 0;
 
     raw_spin_lock_irqsave(&cmci_discover_lock, flags);
     for (i = 0; i < banks; i++) {
         u64 val;
+        int bios_zero_thresh = 0;
 
         if (test_bit(i, owned))
             continue;
@@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot)
 
         /* Already owned by someone else? */
         if (val & MCI_CTL2_CMCI_EN) {
-            if (test_and_clear_bit(i, owned) && !boot)
-                print_update("SHD", &hdr, i);
+            clear_bit(i, owned);
             __clear_bit(i, __get_cpu_var(mce_poll_banks));
             continue;
         }
 
-        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-        val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+        if (!mce_bios_cmci_threshold) {
+            val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+            val |= CMCI_THRESHOLD;
+        } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+            /*
+             * If bios_cmci_threshold boot option was specified
+             * but the threshold is zero, we'll try to initialize
+             * it to 1.
+             */
+            bios_zero_thresh = 1;
+            val |= CMCI_THRESHOLD;
+        }
+
+        val |= MCI_CTL2_CMCI_EN;
         wrmsrl(MSR_IA32_MCx_CTL2(i), val);
         rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
         /* Did the enable bit stick? -- the bank supports CMCI */
         if (val & MCI_CTL2_CMCI_EN) {
-            if (!test_and_set_bit(i, owned) && !boot)
-                print_update("CMCI", &hdr, i);
+            set_bit(i, owned);
             __clear_bit(i, __get_cpu_var(mce_poll_banks));
+            /*
+             * We are able to set thresholds for some banks that
+             * had a threshold of 0. This means the BIOS has not
+             * set the thresholds properly or does not work with
+             * this boot option. Note down now and report later.
+             */
+            if (mce_bios_cmci_threshold && bios_zero_thresh &&
+                    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+                bios_wrong_thresh = 1;
         } else {
             WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
         }
     }
     raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-    if (hdr)
-        printk(KERN_CONT "\n");
+    if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+        pr_info_once(
+            "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+        pr_info_once(
+            "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+    }
 }
 
 /*
@@ -156,7 +278,7 @@ void cmci_clear(void)
             continue;
         /* Disable CMCI */
         rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-        val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+        val &= ~MCI_CTL2_CMCI_EN;
         wrmsrl(MSR_IA32_MCx_CTL2(i), val);
         __clear_bit(i, __get_cpu_var(mce_banks_owned));
     }
@@ -186,7 +308,7 @@ void cmci_rediscover(int dying)
             continue;
         /* Recheck banks in case CPUs don't all have the same */
         if (cmci_supported(&banks))
-            cmci_discover(banks, 0);
+            cmci_discover(banks);
     }
 
     set_cpus_allowed_ptr(current, old);
@@ -200,7 +322,7 @@ void cmci_reenable(void)
 {
     int banks;
     if (cmci_supported(&banks))
-        cmci_discover(banks, 0);
+        cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
@@ -211,7 +333,7 @@ static void intel_init_cmci(void)
         return;
 
     mce_threshold_vector = intel_threshold_interrupt;
-    cmci_discover(banks, 1);
+    cmci_discover(banks);
     /*
     * For CPU #0 this runs with still disabled APIC, but that's
     * ok because only the vector is set up. We still do another
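Aside (illustration, not part of the diff): cmci_storm_detect() above declares a storm when more than CMCI_STORM_THRESHOLD (15) CMCIs land within CMCI_STORM_INTERVAL (one second); the CPU then disables CMCI and relies on the poll timer kicked via mce_timer_kick(). A plain-C model of the counting, with the kernel's wrap-safe time_before_eq() replaced by a plain comparison.

#include <stdio.h>

#define HZ                   250  /* assumed for illustration */
#define CMCI_STORM_INTERVAL  (1 * HZ)
#define CMCI_STORM_THRESHOLD 15

static unsigned long cmci_time_stamp;
static unsigned int cmci_storm_cnt;

/* model of cmci_storm_detect(): count CMCIs per one-second window */
static int storm_detect(unsigned long now)
{
    if (now <= cmci_time_stamp + CMCI_STORM_INTERVAL) {
        cmci_storm_cnt++;
    } else {
        cmci_storm_cnt = 1;
        cmci_time_stamp = now;
    }
    return cmci_storm_cnt > CMCI_STORM_THRESHOLD;
}

int main(void)
{
    unsigned int i;

    for (i = 1; i <= 20; i++) {     /* 20 CMCIs within the same jiffy */
        if (storm_detect(1)) {
            printf("storm detected at interrupt %u\n", i);
            break;
        }
    }
    return 0;
}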
arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@
 open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
 open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
 
-print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
+print OUT "#include <asm/cpufeature.h>\n";
+print OUT "#endif\n";
+print OUT "\n";
 print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
 
 %features = ();
arch/x86/kernel/cpu/perf_event.h
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];
 
 extern struct event_constraint intel_snb_pebs_event_constraints[];
 
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
@@ -624,6 +626,8 @@ int p4_pmu_init(void);
 
 int p6_pmu_init(void);
 
+int knc_pmu_init(void);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
 };
 
 struct perf_ibs {
-    struct pmu      pmu;
-    unsigned int    msr;
-    u64             config_mask;
-    u64             cnt_mask;
-    u64             enable_mask;
-    u64             valid_mask;
-    u64             max_period;
-    unsigned long   offset_mask[1];
-    int             offset_max;
-    struct cpu_perf_ibs __percpu *pcpu;
-    u64             (*get_count)(u64 config);
+    struct pmu                      pmu;
+    unsigned int                    msr;
+    u64                             config_mask;
+    u64                             cnt_mask;
+    u64                             enable_mask;
+    u64                             valid_mask;
+    u64                             max_period;
+    unsigned long                   offset_mask[1];
+    int                             offset_max;
+    struct cpu_perf_ibs __percpu    *pcpu;
+
+    struct attribute                **format_attrs;
+    struct attribute_group          format_group;
+    const struct attribute_group    *attr_groups[2];
+
+    u64                             (*get_count)(u64 config);
 };
 
 struct perf_ibs_data {
@@ -209,6 +214,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
     return -EOPNOTSUPP;
 }
 
+static const struct perf_event_attr ibs_notsupp = {
+    .exclude_user   = 1,
+    .exclude_kernel = 1,
+    .exclude_hv     = 1,
+    .exclude_idle   = 1,
+    .exclude_host   = 1,
+    .exclude_guest  = 1,
+};
+
 static int perf_ibs_init(struct perf_event *event)
 {
     struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +243,9 @@ static int perf_ibs_init(struct perf_event *event)
     if (event->pmu != &perf_ibs->pmu)
         return -ENOENT;
 
+    if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+        return -EINVAL;
+
     if (config & ~perf_ibs->config_mask)
         return -EINVAL;
 
@@ -434,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 
 static void perf_ibs_read(struct perf_event *event) { }
 
+PMU_FORMAT_ATTR(rand_en,    "config:57");
+PMU_FORMAT_ATTR(cnt_ctl,    "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+    &format_attr_rand_en.attr,
+    NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+    NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+    NULL,
+};
+
 static struct perf_ibs perf_ibs_fetch = {
     .pmu = {
         .task_ctx_nr    = perf_invalid_context,
@@ -453,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
     .max_period     = IBS_FETCH_MAX_CNT << 4,
     .offset_mask    = { MSR_AMD64_IBSFETCH_REG_MASK },
     .offset_max     = MSR_AMD64_IBSFETCH_REG_COUNT,
+    .format_attrs   = ibs_fetch_format_attrs,
 
     .get_count      = get_ibs_fetch_count,
 };
@@ -476,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
     .max_period     = IBS_OP_MAX_CNT << 4,
     .offset_mask    = { MSR_AMD64_IBSOP_REG_MASK },
     .offset_max     = MSR_AMD64_IBSOP_REG_COUNT,
+    .format_attrs   = ibs_op_format_attrs,
 
     .get_count      = get_ibs_op_count,
 };
@@ -585,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
     perf_ibs->pcpu = pcpu;
 
+    /* register attributes */
+    if (perf_ibs->format_attrs[0]) {
+        memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+        perf_ibs->format_group.name  = "format";
+        perf_ibs->format_group.attrs = perf_ibs->format_attrs;
+
+        memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+        perf_ibs->attr_groups[0]  = &perf_ibs->format_group;
+        perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
+    }
+
     ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
     if (ret) {
         perf_ibs->pcpu = NULL;
@@ -596,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 static __init int perf_event_ibs_init(void)
 {
+    struct attribute **attr = ibs_op_format_attrs;
+
     if (!ibs_caps)
         return -ENODEV; /* ibs not supported by the cpu */
 
     perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-    if (ibs_caps & IBS_CAPS_OPCNT)
+
+    if (ibs_caps & IBS_CAPS_OPCNT) {
         perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+        *attr++ = &format_attr_cnt_ctl.attr;
+    }
     perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
 
     register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
     printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
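Aside (illustration, not part of the diff): PMU_FORMAT_ATTR(cnt_ctl, "config:19") above publishes a format string under the PMU's sysfs format/ directory (path assumed here to be /sys/bus/event_source/devices/ibs_op/format/cnt_ctl), telling perf userspace which perf_event_attr.config bit carries IBS_OP_CNT_CTL. A sketch of consuming such a spec, with hand-rolled parsing rather than perf's own parser:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    const char *spec = "config:19";  /* as published for cnt_ctl above */
    uint64_t config = 0;
    int bit;

    bit = atoi(strchr(spec, ':') + 1);
    config |= 1ULL << bit;           /* enables IBS_OP_CNT_CTL */

    printf("attr.config after setting %s: 0x%llx\n",
           spec, (unsigned long long)config);
    return 0;
}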
arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
         switch (boot_cpu_data.x86) {
         case 0x6:
             return p6_pmu_init();
+        case 0xb:
+            return knc_pmu_init();
         case 0xf:
             return p4_pmu_init();
         }
@@ -2008,6 +2010,7 @@ __init int intel_pmu_init(void)
         break;
 
     case 28: /* Atom */
+    case 54: /* Cedariew */
         memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                sizeof(hw_cache_event_ids));
 
@@ -2047,7 +2050,6 @@ __init int intel_pmu_init(void)
     case 42: /* SandyBridge */
     case 45: /* SandyBridge, "Romely-EP" */
         x86_add_quirk(intel_sandybridge_quirk);
-    case 58: /* IvyBridge */
         memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                sizeof(hw_cache_event_ids));
         memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2072,6 +2074,29 @@ __init int intel_pmu_init(void)
 
         pr_cont("SandyBridge events, ");
         break;
+
+    case 58: /* IvyBridge */
+        memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+        memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+               sizeof(hw_cache_extra_regs));
+
+        intel_pmu_lbr_init_snb();
+
+        x86_pmu.event_constraints = intel_snb_event_constraints;
+        x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+        x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+        x86_pmu.extra_regs = intel_snb_extra_regs;
+        /* all extra regs are per-cpu when HT is on */
+        x86_pmu.er_flags |= ERF_HAS_RSP_1;
+        x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+        /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+        intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+            X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+        pr_cont("IvyBridge events, ");
+        break;
 
     default:
         switch (x86_pmu.version) {
arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
     EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+    INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+    INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+    INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+    INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+    INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+    EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
     struct event_constraint *c;
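Aside (illustration, not part of the diff): the 0x01c2-style codes in the table above pack the architectural unit mask and event select as (umask << 8) | event; INTEL_UEVENT_CONSTRAINT matches both bytes while INTEL_EVENT_CONSTRAINT matches only the event byte. The packing, worked out:

#include <stdio.h>
#include <stdint.h>

/* (umask << 8) | event: the packing behind codes like 0x01c2 above */
static uint64_t uevent(uint8_t event, uint8_t umask)
{
    return ((uint64_t)umask << 8) | event;
}

int main(void)
{
    /* UOPS_RETIRED.ALL: event 0xc2, umask 0x01 -> 0x01c2 */
    printf("0x%04llx\n", (unsigned long long)uevent(0xc2, 0x01));
    /* INST_RETIRED.PRECDIST: event 0xc0, umask 0x01 -> 0x01c0 */
    printf("0x%04llx\n", (unsigned long long)uevent(0xc0, 0x01));
    return 0;
}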
arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
      * to have an operational LBR which can freeze
      * on PMU interrupt
      */
-    if (boot_cpu_data.x86_mask < 10) {
+    if (boot_cpu_data.x86_model == 28
+        && boot_cpu_data.x86_mask < 10) {
         pr_cont("LBR disabled due to erratum");
         return;
     }
arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
     }
 }
 
+static struct uncore_event_desc snb_uncore_events[] = {
+    INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+    { /* end: all zeroes */ },
+};
+
 static struct attribute *snb_uncore_formats_attr[] = {
     &format_attr_event.attr,
     &format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
     .constraints    = snb_uncore_cbox_constraints,
     .ops        = &snb_uncore_msr_ops,
     .format_group   = &snb_uncore_format_group,
+    .event_descs    = snb_uncore_events,
 };
 
 static struct intel_uncore_type *snb_msr_uncores[] = {
@@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
 static struct intel_uncore_box *
 uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-    static struct intel_uncore_box *box;
+    struct intel_uncore_box *box;
 
     box = *per_cpu_ptr(pmu->box, cpu);
     if (box)
@@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event)
     return ret;
 }
 
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+                struct device_attribute *attr, char *buf)
+{
+    int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+    buf[n++] = '\n';
+    buf[n] = '\0';
+    return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+    &dev_attr_cpumask.attr,
+    NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+    .attrs = uncore_pmu_attrs,
+};
+
 static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
     int ret;
@@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
         free_percpu(type->pmus[i].box);
     kfree(type->pmus);
     type->pmus = NULL;
-    kfree(type->attr_groups[1]);
-    type->attr_groups[1] = NULL;
+    kfree(type->events_group);
+    type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
         for (j = 0; j < i; j++)
             attrs[j] = &type->event_descs[j].attr.attr;
 
-        type->attr_groups[1] = events_group;
+        type->events_group = events_group;
     }
 
+    type->pmu_group = &uncore_pmu_attr_group;
     type->pmus = pmus;
     return 0;
 fail:
arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
     struct intel_uncore_pmu *pmus;
     struct intel_uncore_ops *ops;
     struct uncore_event_desc *event_descs;
-    const struct attribute_group *attr_groups[3];
+    const struct attribute_group *attr_groups[4];
 };
 
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
 
 struct intel_uncore_ops {
     void (*init_box)(struct intel_uncore_box *);
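Aside (illustration, not part of the diff): the header change above grows attr_groups[] to four slots and re-aliases the named groups as macros over fixed indices, freeing slot 0 for the new cpumask group while keeping a NULL terminator in the last slot. A compilable illustration of the aliasing trick; the struct and strings are stand-ins, not the kernel types.

#include <stdio.h>

struct type_model {
    const char *attr_groups[4]; /* slot 3 stays NULL (terminator) */
};

#define pmu_group    attr_groups[0]
#define format_group attr_groups[1]
#define events_group attr_groups[2]

int main(void)
{
    struct type_model t = { { 0 } };

    t.pmu_group    = "cpumask";
    t.format_group = "format";
    t.events_group = "events";

    printf("%s %s %s\n",
           t.attr_groups[0], t.attr_groups[1], t.attr_groups[2]);
    return 0;
}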
arch/x86/kernel/cpu/perf_event_knc.c (new file, 248 lines)
@@ -0,0 +1,248 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]            = 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]          = 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]          = 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]         = 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+                [PERF_COUNT_HW_CACHE_MAX]
+                [PERF_COUNT_HW_CACHE_OP_MAX]
+                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+[ C(L1D) ] = {
+    [ C(OP_READ) ] = {
+        /* On Xeon Phi event "0" is a valid DATA_READ */
+        /*   (L1 Data Cache Reads) Instruction. */
+        /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
+        /* bit will always be set in x86_pmu_hw_config(). */
+        [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                        /* DATA_READ */
+        [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
+        [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS */
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1 */
+        [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS */
+    },
+},
+[ C(L1I ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
+        [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS)   ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS)   ] = 0x0,
+    },
+},
+[ C(LL  ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0,
+        [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
+        [ C(RESULT_MISS)   ] = 0,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2 */
+        [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
+    },
+},
+[ C(DTLB) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                        /* DATA_READ */
+                        /* see note on L1 OP_READ */
+        [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
+        [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0,
+        [ C(RESULT_MISS)   ] = 0x0,
+    },
+},
+[ C(ITLB) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
+        [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS)   ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS)   ] = -1,
+    },
+},
+[ C(BPU ) ] = {
+    [ C(OP_READ) ] = {
+        [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
+        [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
+    },
+    [ C(OP_WRITE) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS)   ] = -1,
+    },
+    [ C(OP_PREFETCH) ] = {
+        [ C(RESULT_ACCESS) ] = -1,
+        [ C(RESULT_MISS)   ] = -1,
+    },
+},
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+    return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+    INTEL_EVENT_CONSTRAINT(0xc3, 0x1),  /* HWP_L2HIT */
+    INTEL_EVENT_CONSTRAINT(0xc4, 0x1),  /* HWP_L2MISS */
+    INTEL_EVENT_CONSTRAINT(0xc8, 0x1),  /* L2_READ_HIT_E */
+    INTEL_EVENT_CONSTRAINT(0xc9, 0x1),  /* L2_READ_HIT_M */
+    INTEL_EVENT_CONSTRAINT(0xca, 0x1),  /* L2_READ_HIT_S */
+    INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* L2_READ_MISS */
+    INTEL_EVENT_CONSTRAINT(0xcc, 0x1),  /* L2_WRITE_HIT */
+    INTEL_EVENT_CONSTRAINT(0xce, 0x1),  /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+    INTEL_EVENT_CONSTRAINT(0xcf, 0x1),  /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+    INTEL_EVENT_CONSTRAINT(0xd7, 0x1),  /* L2_VICTIM_REQ_WITH_DATA */
+    INTEL_EVENT_CONSTRAINT(0xe3, 0x1),  /* SNP_HITM_BUNIT */
+    INTEL_EVENT_CONSTRAINT(0xe6, 0x1),  /* SNP_HIT_L2 */
+    INTEL_EVENT_CONSTRAINT(0xe7, 0x1),  /* SNP_HITM_L2 */
+    INTEL_EVENT_CONSTRAINT(0xf1, 0x1),  /* L2_DATA_READ_MISS_CACHE_FILL */
+    INTEL_EVENT_CONSTRAINT(0xf2, 0x1),  /* L2_DATA_WRITE_MISS_CACHE_FILL */
+    INTEL_EVENT_CONSTRAINT(0xf6, 0x1),  /* L2_DATA_READ_MISS_MEM_FILL */
+    INTEL_EVENT_CONSTRAINT(0xf7, 0x1),  /* L2_DATA_WRITE_MISS_MEM_FILL */
+    INTEL_EVENT_CONSTRAINT(0xfc, 0x1),  /* L2_DATA_PF2 */
+    INTEL_EVENT_CONSTRAINT(0xfd, 0x1),  /* L2_DATA_PF2_DROP */
+    INTEL_EVENT_CONSTRAINT(0xfe, 0x1),  /* L2_DATA_PF2_MISS */
+    INTEL_EVENT_CONSTRAINT(0xff, 0x1),  /* L2_DATA_HIT_INFLIGHT_PF2 */
+    EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS      0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL        0x0000002f
+
+#define KNC_ENABLE_COUNTER0 0x00000001
+#define KNC_ENABLE_COUNTER1 0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+    u64 val;
+
+    rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+    val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+    wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+    u64 val;
+
+    rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+    val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+    wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+    struct hw_perf_event *hwc = &event->hw;
+    u64 val;
+
+    val = hwc->config;
+    if (cpuc->enabled)
+        val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+    (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+    struct hw_perf_event *hwc = &event->hw;
+    u64 val;
+
+    val = hwc->config;
+    if (cpuc->enabled)
+        val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+    (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+PMU_FORMAT_ATTR(event,  "config:0-7"    );
+PMU_FORMAT_ATTR(umask,  "config:8-15"   );
+PMU_FORMAT_ATTR(edge,   "config:18"     );
+PMU_FORMAT_ATTR(inv,    "config:23"     );
+PMU_FORMAT_ATTR(cmask,  "config:24-31"  );
+
+static struct attribute *intel_knc_formats_attr[] = {
+    &format_attr_event.attr,
+    &format_attr_umask.attr,
+    &format_attr_edge.attr,
+    &format_attr_inv.attr,
+    &format_attr_cmask.attr,
+    NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+    .name                   = "knc",
+    .handle_irq             = x86_pmu_handle_irq,
+    .disable_all            = knc_pmu_disable_all,
+    .enable_all             = knc_pmu_enable_all,
+    .enable                 = knc_pmu_enable_event,
+    .disable                = knc_pmu_disable_event,
+    .hw_config              = x86_pmu_hw_config,
+    .schedule_events        = x86_schedule_events,
+    .eventsel               = MSR_KNC_EVNTSEL0,
+    .perfctr                = MSR_KNC_PERFCTR0,
+    .event_map              = knc_pmu_event_map,
+    .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
+    .apic                   = 1,
+    .max_period             = (1ULL << 31) - 1,
+    .version                = 0,
+    .num_counters           = 2,
+    /* in theory 40 bits, early silicon is buggy though */
+    .cntval_bits            = 32,
+    .cntval_mask            = (1ULL << 32) - 1,
+    .get_event_constraints  = x86_get_event_constraints,
+    .event_constraints      = knc_event_constraints,
+    .format_attrs           = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+    x86_pmu = knc_pmu;
+
+    memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+        sizeof(hw_cache_event_ids));
+
+    return 0;
+}
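Aside (illustration, not part of the diff): knc_pmu_enable_all()/knc_pmu_disable_all() above are read-modify-write toggles of the two counter-enable bits in the KNC global control MSR (0x2f). A model with a plain variable standing in for the MSR:

#include <stdio.h>
#include <stdint.h>

#define KNC_ENABLE_COUNTER0 0x00000001
#define KNC_ENABLE_COUNTER1 0x00000002

static uint64_t global_ctrl;    /* stands in for MSR 0x2f */

static void knc_enable_all(void)
{
    global_ctrl |= (KNC_ENABLE_COUNTER0 | KNC_ENABLE_COUNTER1);
}

static void knc_disable_all(void)
{
    global_ctrl &= ~(uint64_t)(KNC_ENABLE_COUNTER0 | KNC_ENABLE_COUNTER1);
}

int main(void)
{
    knc_enable_all();
    printf("after enable:  0x%llx\n", (unsigned long long)global_ctrl);
    knc_disable_all();
    printf("after disable: 0x%llx\n", (unsigned long long)global_ctrl);
    return 0;
}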
arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
         switch (boot_cpu_data.x86) {
         case 6:
             return msr - MSR_P6_PERFCTR0;
+        case 11:
+            return msr - MSR_KNC_PERFCTR0;
         case 15:
             return msr - MSR_P4_BPU_PERFCTR0;
         }
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
         switch (boot_cpu_data.x86) {
         case 6:
             return msr - MSR_P6_EVNTSEL0;
+        case 11:
+            return msr - MSR_KNC_EVNTSEL0;
         case 15:
             return msr - MSR_P4_BSU_ESCR0;
         }
arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-    if (*pos == 0)  /* just in case, cpu 0 is not the first */
-        *pos = cpumask_first(cpu_online_mask);
-    else
-        *pos = cpumask_next(*pos - 1, cpu_online_mask);
+    *pos = cpumask_next(*pos - 1, cpu_online_mask);
     if ((*pos) < nr_cpu_ids)
         return &cpu_data(*pos);
     return NULL;
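Aside (illustration, not part of the diff): the c_start() simplification above relies on cpumask_next(n, mask) scanning strictly after n, so *pos == 0 passes -1 and still yields the first online CPU — no special case needed. A plain-C stand-in over a 32-bit mask:

#include <stdio.h>

/* stand-in for cpumask_next(): first set bit strictly after prev */
static int next_cpu(int prev, unsigned int mask)
{
    int cpu;

    for (cpu = prev + 1; cpu < 32; cpu++)
        if (mask & (1u << cpu))
            return cpu;
    return 32;  /* nr_cpu_ids: iteration ends */
}

int main(void)
{
    unsigned int online = 0x16; /* CPUs 1, 2 and 4; CPU 0 offline */

    /* *pos == 0 passes prev = -1 and still finds the first CPU */
    printf("first: %d\n", next_cpu(0 - 1, online));
    printf("next:  %d\n", next_cpu(1, online));
    return 0;
}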