Merge branch 'linus' into x86/urgent, to be able to merge a dependent fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>
这个提交包含在:
Ingo Molnar
2015-09-05 09:00:30 +02:00
当前提交 95cd2ea7d5
修改 3401 个文件,包含 144883 行新增71332 行删除

查看文件

@@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
obj-$(CONFIG_COMPAT) += signal_compat.o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += time.o ioport.o dumpstack.o nmi.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
@@ -107,8 +109,6 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o
obj-$(CONFIG_PMC_ATOM) += pmc_atom.o
###
# 64 bit specific files

查看文件

@@ -710,7 +710,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
#endif
}
static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
int cpu;
@@ -726,12 +726,6 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
*pcpu = cpu;
return 0;
}
/* wrapper to silence section mismatch warning */
int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
return _acpi_map_lsapic(handle, physid, pcpu);
}
EXPORT_SYMBOL(acpi_map_cpu);
int acpi_unmap_cpu(int cpu)

查看文件

@@ -263,7 +263,7 @@ static int apbt_clocksource_register(void)
/* Verify whether apbt counter works */
t1 = dw_apb_clocksource_read(clocksource_apbt);
rdtscll(start);
start = rdtsc();
/*
* We don't know the TSC frequency yet, but waiting for
@@ -273,7 +273,7 @@ static int apbt_clocksource_register(void)
*/
do {
rep_nop();
rdtscll(now);
now = rdtsc();
} while ((now - start) < 200000UL);
/* APBT is the only always on clocksource, it has to work! */
@@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void)
old = dw_apb_clocksource_read(clocksource_apbt);
old += loop;
t1 = __native_read_tsc();
t1 = rdtsc();
do {
new = dw_apb_clocksource_read(clocksource_apbt);
} while (new < old);
t2 = __native_read_tsc();
t2 = rdtsc();
shift = 5;
if (unlikely(loop >> shift == 0)) {

查看文件

@@ -457,45 +457,45 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;
rdtscll(tsc);
tsc = rdtsc();
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}
/*
* Setup the lapic timer in periodic or oneshot mode
*/
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt)
static int lapic_timer_shutdown(struct clock_event_device *evt)
{
unsigned long flags;
unsigned int v;
/* Lapic used as dummy for broadcast ? */
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return;
return 0;
local_irq_save(flags);
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0);
return 0;
}
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
case CLOCK_EVT_MODE_ONESHOT:
__setup_APIC_LVTT(lapic_timer_frequency,
mode != CLOCK_EVT_MODE_PERIODIC, 1);
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
break;
}
static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
/* Lapic used as dummy for broadcast ? */
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
local_irq_restore(flags);
__setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
return 0;
}
static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
return lapic_timer_set_periodic_oneshot(evt, false);
}
static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
return lapic_timer_set_periodic_oneshot(evt, true);
}
/*
@@ -513,15 +513,18 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
* The local apic timer can be used for any function which is CPU local.
*/
static struct clock_event_device lapic_clockevent = {
.name = "lapic",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
.shift = 32,
.set_mode = lapic_timer_setup,
.set_next_event = lapic_next_event,
.broadcast = lapic_timer_broadcast,
.rating = 100,
.irq = -1,
.name = "lapic",
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
| CLOCK_EVT_FEAT_DUMMY,
.shift = 32,
.set_state_shutdown = lapic_timer_shutdown,
.set_state_periodic = lapic_timer_set_periodic,
.set_state_oneshot = lapic_timer_set_oneshot,
.set_next_event = lapic_next_event,
.broadcast = lapic_timer_broadcast,
.rating = 100,
.irq = -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
@@ -592,7 +595,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
unsigned long pm = acpi_pm_read_early();
if (cpu_has_tsc)
rdtscll(tsc);
tsc = rdtsc();
switch (lapic_cal_loops++) {
case 0:
@@ -778,7 +781,7 @@ static int __init calibrate_APIC_clock(void)
* Setup the apic timer manually
*/
levt->event_handler = lapic_cal_handler;
lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
lapic_timer_set_periodic(levt);
lapic_cal_loops = -1;
/* Let the interrupts run */
@@ -788,7 +791,8 @@ static int __init calibrate_APIC_clock(void)
cpu_relax();
/* Stop the lapic timer */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
local_irq_disable();
lapic_timer_shutdown(levt);
/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
@@ -799,8 +803,8 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
else
levt->features |= CLOCK_EVT_FEAT_DUMMY;
} else
local_irq_enable();
}
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
pr_warning("APIC timer disabled due to verification failure\n");
@@ -878,7 +882,7 @@ static void local_apic_timer_interrupt(void)
if (!evt->event_handler) {
pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
/* Switch it off */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
lapic_timer_shutdown(evt);
return;
}
@@ -1209,7 +1213,7 @@ void setup_local_APIC(void)
long long max_loops = cpu_khz ? cpu_khz : 1000000;
if (cpu_has_tsc)
rdtscll(tsc);
tsc = rdtsc();
if (disable_apic) {
disable_ioapic_support();
@@ -1293,7 +1297,7 @@ void setup_local_APIC(void)
}
if (queued) {
if (cpu_has_tsc && cpu_khz) {
rdtscll(ntsc);
ntsc = rdtsc();
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
max_loops--;

查看文件

@@ -191,7 +191,6 @@ static struct apic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
.wait_for_init_deassert = false,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
@@ -299,7 +298,6 @@ static struct apic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
.wait_for_init_deassert = false,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,

查看文件

@@ -152,7 +152,6 @@ struct apic apic_noop = {
.wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL,
.read = noop_apic_read,

查看文件

@@ -92,7 +92,6 @@ static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
atomic_set(&init_deasserted, 1);
return 0;
}
@@ -235,7 +234,6 @@ static const struct apic apic_numachip __refconst = {
.send_IPI_self = numachip_send_IPI_self,
.wakeup_secondary_cpu = numachip_wakeup_secondary,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL, /* REMRD not supported */
.read = native_apic_mem_read,

查看文件

@@ -186,7 +186,6 @@ static struct apic apic_bigsmp = {
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.wait_for_init_deassert = true,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,

查看文件

@@ -2541,7 +2541,7 @@ void __init setup_ioapic_dest(void)
* Honour affinities which have been set in early boot
*/
if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
mask = idata->affinity;
mask = irq_data_get_affinity_mask(idata);
else
mask = apic->target_cpus();

查看文件

@@ -264,7 +264,7 @@ static inline int hpet_dev_id(struct irq_domain *domain)
static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
hpet_msi_write(data->handler_data, msg);
hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
}
static struct irq_chip hpet_msi_controller = {

查看文件

@@ -111,7 +111,6 @@ static struct apic apic_default = {
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.wait_for_init_deassert = true,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,

查看文件

@@ -169,8 +169,7 @@ next:
goto next;
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
if (per_cpu(vector_irq, new_cpu)[vector] >
VECTOR_UNDEFINED)
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
goto next;
}
/* Found one! */
@@ -182,7 +181,7 @@ next:
cpumask_intersects(d->old_domain, cpu_online_mask);
}
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
d->cfg.vector = vector;
cpumask_copy(d->domain, vector_cpumask);
err = 0;
@@ -224,15 +223,16 @@ static int assign_irq_vector_policy(int irq, int node,
static void clear_irq_vector(int irq, struct apic_chip_data *data)
{
int cpu, vector;
struct irq_desc *desc;
unsigned long flags;
int cpu, vector;
raw_spin_lock_irqsave(&vector_lock, flags);
BUG_ON(!data->cfg.vector);
vector = data->cfg.vector;
for_each_cpu_and(cpu, data->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
data->cfg.vector = 0;
cpumask_clear(data->domain);
@@ -242,12 +242,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
return;
}
desc = irq_to_desc(irq);
for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
if (per_cpu(vector_irq, cpu)[vector] != desc)
continue;
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
break;
}
}
@@ -296,7 +297,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
struct irq_alloc_info *info = arg;
struct apic_chip_data *data;
struct irq_data *irq_data;
int i, err;
int i, err, node;
if (disable_apic)
return -ENXIO;
@@ -308,12 +309,13 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
BUG_ON(!irq_data);
node = irq_data_get_node(irq_data);
#ifdef CONFIG_X86_IO_APIC
if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
data = legacy_irq_data[virq + i];
else
#endif
data = alloc_apic_chip_data(irq_data->node);
data = alloc_apic_chip_data(node);
if (!data) {
err = -ENOMEM;
goto error;
@@ -322,8 +324,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irq_data->chip = &lapic_controller;
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
err = assign_irq_vector_policy(virq + i, irq_data->node, data,
info);
err = assign_irq_vector_policy(virq + i, node, data, info);
if (err)
goto error;
}
@@ -403,32 +404,32 @@ int __init arch_early_irq_init(void)
return arch_early_ioapic_init();
}
/* Initialize vector_irq on a new cpu */
static void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
int irq, vector;
struct apic_chip_data *data;
struct irq_desc *desc;
int irq, vector;
/* Mark the inuse vectors */
for_each_active_irq(irq) {
data = apic_chip_data(irq_get_irq_data(irq));
if (!data)
continue;
for_each_irq_desc(irq, desc) {
struct irq_data *idata = irq_desc_get_irq_data(desc);
if (!cpumask_test_cpu(cpu, data->domain))
data = apic_chip_data(idata);
if (!data || !cpumask_test_cpu(cpu, data->domain))
continue;
vector = data->cfg.vector;
per_cpu(vector_irq, cpu)[vector] = irq;
per_cpu(vector_irq, cpu)[vector] = desc;
}
/* Mark the free vectors */
for (vector = 0; vector < NR_VECTORS; ++vector) {
irq = per_cpu(vector_irq, cpu)[vector];
if (irq <= VECTOR_UNDEFINED)
desc = per_cpu(vector_irq, cpu)[vector];
if (IS_ERR_OR_NULL(desc))
continue;
data = apic_chip_data(irq_get_irq_data(irq));
data = apic_chip_data(irq_desc_get_irq_data(desc));
if (!cpumask_test_cpu(cpu, data->domain))
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
}
}
@@ -448,7 +449,7 @@ void setup_vector_irq(int cpu)
* legacy vector to irq mapping:
*/
for (irq = 0; irq < nr_legacy_irqs(); irq++)
per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq;
per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq);
__setup_vector_irq(cpu);
}
@@ -490,7 +491,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
if (err) {
struct irq_data *top = irq_get_irq_data(irq);
if (assign_irq_vector(irq, data, top->affinity))
if (assign_irq_vector(irq, data,
irq_data_get_affinity_mask(top)))
pr_err("Failed to recover vector for irq %d\n", irq);
return err;
}
@@ -538,27 +540,30 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
entering_ack_irq();
/* Prevent vectors vanishing under us */
raw_spin_lock(&vector_lock);
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
int irq;
unsigned int irr;
struct irq_desc *desc;
struct apic_chip_data *data;
struct irq_desc *desc;
unsigned int irr;
irq = __this_cpu_read(vector_irq[vector]);
if (irq <= VECTOR_UNDEFINED)
retry:
desc = __this_cpu_read(vector_irq[vector]);
if (IS_ERR_OR_NULL(desc))
continue;
desc = irq_to_desc(irq);
if (!desc)
continue;
if (!raw_spin_trylock(&desc->lock)) {
raw_spin_unlock(&vector_lock);
cpu_relax();
raw_spin_lock(&vector_lock);
goto retry;
}
data = apic_chip_data(&desc->irq_data);
data = apic_chip_data(irq_desc_get_irq_data(desc));
if (!data)
continue;
raw_spin_lock(&desc->lock);
goto unlock;
/*
* Check if the irq migration is in progress. If so, we
@@ -583,11 +588,13 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
unlock:
raw_spin_unlock(&desc->lock);
}
raw_spin_unlock(&vector_lock);
exiting_irq();
}

查看文件

@@ -182,7 +182,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
return notifier_from_errno(err);
}
static struct notifier_block __refdata x2apic_cpu_notifier = {
static struct notifier_block x2apic_cpu_notifier = {
.notifier_call = update_clusterinfo,
};
@@ -272,7 +272,6 @@ static struct apic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,

查看文件

@@ -128,7 +128,6 @@ static struct apic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,

查看文件

@@ -248,7 +248,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
APIC_DM_STARTUP;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
atomic_set(&init_deasserted, 1);
return 0;
}
@@ -414,7 +413,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
.send_IPI_self = uv_send_IPI_self,
.wakeup_secondary_cpu = uv_wakeup_secondary,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,

查看文件

@@ -919,7 +919,7 @@ recalc:
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
idle_percentage = stime - last_stime;
idle_percentage = cputime_to_jiffies(stime - last_stime);
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);

查看文件

@@ -1,4 +1,4 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
@@ -163,6 +163,5 @@ static int start_periodic_check_for_corruption(void)
schedule_delayed_work(&bios_check_work, 0);
return 0;
}
module_init(start_periodic_check_for_corruption);
device_initcall(start_periodic_check_for_corruption);

查看文件

@@ -46,6 +46,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
perf_event_intel_uncore_snb.o \
perf_event_intel_uncore_snbep.o \
perf_event_intel_uncore_nhmex.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
endif

查看文件

@@ -11,6 +11,7 @@
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
@@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
unsigned long d, d2;
u64 d, d2;
printk(KERN_INFO "AMD K6 stepping B detected - ");
@@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
n = K6_BUG_LOOP;
f_vide = vide;
rdtscl(d);
d = rdtsc();
while (n--)
f_vide();
rdtscl(d2);
d2 = rdtsc();
d = d2-d;
if (d > 20*K6_BUG_LOOP)
@@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
/* A random value per boot for bit slice [12:upper_bit) */
va_align.bits = get_random_int() & va_align.mask;
}
if (cpu_has(c, X86_FEATURE_MWAITX))
use_mwaitx_delay();
}
static void early_init_amd(struct cpuinfo_x86 *c)

查看文件

@@ -13,6 +13,7 @@
#include <linux/kgdb.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
@@ -1185,10 +1186,10 @@ void syscall_init(void)
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
@@ -1199,7 +1200,7 @@ void syscall_init(void)
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, ignore_sysret);
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
@@ -1488,3 +1489,20 @@ inline bool __static_cpu_has_safe(u16 bit)
return boot_cpu_has(bit);
}
EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
this_cpu->c_bsp_resume(&boot_cpu_data);
}
static struct syscore_ops cpu_syscore_ops = {
.resume = bsp_resume,
};
static int __init init_cpu_syscore(void)
{
register_syscore_ops(&cpu_syscore_ops);
return 0;
}
core_initcall(init_cpu_syscore);

查看文件

@@ -13,6 +13,7 @@ struct cpu_dev {
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
void (*c_detect_tlb)(struct cpuinfo_x86 *);
void (*c_bsp_resume)(struct cpuinfo_x86 *);
int c_x86_vendor;
#ifdef CONFIG_X86_32
/* Optional vendor specific routine to obtain the cache size. */

查看文件

@@ -371,6 +371,36 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
}
}
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
{
u64 epb;
/*
* Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
* (x86_energy_perf_policy(8) is available to change it at run-time.)
*/
if (!cpu_has(c, X86_FEATURE_EPB))
return;
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
static void intel_bsp_resume(struct cpuinfo_x86 *c)
{
/*
* MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume,
* so reinitialize it properly like during bootup:
*/
init_intel_energy_perf(c);
}
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
@@ -478,21 +508,7 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
/*
* Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not.
* x86_energy_perf_policy(8) is available to change it at run-time
*/
if (cpu_has(c, X86_FEATURE_EPB)) {
u64 epb;
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
}
init_intel_energy_perf(c);
}
#ifdef CONFIG_X86_32
@@ -747,6 +763,7 @@ static const struct cpu_dev intel_cpu_dev = {
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
.c_init = init_intel,
.c_bsp_resume = intel_bsp_resume,
.c_x86_vendor = X86_VENDOR_INTEL,
};

查看文件

@@ -25,32 +25,11 @@
*/
#define TOPA_PMI_MARGIN 512
/*
* Table of Physical Addresses bits
*/
enum topa_sz {
TOPA_4K = 0,
TOPA_8K,
TOPA_16K,
TOPA_32K,
TOPA_64K,
TOPA_128K,
TOPA_256K,
TOPA_512K,
TOPA_1MB,
TOPA_2MB,
TOPA_4MB,
TOPA_8MB,
TOPA_16MB,
TOPA_32MB,
TOPA_64MB,
TOPA_128MB,
TOPA_SZ_END,
};
#define TOPA_SHIFT 12
static inline unsigned int sizes(enum topa_sz tsz)
static inline unsigned int sizes(unsigned int tsz)
{
return 1 << (tsz + 12);
return 1 << (tsz + TOPA_SHIFT);
};
struct topa_entry {
@@ -66,20 +45,26 @@ struct topa_entry {
u64 rsvd4 : 16;
};
#define TOPA_SHIFT 12
#define PT_CPUID_LEAVES 2
#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
PT_CAP_cr3_filtering,
PT_CAP_psb_cyc,
PT_CAP_mtc,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
PT_CAP_payloads_lip,
PT_CAP_mtc_periods,
PT_CAP_cycle_thresholds,
PT_CAP_psb_periods,
};
struct pt_pmu {
struct pmu pmu;
u32 caps[4 * PT_CPUID_LEAVES];
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
};
/**

查看文件

@@ -1,4 +1,4 @@
obj-y = mce.o mce-severity.o
obj-y = mce.o mce-severity.o mce-genpool.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o

查看文件

@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
m.addr = mem_err->physical_addr;
mce_log(&m);
mce_notify_irq();
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

查看文件

@@ -0,0 +1,99 @@
/*
* MCE event pool management in MCE context
*
* Copyright (C) 2015 Intel Corp.
* Author: Chen, Gong <gong.chen@linux.intel.com>
*
* This file is licensed under GPLv2.
*/
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/genalloc.h>
#include <linux/llist.h>
#include "mce-internal.h"
/*
* printk() is not safe in MCE context. This is a lock-less memory allocator
* used to save error information organized in a lock-less list.
*
* This memory pool is only to be used to save MCE records in MCE context.
* MCE events are rare, so a fixed size memory pool should be enough. Use
* 2 pages to save MCE events for now (~80 MCE records at most).
*/
#define MCE_POOLSZ (2 * PAGE_SIZE)
static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
void mce_gen_pool_process(void)
{
struct llist_node *head;
struct mce_evt_llist *node;
struct mce *mce;
head = llist_del_all(&mce_event_llist);
if (!head)
return;
head = llist_reverse_order(head);
llist_for_each_entry(node, head, llnode) {
mce = &node->mce;
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
}
}
bool mce_gen_pool_empty(void)
{
return llist_empty(&mce_event_llist);
}
int mce_gen_pool_add(struct mce *mce)
{
struct mce_evt_llist *node;
if (!mce_evt_pool)
return -EINVAL;
node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
if (!node) {
pr_warn_ratelimited("MCE records pool full!\n");
return -ENOMEM;
}
memcpy(&node->mce, mce, sizeof(*mce));
llist_add(&node->llnode, &mce_event_llist);
return 0;
}
static int mce_gen_pool_create(void)
{
struct gen_pool *tmpp;
int ret = -ENOMEM;
tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
if (!tmpp)
goto out;
ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
if (ret) {
gen_pool_destroy(tmpp);
goto out;
}
mce_evt_pool = tmpp;
out:
return ret;
}
int mce_gen_pool_init(void)
{
/* Just init mce_gen_pool once. */
if (mce_evt_pool)
return 0;
return mce_gen_pool_create();
}

查看文件

@@ -13,6 +13,8 @@ enum severity_level {
MCE_PANIC_SEVERITY,
};
extern struct atomic_notifier_head x86_mce_decoder_chain;
#define ATTR_LEN 16
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
@@ -24,6 +26,16 @@ struct mce_bank {
char attrname[ATTR_LEN]; /* attribute name */
};
struct mce_evt_llist {
struct llist_node llnode;
struct mce mce;
};
void mce_gen_pool_process(void);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
return -EINVAL;
}
#endif
void mce_inject_log(struct mce *m);

查看文件

@@ -52,11 +52,11 @@
static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define rcu_dereference_check_mce(p) \
#define mce_log_get_idx_check(p) \
({ \
rcu_lockdep_assert(rcu_read_lock_sched_held() || \
lockdep_is_held(&mce_chrdev_read_mutex), \
"suspicious rcu_dereference_check_mce() usage"); \
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
!lockdep_is_held(&mce_chrdev_read_mutex), \
"suspicious mce_log_get_idx_check() usage"); \
smp_load_acquire(&(p)); \
})
@@ -110,22 +110,24 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
*/
mce_banks_t mce_banks_ce_disabled;
static DEFINE_PER_CPU(struct work_struct, mce_work);
static struct work_struct mce_work;
static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
static int mce_usable_address(struct mce *m);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
rdtscll(m->tsc);
m->tsc = rdtsc();
/* We hope get_seconds stays lockless */
m->time = get_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
/* Emit the trace record: */
trace_mce_record(mce);
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
mce->finished = 0;
wmb();
for (;;) {
entry = rcu_dereference_check_mce(mcelog.next);
entry = mce_log_get_idx_check(mcelog.next);
for (;;) {
/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
static void drain_mcelog_buffer(void)
void mce_inject_log(struct mce *m)
{
unsigned int next, i, prev = 0;
next = ACCESS_ONCE(mcelog.next);
do {
struct mce *m;
/* drain what was logged during boot */
for (i = prev; i < next; i++) {
unsigned long start = jiffies;
unsigned retries = 1;
m = &mcelog.entry[i];
while (!m->finished) {
if (time_after_eq(jiffies, start + 2*retries))
retries++;
cpu_relax();
if (!m->finished && retries >= 4) {
pr_err("skipping error being logged currently!\n");
break;
}
}
smp_rmb();
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
prev = next;
next = cmpxchg(&mcelog.next, prev, 0);
} while (next != prev);
mutex_lock(&mce_chrdev_read_mutex);
mce_log(m);
mutex_unlock(&mce_chrdev_read_mutex);
}
EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb;
void mce_register_decode_chain(struct notifier_block *nb)
{
/* Ensure SRAO notifier has the highest priority in the decode chain. */
if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1;
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
drain_mcelog_buffer();
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
}
}
/*
* Simple lockless ring to communicate PFNs from the exception handler with the
* process context work function. This is vastly simplified because there's
* only a single reader and a single writer.
*/
#define MCE_RING_SIZE 16 /* we use one entry less */
struct mce_ring {
unsigned short start;
unsigned short end;
unsigned long ring[MCE_RING_SIZE];
};
static DEFINE_PER_CPU(struct mce_ring, mce_ring);
/* Runs with CPU affinity in workqueue */
static int mce_ring_empty(void)
{
struct mce_ring *r = this_cpu_ptr(&mce_ring);
return r->start == r->end;
}
static int mce_ring_get(unsigned long *pfn)
{
struct mce_ring *r;
int ret = 0;
*pfn = 0;
get_cpu();
r = this_cpu_ptr(&mce_ring);
if (r->start == r->end)
goto out;
*pfn = r->ring[r->start];
r->start = (r->start + 1) % MCE_RING_SIZE;
ret = 1;
out:
put_cpu();
return ret;
}
/* Always runs in MCE context with preempt off */
static int mce_ring_add(unsigned long pfn)
{
struct mce_ring *r = this_cpu_ptr(&mce_ring);
unsigned next;
next = (r->end + 1) % MCE_RING_SIZE;
if (next == r->start)
return -1;
r->ring[r->end] = pfn;
wmb();
r->end = next;
return 0;
}
int mce_available(struct cpuinfo_x86 *c)
{
if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
if (!mce_ring_empty())
schedule_work(this_cpu_ptr(&mce_work));
if (!mce_gen_pool_empty() && keventd_up())
schedule_work(&mce_work);
}
static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
static void mce_irq_work_cb(struct irq_work *entry)
{
mce_notify_irq();
@@ -551,9 +472,30 @@ static void mce_report_event(struct pt_regs *regs)
return;
}
irq_work_queue(this_cpu_ptr(&mce_irq_work));
irq_work_queue(&mce_irq_work);
}
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
unsigned long pfn;
if (!mce)
return NOTIFY_DONE;
if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, MCE_VECTOR, 0);
}
return NOTIFY_OK;
}
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier,
.priority = INT_MAX,
};
/*
* Read ADDR and MISC registers.
*/
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
*/
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
if (m.status & MCI_STATUS_ADDRV) {
mce_ring_add(m.addr >> PAGE_SHIFT);
mce_schedule_work();
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
if (!mce_gen_pool_add(&m))
mce_schedule_work();
}
}
@@ -1029,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
{
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
enum ctx_state prev_state;
int i;
int worst = 0;
int severity;
@@ -1055,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int flags = MF_ACTION_REQUIRED;
int lmce = 0;
prev_state = ist_enter(regs);
ist_enter(regs);
this_cpu_inc(mce_exception_count);
@@ -1143,15 +1087,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
* RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
/* assuming valid severity level != 0 */
m.severity = severity;
m.usable_addr = mce_usable_address(&m);
mce_log(&m);
@@ -1227,7 +1165,7 @@ out:
local_irq_disable();
ist_end_non_atomic();
done:
ist_exit(regs, prev_state);
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1247,14 +1185,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
/*
* Action optional processing happens here (picking up
* from the list of faulting pages that do_machine_check()
* placed into the "ring").
* placed into the genpool).
*/
static void mce_process_work(struct work_struct *dummy)
{
unsigned long pfn;
while (mce_ring_get(&pfn))
memory_failure(pfn, MCE_VECTOR, 0);
mce_gen_pool_process();
}
#ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1613,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
}
}
static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_clear(c);
break;
default:
break;
}
}
static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
@@ -1731,13 +1677,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
}
if (mce_gen_pool_init()) {
mca_cfg.disabled = true;
pr_emerg("Couldn't allocate MCE records pool!\n");
return;
}
machine_check_vector = do_machine_check;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
}
/*
* Called for each booted CPU to clear some machine checks opt-ins
*/
void mcheck_cpu_clear(struct cpuinfo_x86 *c)
{
if (mca_cfg.disabled)
return;
if (!mce_available(c))
return;
/*
* Possibly to clear general settings generic to x86
* __mcheck_cpu_clear_generic(c);
*/
__mcheck_cpu_clear_vendor(c);
}
/*
@@ -1784,7 +1753,7 @@ static void collect_tscs(void *data)
{
unsigned long *cpu_tsc = (unsigned long *)data;
rdtscll(cpu_tsc[smp_processor_id()]);
cpu_tsc[smp_processor_id()] = rdtsc();
}
static int mce_apei_read_done;
@@ -1850,7 +1819,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
goto out;
}
next = rcu_dereference_check_mce(mcelog.next);
next = mce_log_get_idx_check(mcelog.next);
/* Only supports full reads right now */
err = -EINVAL;
@@ -2056,8 +2025,12 @@ __setup("mce", mcheck_enable);
int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb);
mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_process_work);
init_irq_work(&mce_irq_work, mce_irq_work_cb);
return 0;
}
@@ -2591,5 +2564,20 @@ static int __init mcheck_debugfs_init(void)
return 0;
}
late_initcall(mcheck_debugfs_init);
#else
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif
static int __init mcheck_late_init(void)
{
mcheck_debugfs_init();
/*
* Flush out everything that has been logged during early boot, now that
* everything has been initialized (workqueues, decoders, ...).
*/
mce_schedule_work();
return 0;
}
late_initcall(mcheck_late_init);

查看文件

@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}
static void cmci_toggle_interrupt_mode(bool on)
{
unsigned long flags, *owned;
int bank;
u64 val;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
owned = this_cpu_ptr(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
if (on)
val |= MCI_CTL2_CMCI_EN;
else
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
*/
if (!atomic_read(&cmci_storm_on_cpus)) {
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
cmci_reenable();
cmci_toggle_interrupt_mode(true);
cmci_recheck();
}
return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
}
}
static void cmci_storm_disable_banks(void)
{
unsigned long flags, *owned;
int bank;
u64 val;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
owned = this_cpu_ptr(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static bool cmci_storm_detect(void)
{
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
if (cnt <= CMCI_STORM_THRESHOLD)
return false;
cmci_storm_disable_banks();
cmci_toggle_interrupt_mode(false);
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
r = atomic_add_return(1, &cmci_storm_on_cpus);
mce_timer_kick(CMCI_STORM_INTERVAL);
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
return;
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
mce_notify_irq();
}
/*
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
cmci_recheck();
}
void intel_init_lmce(void)
static void intel_init_lmce(void)
{
u64 val;
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
static void intel_clear_lmce(void)
{
u64 val;
if (!lmce_supported())
return;
rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
val &= ~MCG_EXT_CTL_LMCE_EN;
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
intel_init_lmce();
}
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
}

查看文件

@@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
u32 loaddr, hi, lotype;
prev_state = ist_enter(regs);
ist_enter(regs);
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs, prev_state);
ist_exit(regs);
}
/* Set up machine check reporting for processors with Intel style MCE: */

查看文件

@@ -15,12 +15,12 @@
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state = ist_enter(regs);
ist_enter(regs);
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
ist_exit(regs, prev_state);
ist_exit(regs);
}
/* Set up machine check reporting on the Winchip C6 series */

查看文件

@@ -377,17 +377,16 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
return err;
}
static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
static void mc_device_remove(struct device *dev, struct subsys_interface *sif)
{
int cpu = dev->id;
if (!cpu_online(cpu))
return 0;
return;
pr_debug("CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&dev->kobj, &mc_attr_group);
return 0;
}
static struct subsys_interface mc_cpu_interface = {
@@ -460,7 +459,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
static struct notifier_block __refdata mc_cpu_notifier = {
static struct notifier_block mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};

查看文件

@@ -390,7 +390,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
}
#ifdef DEBUG
static void __ref show_saved_mc(void)
static void show_saved_mc(void)
{
int i, j;
unsigned int sig, pf, rev, total_size, data_size, date;

查看文件

@@ -18,6 +18,7 @@
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
@@ -28,10 +29,14 @@
#include <asm/i8259.h>
#include <asm/apic.h>
#include <asm/timer.h>
#include <asm/reboot.h>
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
@@ -67,8 +72,47 @@ void hv_remove_vmbus_irq(void)
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
void hv_setup_kexec_handler(void (*handler)(void))
{
hv_kexec_handler = handler;
}
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
void hv_remove_kexec_handler(void)
{
hv_kexec_handler = NULL;
}
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
{
hv_crash_handler = handler;
}
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
void hv_remove_crash_handler(void)
{
hv_crash_handler = NULL;
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
#endif
static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
hv_kexec_handler();
native_machine_shutdown();
}
static void hv_machine_crash_shutdown(struct pt_regs *regs)
{
if (hv_crash_handler)
hv_crash_handler(regs);
native_machine_crash_shutdown(regs);
}
static uint32_t __init ms_hyperv_platform(void)
{
u32 eax;
@@ -114,6 +158,7 @@ static void __init ms_hyperv_init_platform(void)
* Extract the features and hints
*/
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
@@ -141,6 +186,9 @@ static void __init ms_hyperv_init_platform(void)
no_timer_check = 1;
#endif
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
mark_tsc_unstable("running on Hyper-V");
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {

查看文件

@@ -448,7 +448,6 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
increment);
}
EXPORT_SYMBOL(mtrr_add);
/**
* mtrr_del_page - delete a memory type region
@@ -537,7 +536,6 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
EXPORT_SYMBOL(mtrr_del);
/**
* arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable

查看文件

@@ -1551,7 +1551,7 @@ static void __init filter_events(struct attribute **attrs)
}
/* Merge two pointer arrays */
static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
{
struct attribute **new;
int j, i;
@@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment)
int idx = segment >> 3;
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
if (idx > LDT_ENTRIES)
@@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment)
return 0;
desc = &ldt->entries[idx];
#else
return 0;
#endif
} else {
if (idx > GDT_ENTRIES)
return 0;
@@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment)
return get_desc_base(desc);
}
#ifdef CONFIG_COMPAT
#ifdef CONFIG_IA32_EMULATION
#include <asm/compat.h>

查看文件

@@ -165,7 +165,7 @@ struct intel_excl_cntrs {
unsigned core_id; /* per-core: core id */
};
#define MAX_LBR_ENTRIES 16
#define MAX_LBR_ENTRIES 32
enum {
X86_PERF_KFREE_SHARED = 0,
@@ -594,6 +594,7 @@ struct x86_pmu {
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
unsigned long free_running_flags;
/*
* Intel LBR
@@ -624,6 +625,7 @@ struct x86_pmu {
struct x86_perf_task_context {
u64 lbr_from[MAX_LBR_ENTRIES];
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
int lbr_callstack_users;
int lbr_stack_state;
};
@@ -793,6 +795,8 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);
struct attribute **merge_attr(struct attribute **a, struct attribute **b);
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@@ -808,20 +812,6 @@ static inline int amd_pmu_init(void)
#ifdef CONFIG_CPU_SUP_INTEL
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
if (has_branch_stack(event))
return true;
/* implicit branch sampling to correct PEBS skid */
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
x86_pmu.intel_cap.pebs_format < 2)
return true;
return false;
}
static inline bool intel_pmu_has_bts(struct perf_event *event)
{
if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
@@ -873,6 +863,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
extern struct event_constraint intel_hsw_pebs_event_constraints[];
extern struct event_constraint intel_skl_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
@@ -911,6 +903,8 @@ void intel_pmu_lbr_init_snb(void);
void intel_pmu_lbr_init_hsw(void);
void intel_pmu_lbr_init_skl(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
@@ -934,6 +928,7 @@ static inline int is_ht_workaround_enabled(void)
{
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
}
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)

查看文件

@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
* - data counts include speculative execution (except L1 write, dtlb, bpu)
* - remote node access includes remote memory, remote cache, remote mmio.
* - prefetches are not included in the counts.
* - icache miss does not include decoded icache
*/
#define SKL_DEMAND_DATA_RD BIT_ULL(0)
#define SKL_DEMAND_RFO BIT_ULL(1)
#define SKL_ANY_RESPONSE BIT_ULL(16)
#define SKL_SUPPLIER_NONE BIT_ULL(17)
#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
SKL_L3_MISS_REMOTE_HOP0_DRAM| \
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
#define SKL_SPL_HIT BIT_ULL(30)
#define SKL_SNOOP_NONE BIT_ULL(31)
#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
#define SKL_SNOOP_MISS BIT_ULL(33)
#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
#define SKL_SNOOP_HITM BIT_ULL(36)
#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
SKL_SNOOP_HITM|SKL_SPL_HIT)
#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
static __initconst const u64 skl_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
static __initconst const u64 skl_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
SKL_L3_MISS|SKL_ANY_SNOOP|
SKL_SUPPLIER_NONE,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS|SKL_ANY_SNOOP|
SKL_SUPPLIER_NONE,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
#define SNB_DMND_DATA_RD (1ULL << 0)
#define SNB_DMND_RFO (1ULL << 1)
#define SNB_DMND_IFETCH (1ULL << 2)
@@ -1114,7 +1323,7 @@ static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
EVENT_EXTRA_END
};
@@ -1594,6 +1803,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
loops = 0;
again:
intel_pmu_lbr_read();
intel_pmu_ack_status(status);
if (++loops > 100) {
static bool warned = false;
@@ -1608,16 +1818,16 @@ again:
inc_irq_stat(apic_perf_irqs);
intel_pmu_lbr_read();
/*
* CondChgd bit 63 doesn't mean any overflow status. Ignore
* and clear the bit.
* Ignore a range of extra bits in status that do not indicate
* overflow by themselves.
*/
if (__test_and_clear_bit(63, (unsigned long *)&status)) {
if (!status)
goto done;
}
status &= ~(GLOBAL_STATUS_COND_CHG |
GLOBAL_STATUS_ASIF |
GLOBAL_STATUS_LBRS_FROZEN);
if (!status)
goto done;
/*
* PEBS overflow sets bit 62 in the global status register
@@ -1699,18 +1909,22 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}
static int intel_alt_er(int idx)
static int intel_alt_er(int idx, u64 config)
{
int alt_idx;
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
return idx;
if (idx == EXTRA_REG_RSP_0)
return EXTRA_REG_RSP_1;
alt_idx = EXTRA_REG_RSP_1;
if (idx == EXTRA_REG_RSP_1)
return EXTRA_REG_RSP_0;
alt_idx = EXTRA_REG_RSP_0;
return idx;
if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
return idx;
return alt_idx;
}
static void intel_fixup_er(struct perf_event *event, int idx)
@@ -1799,7 +2013,7 @@ again:
*/
c = NULL;
} else {
idx = intel_alt_er(idx);
idx = intel_alt_er(idx, reg->config);
if (idx != reg->idx) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -2253,6 +2467,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
}
}
static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
{
unsigned long flags = x86_pmu.free_running_flags;
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
return flags;
}
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -2263,7 +2486,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
if (!(event->attr.sample_type &
~intel_pmu_free_running_flags(event)))
event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
}
if (x86_pmu.pebs_aliases)
@@ -2694,6 +2918,8 @@ static __initconst const struct x86_pmu core_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.free_running_flags = PEBS_FREERUNNING_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
* so we install an artificial 1<<31 period regardless of
@@ -2732,6 +2958,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.free_running_flags = PEBS_FREERUNNING_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -3269,6 +3496,29 @@ __init int intel_pmu_init(void)
pr_cont("Broadwell events, ");
break;
case 78: /* 14nm Skylake Mobile */
case 94: /* 14nm Skylake Desktop */
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
pr_cont("Skylake events, ");
break;
default:
switch (x86_pmu.version) {
case 1:
@@ -3338,7 +3588,7 @@ __init int intel_pmu_init(void)
*/
if (x86_pmu.extra_regs) {
for (er = x86_pmu.extra_regs; er->msr; er++) {
er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
er->extra_msr_access = check_msr(er->msr, 0x11UL);
/* Disable LBR select mapping */
if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
x86_pmu.lbr_sel_map = NULL;

查看文件

@@ -62,9 +62,6 @@ struct bts_buffer {
struct pmu bts_pmu;
void intel_pmu_enable_bts(u64 config);
void intel_pmu_disable_bts(void);
static size_t buf_size(struct page *page)
{
return 1 << (PAGE_SHIFT + page_private(page));

查看文件

@@ -224,6 +224,19 @@ union hsw_tsx_tuning {
#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
/* Same as HSW, plus TSC */
struct pebs_record_skl {
u64 flags, ip;
u64 ax, bx, cx, dx;
u64 si, di, bp, sp;
u64 r8, r9, r10, r11;
u64 r12, r13, r14, r15;
u64 status, dla, dse, lat;
u64 real_ip, tsx_tuning;
u64 tsc;
};
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -675,6 +688,28 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -754,6 +789,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
bool large_pebs = ds->pebs_interrupt_threshold >
ds->pebs_buffer_base + x86_pmu.pebs_record_size;
if (large_pebs)
intel_pmu_drain_pebs_buffer();
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -762,12 +802,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
if (ds->pebs_interrupt_threshold >
ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
intel_pmu_drain_pebs_buffer();
if (!pebs_is_enabled(cpuc))
perf_sched_cb_dec(event->ctx->pmu);
}
if (large_pebs && !pebs_is_enabled(cpuc))
perf_sched_cb_dec(event->ctx->pmu);
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -885,7 +921,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
}
static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
if (pebs->tsx_tuning) {
union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
@@ -894,7 +930,7 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
return 0;
}
static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
@@ -918,7 +954,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
* unconditionally access the 'extra' entries.
*/
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_hsw *pebs = __pebs;
struct pebs_record_skl *pebs = __pebs;
u64 sample_type;
int fll, fst, dsrc;
int fl = event->hw.flags;
@@ -1016,6 +1052,16 @@ static void setup_pebs_sample_data(struct perf_event *event,
data->txn = intel_hsw_transaction(pebs);
}
/*
* v3 supplies an accurate time stamp, so we use that
* for the time stamp.
*
* We can only do this for the default trace clock.
*/
if (x86_pmu.intel_cap.pebs_format >= 3 &&
event->attr.use_clockid == 0)
data->time = native_sched_clock_from_tsc(pebs->tsc);
if (has_branch_stack(event))
data->br_stack = &cpuc->lbr_stack;
}
@@ -1142,6 +1188,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
u64 pebs_status;
/* PEBS v3 has accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3) {
@@ -1152,12 +1199,17 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
continue;
}
bit = find_first_bit((unsigned long *)&p->status,
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
bit = find_first_bit((unsigned long *)&pebs_status,
x86_pmu.max_pebs_events);
if (bit >= x86_pmu.max_pebs_events)
continue;
if (!test_bit(bit, cpuc->active_mask))
if (WARN(bit >= x86_pmu.max_pebs_events,
"PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
(unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
*(unsigned long long *)cpuc->active_mask))
continue;
/*
* The PEBS hardware does not deal well with the situation
* when events happen near to each other and multiple bits
@@ -1172,27 +1224,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* one, and it's not possible to reconstruct all events
* that caused the PEBS record. It's called collision.
* If collision happened, the record will be dropped.
*
*/
if (p->status != (1 << bit)) {
u64 pebs_status;
/* slow path */
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
if (pebs_status != (1 << bit)) {
for_each_set_bit(i, (unsigned long *)&pebs_status,
MAX_PEBS_EVENTS)
error[i]++;
continue;
}
if (p->status != (1ULL << bit)) {
for_each_set_bit(i, (unsigned long *)&pebs_status,
x86_pmu.max_pebs_events)
error[i]++;
continue;
}
counts[bit]++;
}
for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
if ((counts[bit] == 0) && (error[bit] == 0))
continue;
event = cpuc->events[bit];
WARN_ON_ONCE(!event);
WARN_ON_ONCE(!event->attr.precise_ip);
@@ -1245,6 +1291,14 @@ void __init intel_ds_init(void)
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
case 3:
pr_cont("PEBS fmt3%c, ", pebs_type);
x86_pmu.pebs_record_size =
sizeof(struct pebs_record_skl);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
break;
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;

查看文件

@@ -13,7 +13,8 @@ enum {
LBR_FORMAT_EIP = 0x02,
LBR_FORMAT_EIP_FLAGS = 0x03,
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
};
static enum {
@@ -139,6 +140,13 @@ static void __intel_pmu_lbr_enable(bool pmi)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 debugctl, lbr_select = 0, orig_debugctl;
/*
* No need to unfreeze manually, as v4 can do that as part
* of the GLOBAL_STATUS ack.
*/
if (pmi && x86_pmu.version >= 4)
return;
/*
* No need to reprogram LBR_SELECT in a PMI, as it
* did not change.
@@ -186,6 +194,8 @@ static void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + i, 0);
}
}
@@ -230,10 +240,12 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
for (i = 0; i < x86_pmu.lbr_nr; i++) {
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
task_ctx->lbr_stack_state = LBR_NONE;
}
@@ -251,10 +263,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
for (i = 0; i < x86_pmu.lbr_nr; i++) {
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
task_ctx->lbr_stack_state = LBR_VALID;
}
@@ -411,16 +425,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
int num = x86_pmu.lbr_nr;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
num = tos;
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
int skip = 0;
u16 cycles = 0;
int lbr_flags = lbr_desc[lbr_format];
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
if (lbr_format == LBR_FORMAT_INFO) {
u64 info;
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
mis = !!(info & LBR_INFO_MISPRED);
pred = !mis;
in_tx = !!(info & LBR_INFO_IN_TX);
abort = !!(info & LBR_INFO_ABORT);
cycles = (info & LBR_INFO_CYCLES);
}
if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
@@ -450,6 +479,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[out].predicted = pred;
cpuc->lbr_entries[out].in_tx = in_tx;
cpuc->lbr_entries[out].abort = abort;
cpuc->lbr_entries[out].cycles = cycles;
cpuc->lbr_entries[out].reserved = 0;
out++;
}
@@ -947,6 +977,26 @@ void intel_pmu_lbr_init_hsw(void)
pr_cont("16-deep LBR, ");
}
/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
x86_pmu.lbr_nr = 32;
x86_pmu.lbr_tos = MSR_LBR_TOS;
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
/*
* SW branch filter usage:
* - support syscall, sysret capture.
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
pr_cont("32-deep LBR, ");
}
/* atom */
void __init intel_pmu_lbr_init_atom(void)
{

查看文件

@@ -65,15 +65,21 @@ static struct pt_cap_desc {
} pt_caps[] = {
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
};
static u32 pt_cap_get(enum pt_capabilities cap)
{
struct pt_cap_desc *cd = &pt_caps[cap];
u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
unsigned int shift = __ffs(cd->mask);
return (c & cd->mask) >> shift;
@@ -94,12 +100,22 @@ static struct attribute_group pt_cap_group = {
.name = "caps",
};
PMU_FORMAT_ATTR(cyc, "config:1" );
PMU_FORMAT_ATTR(mtc, "config:9" );
PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
static struct attribute *pt_formats_attr[] = {
&format_attr_cyc.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
&format_attr_noretcomp.attr,
&format_attr_mtc_period.attr,
&format_attr_cyc_thresh.attr,
&format_attr_psb_period.attr,
NULL,
};
@@ -129,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
&pt_pmu.caps[CR_EAX + i*4],
&pt_pmu.caps[CR_EBX + i*4],
&pt_pmu.caps[CR_ECX + i*4],
&pt_pmu.caps[CR_EDX + i*4]);
&pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
}
ret = -ENOMEM;
@@ -170,15 +186,65 @@ fail:
return ret;
}
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
RTIT_CTL_CYC_THRESH | \
RTIT_CTL_PSB_FREQ)
#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
RTIT_CTL_MTC_RANGE)
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
RTIT_CTL_DISRETC | \
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC)
static bool pt_event_valid(struct perf_event *event)
{
u64 config = event->attr.config;
u64 allowed, requested;
if ((config & PT_CONFIG_MASK) != config)
return false;
if (config & RTIT_CTL_CYC_PSB) {
if (!pt_cap_get(PT_CAP_psb_cyc))
return false;
allowed = pt_cap_get(PT_CAP_psb_periods);
requested = (config & RTIT_CTL_PSB_FREQ) >>
RTIT_CTL_PSB_FREQ_OFFSET;
if (requested && (!(allowed & BIT(requested))))
return false;
allowed = pt_cap_get(PT_CAP_cycle_thresholds);
requested = (config & RTIT_CTL_CYC_THRESH) >>
RTIT_CTL_CYC_THRESH_OFFSET;
if (requested && (!(allowed & BIT(requested))))
return false;
}
if (config & RTIT_CTL_MTC) {
/*
* In the unlikely case that CPUID lists valid mtc periods,
* but not the mtc capability, drop out here.
*
* Spec says that setting mtc period bits while mtc bit in
* CPUID is 0 will #GP, so better safe than sorry.
*/
if (!pt_cap_get(PT_CAP_mtc))
return false;
allowed = pt_cap_get(PT_CAP_mtc_periods);
if (!allowed)
return false;
requested = (config & RTIT_CTL_MTC_RANGE) >>
RTIT_CTL_MTC_RANGE_OFFSET;
if (!(allowed & BIT(requested)))
return false;
}
return true;
}
@@ -191,6 +257,11 @@ static void pt_config(struct perf_event *event)
{
u64 reg;
if (!event->hw.itrace_started) {
event->hw.itrace_started = 1;
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
if (!event->attr.exclude_kernel)
@@ -910,7 +981,6 @@ void intel_pt_interrupt(void)
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
pt_config(event);
}
}
@@ -934,7 +1004,6 @@ static void pt_event_start(struct perf_event *event, int mode)
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
pt_config(event);
}

查看文件

@@ -86,6 +86,10 @@ static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)
/* Knights Landing has PKG, RAM */
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -486,6 +490,18 @@ static struct attribute *rapl_events_hsw_attr[] = {
NULL,
};
static struct attribute *rapl_events_knl_attr[] = {
EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_ram),
EVENT_PTR(rapl_pkg_unit),
EVENT_PTR(rapl_ram_unit),
EVENT_PTR(rapl_pkg_scale),
EVENT_PTR(rapl_ram_scale),
NULL,
};
static struct attribute_group rapl_pmu_events_group = {
.name = "events",
.attrs = NULL, /* patched at runtime */
@@ -730,6 +746,10 @@ static int __init rapl_pmu_init(void)
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 87: /* Knights Landing */
rapl_add_quirk(rapl_hsw_server_quirk);
rapl_cntr_mask = RAPL_IDX_KNL;
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
default:
/* unsupported */

查看文件

@@ -911,6 +911,9 @@ static int __init uncore_pci_init(void)
case 63: /* Haswell-EP */
ret = hswep_uncore_pci_init();
break;
case 86: /* BDX-DE */
ret = bdx_uncore_pci_init();
break;
case 42: /* Sandy Bridge */
ret = snb_uncore_pci_init();
break;
@@ -1209,6 +1212,11 @@ static int __init uncore_cpu_init(void)
break;
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
case 60: /* Haswell */
case 69: /* Haswell */
case 70: /* Haswell */
case 61: /* Broadwell */
case 71: /* Broadwell */
snb_uncore_cpu_init();
break;
case 45: /* Sandy Bridge-EP */
@@ -1224,6 +1232,9 @@ static int __init uncore_cpu_init(void)
case 63: /* Haswell-EP */
hswep_uncore_cpu_init();
break;
case 86: /* BDX-DE */
bdx_uncore_cpu_init();
break;
default:
return 0;
}

查看文件

@@ -336,6 +336,8 @@ int ivbep_uncore_pci_init(void);
void ivbep_uncore_cpu_init(void);
int hswep_uncore_pci_init(void);
void hswep_uncore_cpu_init(void);
int bdx_uncore_pci_init(void);
void bdx_uncore_cpu_init(void);
/* perf_event_intel_uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);

查看文件

@@ -45,6 +45,11 @@
#define SNB_UNC_CBO_0_PER_CTR0 0x706
#define SNB_UNC_CBO_MSR_OFFSET 0x10
/* SNB ARB register */
#define SNB_UNC_ARB_PER_CTR0 0x3b0
#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
#define SNB_UNC_ARB_MSR_OFFSET 0x10
/* NHM global control register */
#define NHM_UNC_PERF_GLOBAL_CTL 0x391
#define NHM_UNC_FIXED_CTR 0x394
@@ -115,7 +120,7 @@ static struct intel_uncore_ops snb_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter,
};
static struct event_constraint snb_uncore_cbox_constraints[] = {
static struct event_constraint snb_uncore_arb_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
EVENT_CONSTRAINT_END
@@ -134,14 +139,28 @@ static struct intel_uncore_type snb_uncore_cbox = {
.single_fixed = 1,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
.constraints = snb_uncore_cbox_constraints,
.ops = &snb_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
.event_descs = snb_uncore_events,
};
static struct intel_uncore_type snb_uncore_arb = {
.name = "arb",
.num_counters = 2,
.num_boxes = 1,
.perf_ctr_bits = 44,
.perf_ctr = SNB_UNC_ARB_PER_CTR0,
.event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_ARB_MSR_OFFSET,
.constraints = snb_uncore_arb_constraints,
.ops = &snb_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
static struct intel_uncore_type *snb_msr_uncores[] = {
&snb_uncore_cbox,
&snb_uncore_arb,
NULL,
};

查看文件

@@ -2215,7 +2215,7 @@ static struct intel_uncore_type *hswep_pci_uncores[] = {
NULL,
};
static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
static const struct pci_device_id hswep_uncore_pci_ids[] = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
.driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
@@ -2321,3 +2321,167 @@ int hswep_uncore_pci_init(void)
return 0;
}
/* end of Haswell-EP uncore support */
/* BDX-DE uncore support */
static struct intel_uncore_type bdx_uncore_ubox = {
.name = "ubox",
.num_counters = 2,
.num_boxes = 1,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
.perf_ctr = HSWEP_U_MSR_PMON_CTR0,
.event_ctl = HSWEP_U_MSR_PMON_CTL0,
.event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
.fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
.fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
.num_shared_regs = 1,
.ops = &ivbep_uncore_msr_ops,
.format_group = &ivbep_uncore_ubox_format_group,
};
static struct event_constraint bdx_uncore_cbox_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
EVENT_CONSTRAINT_END
};
static struct intel_uncore_type bdx_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.event_ctl = HSWEP_C0_MSR_PMON_CTL0,
.perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
.event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
.box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
.msr_offset = HSWEP_CBO_MSR_OFFSET,
.num_shared_regs = 1,
.constraints = bdx_uncore_cbox_constraints,
.ops = &hswep_uncore_cbox_ops,
.format_group = &hswep_uncore_cbox_format_group,
};
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
&hswep_uncore_pcu,
NULL,
};
void bdx_uncore_cpu_init(void)
{
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
}
static struct intel_uncore_type bdx_uncore_ha = {
.name = "ha",
.num_counters = 4,
.num_boxes = 1,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
.num_counters = 5,
.num_boxes = 2,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
.fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
.fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
.event_descs = hswep_uncore_imc_events,
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
static struct intel_uncore_type bdx_uncore_irp = {
.name = "irp",
.num_counters = 4,
.num_boxes = 1,
.perf_ctr_bits = 48,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
.box_ctl = SNBEP_PCI_PMON_BOX_CTL,
.ops = &hswep_uncore_irp_ops,
.format_group = &snbep_uncore_format_group,
};
static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
EVENT_CONSTRAINT_END
};
static struct intel_uncore_type bdx_uncore_r2pcie = {
.name = "r2pcie",
.num_counters = 4,
.num_boxes = 1,
.perf_ctr_bits = 48,
.constraints = bdx_uncore_r2pcie_constraints,
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
enum {
BDX_PCI_UNCORE_HA,
BDX_PCI_UNCORE_IMC,
BDX_PCI_UNCORE_IRP,
BDX_PCI_UNCORE_R2PCIE,
};
static struct intel_uncore_type *bdx_pci_uncores[] = {
[BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
[BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
[BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
[BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
NULL,
};
static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
},
{ /* MC0 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
},
{ /* MC0 Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
},
{ /* IRP */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
},
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
},
{ /* end: all zeroes */ }
};
static struct pci_driver bdx_uncore_pci_driver = {
.name = "bdx_uncore",
.id_table = bdx_uncore_pci_ids,
};
int bdx_uncore_pci_init(void)
{
int ret = snbep_pci2phy_map_init(0x6f1e);
if (ret)
return ret;
uncore_pci_uncores = bdx_pci_uncores;
uncore_pci_driver = &bdx_uncore_pci_driver;
return 0;
}
/* end of BDX-DE uncore support */

查看文件

@@ -0,0 +1,242 @@
#include <linux/perf_event.h>
enum perf_msr_id {
PERF_MSR_TSC = 0,
PERF_MSR_APERF = 1,
PERF_MSR_MPERF = 2,
PERF_MSR_PPERF = 3,
PERF_MSR_SMI = 4,
PERF_MSR_EVENT_MAX,
};
bool test_aperfmperf(int idx)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
return false;
switch (boot_cpu_data.x86_model) {
case 30: /* 45nm Nehalem */
case 26: /* 45nm Nehalem-EP */
case 46: /* 45nm Nehalem-EX */
case 37: /* 32nm Westmere */
case 44: /* 32nm Westmere-EP */
case 47: /* 32nm Westmere-EX */
case 42: /* 32nm SandyBridge */
case 45: /* 32nm SandyBridge-E/EN/EP */
case 58: /* 22nm IvyBridge */
case 62: /* 22nm IvyBridge-EP/EX */
case 60: /* 22nm Haswell Core */
case 63: /* 22nm Haswell Server */
case 69: /* 22nm Haswell ULT */
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
case 61: /* 14nm Broadwell Core-M */
case 86: /* 14nm Broadwell Xeon D */
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
case 79: /* 14nm Broadwell Server */
case 55: /* 22nm Atom "Silvermont" */
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
case 76: /* 14nm Atom "Airmont" */
if (idx == PERF_MSR_SMI)
return true;
break;
case 78: /* 14nm Skylake Mobile */
case 94: /* 14nm Skylake Desktop */
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
}
return false;
}
struct perf_msr {
u64 msr;
struct perf_pmu_events_attr *attr;
bool (*test)(int idx);
};
PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
static struct perf_msr msr[] = {
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
[PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
[PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
[PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
[PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
};
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
NULL,
};
static struct attribute_group events_attr_group = {
.name = "events",
.attrs = events_attrs,
};
PMU_FORMAT_ATTR(event, "config:0-63");
static struct attribute *format_attrs[] = {
&format_attr_event.attr,
NULL,
};
static struct attribute_group format_attr_group = {
.name = "format",
.attrs = format_attrs,
};
static const struct attribute_group *attr_groups[] = {
&events_attr_group,
&format_attr_group,
NULL,
};
static int msr_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
if (event->attr.type != event->pmu->type)
return -ENOENT;
if (cfg >= PERF_MSR_EVENT_MAX)
return -EINVAL;
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest ||
event->attr.sample_period) /* no sampling */
return -EINVAL;
if (!msr[cfg].attr)
return -EINVAL;
event->hw.idx = -1;
event->hw.event_base = msr[cfg].msr;
event->hw.config = cfg;
return 0;
}
static inline u64 msr_read_counter(struct perf_event *event)
{
u64 now;
if (event->hw.event_base)
rdmsrl(event->hw.event_base, now);
else
rdtscll(now);
return now;
}
static void msr_event_update(struct perf_event *event)
{
u64 prev, now;
s64 delta;
/* Careful, an NMI might modify the previous event value. */
again:
prev = local64_read(&event->hw.prev_count);
now = msr_read_counter(event);
if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
goto again;
delta = now - prev;
if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
delta <<= 32;
delta >>= 32; /* sign extend */
}
local64_add(now - prev, &event->count);
}
static void msr_event_start(struct perf_event *event, int flags)
{
u64 now;
now = msr_read_counter(event);
local64_set(&event->hw.prev_count, now);
}
static void msr_event_stop(struct perf_event *event, int flags)
{
msr_event_update(event);
}
static void msr_event_del(struct perf_event *event, int flags)
{
msr_event_stop(event, PERF_EF_UPDATE);
}
static int msr_event_add(struct perf_event *event, int flags)
{
if (flags & PERF_EF_START)
msr_event_start(event, flags);
return 0;
}
static struct pmu pmu_msr = {
.task_ctx_nr = perf_sw_context,
.attr_groups = attr_groups,
.event_init = msr_event_init,
.add = msr_event_add,
.del = msr_event_del,
.start = msr_event_start,
.stop = msr_event_stop,
.read = msr_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
};
static int __init msr_init(void)
{
int i, j = 0;
if (!boot_cpu_has(X86_FEATURE_TSC)) {
pr_cont("no MSR PMU driver.\n");
return 0;
}
/* Probe the MSRs. */
for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
u64 val;
/*
* Virt sucks arse; you cannot tell if a R/O MSR is present :/
*/
if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
msr[i].attr = NULL;
}
/* List remaining MSRs in the sysfs attrs. */
for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
if (msr[i].attr)
events_attrs[j++] = &msr[i].attr->attr.attr;
}
events_attrs[j] = NULL;
perf_pmu_register(&pmu_msr, "msr", -1);
return 0;
}
device_initcall(msr_init);

查看文件

@@ -170,7 +170,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb,
return notifier_from_errno(err);
}
static struct notifier_block __refdata cpuid_class_cpu_notifier =
static struct notifier_block cpuid_class_cpu_notifier =
{
.notifier_call = cpuid_class_cpu_callback,
};

查看文件

@@ -110,7 +110,7 @@ static void init_espfix_random(void)
*/
if (!arch_get_random_long(&rand)) {
/* The constant is an arbitrary large prime */
rdtscll(rand);
rand = rdtsc();
rand *= 0xc345c6b72fd16123UL;
}

查看文件

@@ -226,22 +226,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
*/
static unsigned long hpet_freq;
static void hpet_legacy_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt);
static int hpet_legacy_next_event(unsigned long delta,
struct clock_event_device *evt);
/*
* The hpet clock event device
*/
static struct clock_event_device hpet_clockevent = {
.name = "hpet",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = hpet_legacy_set_mode,
.set_next_event = hpet_legacy_next_event,
.irq = 0,
.rating = 50,
};
static struct clock_event_device hpet_clockevent;
static void hpet_stop_counter(void)
{
@@ -306,64 +291,74 @@ static void hpet_legacy_clockevent_register(void)
printk(KERN_DEBUG "hpet clockevent registered\n");
}
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
static int hpet_set_periodic(struct clock_event_device *evt, int timer)
{
unsigned int cfg, cmp, now;
uint64_t delta;
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
hpet_writel(cmp, HPET_Tn_CMP(timer));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
* cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
* bit is automatically cleared after the first write.
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
break;
hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC / HZ)) * evt->mult;
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
hpet_writel(cmp, HPET_Tn_CMP(timer));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
* cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
* bit is automatically cleared after the first write.
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
case CLOCK_EVT_MODE_ONESHOT:
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
return 0;
}
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
{
unsigned int cfg;
case CLOCK_EVT_MODE_RESUME:
if (timer == 0) {
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
hpet_print_config();
break;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
return 0;
}
static int hpet_shutdown(struct clock_event_device *evt, int timer)
{
unsigned int cfg;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_Tn_CFG(timer));
return 0;
}
static int hpet_resume(struct clock_event_device *evt, int timer)
{
if (!timer) {
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
hpet_print_config();
return 0;
}
static int hpet_next_event(unsigned long delta,
@@ -403,10 +398,24 @@ static int hpet_next_event(unsigned long delta,
return res < HPET_MIN_CYCLES ? -ETIME : 0;
}
static void hpet_legacy_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
static int hpet_legacy_shutdown(struct clock_event_device *evt)
{
hpet_set_mode(mode, evt, 0);
return hpet_shutdown(evt, 0);
}
static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
{
return hpet_set_oneshot(evt, 0);
}
static int hpet_legacy_set_periodic(struct clock_event_device *evt)
{
return hpet_set_periodic(evt, 0);
}
static int hpet_legacy_resume(struct clock_event_device *evt)
{
return hpet_resume(evt, 0);
}
static int hpet_legacy_next_event(unsigned long delta,
@@ -415,6 +424,22 @@ static int hpet_legacy_next_event(unsigned long delta,
return hpet_next_event(delta, evt, 0);
}
/*
* The hpet clock event device
*/
static struct clock_event_device hpet_clockevent = {
.name = "hpet",
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_periodic = hpet_legacy_set_periodic,
.set_state_oneshot = hpet_legacy_set_oneshot,
.set_state_shutdown = hpet_legacy_shutdown,
.tick_resume = hpet_legacy_resume,
.set_next_event = hpet_legacy_next_event,
.irq = 0,
.rating = 50,
};
/*
* HPET MSI Support
*/
@@ -426,7 +451,7 @@ static struct irq_domain *hpet_domain;
void hpet_msi_unmask(struct irq_data *data)
{
struct hpet_dev *hdev = data->handler_data;
struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
unsigned int cfg;
/* unmask it */
@@ -437,7 +462,7 @@ void hpet_msi_unmask(struct irq_data *data)
void hpet_msi_mask(struct irq_data *data)
{
struct hpet_dev *hdev = data->handler_data;
struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
unsigned int cfg;
/* mask it */
@@ -459,11 +484,32 @@ void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
msg->address_hi = 0;
}
static void hpet_msi_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
static int hpet_msi_shutdown(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_set_mode(mode, evt, hdev->num);
return hpet_shutdown(evt, hdev->num);
}
static int hpet_msi_set_oneshot(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
return hpet_set_oneshot(evt, hdev->num);
}
static int hpet_msi_set_periodic(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
return hpet_set_periodic(evt, hdev->num);
}
static int hpet_msi_resume(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
return hpet_resume(evt, hdev->num);
}
static int hpet_msi_next_event(unsigned long delta,
@@ -523,10 +569,14 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
evt->rating = 110;
evt->features = CLOCK_EVT_FEAT_ONESHOT;
if (hdev->flags & HPET_DEV_PERI_CAP)
if (hdev->flags & HPET_DEV_PERI_CAP) {
evt->features |= CLOCK_EVT_FEAT_PERIODIC;
evt->set_state_periodic = hpet_msi_set_periodic;
}
evt->set_mode = hpet_msi_set_mode;
evt->set_state_shutdown = hpet_msi_shutdown;
evt->set_state_oneshot = hpet_msi_set_oneshot;
evt->tick_resume = hpet_msi_resume;
evt->set_next_event = hpet_msi_next_event;
evt->cpumask = cpumask_of(hdev->cpu);
@@ -735,7 +785,7 @@ static int hpet_clocksource_register(void)
/* Verify whether hpet counter works */
t1 = hpet_readl(HPET_COUNTER);
rdtscll(start);
start = rdtsc();
/*
* We don't know the TSC frequency yet, but waiting for
@@ -745,7 +795,7 @@ static int hpet_clocksource_register(void)
*/
do {
rep_nop();
rdtscll(now);
now = rdtsc();
} while ((now - start) < 200000UL);
if (t1 == hpet_readl(HPET_COUNTER)) {

查看文件

@@ -32,6 +32,7 @@
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
@@ -179,7 +180,11 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
va = info->address;
len = bp->attr.bp_len;
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
/*
* We don't need to worry about va + len - 1 overflowing:
* we already require that va is aligned to a multiple of len.
*/
return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}
int arch_bp_generic_fields(int x86_len, int x86_type,
@@ -243,6 +248,20 @@ static int arch_build_bp_info(struct perf_event *bp)
info->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
* We don't allow kernel breakpoints in places that are not
* acceptable for kprobes. On non-kprobes kernels, we don't
* allow kernel breakpoints at all.
*/
if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
#ifdef CONFIG_KPROBES
if (within_kprobe_blacklist(bp->attr.bp_addr))
return -EINVAL;
#else
return -EINVAL;
#endif
}
info->type = X86_BREAKPOINT_EXECUTE;
/*
* x86 inst breakpoints need to have a specific undefined len.
@@ -276,8 +295,18 @@ static int arch_build_bp_info(struct perf_event *bp)
break;
#endif
default:
/* AMD range breakpoint */
if (!is_power_of_2(bp->attr.bp_len))
return -EINVAL;
if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
return -EINVAL;
/*
* It's impossible to use a range breakpoint to fake out
* user vs kernel detection because bp_len - 1 can't
* have the high bit set. If we ever allow range instruction
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
if (!cpu_has_bpext)
return -EOPNOTSUPP;
info->mask = bp->attr.bp_len - 1;

查看文件

@@ -34,7 +34,7 @@ static int __init init_pit_clocksource(void)
* - when local APIC timer is active (PIT is switched off)
*/
if (num_possible_cpus() > 1 || is_hpet_enabled() ||
i8253_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
!clockevent_state_periodic(&i8253_clockevent))
return 0;
return clocksource_i8253_init();

查看文件

@@ -1,328 +0,0 @@
/*
* IOSF-SB MailBox Interface Driver
* Copyright (c) 2013, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*
* The IOSF-SB is a fabric bus available on Atom based SOC's that uses a
* mailbox interface (MBI) to communicate with mutiple devices. This
* driver implements access to this interface for those platforms that can
* enumerate the device using PCI.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/capability.h>
#include <asm/iosf_mbi.h>
#define PCI_DEVICE_ID_BAYTRAIL 0x0F00
#define PCI_DEVICE_ID_BRASWELL 0x2280
#define PCI_DEVICE_ID_QUARK_X1000 0x0958
static DEFINE_SPINLOCK(iosf_mbi_lock);
static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
{
return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE;
}
static struct pci_dev *mbi_pdev; /* one mbi device */
static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr)
{
int result;
if (!mbi_pdev)
return -ENODEV;
if (mcrx) {
result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
mcrx);
if (result < 0)
goto fail_read;
}
result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
if (result < 0)
goto fail_read;
result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
if (result < 0)
goto fail_read;
return 0;
fail_read:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
return result;
}
static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
{
int result;
if (!mbi_pdev)
return -ENODEV;
result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
if (result < 0)
goto fail_write;
if (mcrx) {
result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
mcrx);
if (result < 0)
goto fail_write;
}
result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
if (result < 0)
goto fail_write;
return 0;
fail_write:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
return result;
}
int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
{
u32 mcr, mcrx;
unsigned long flags;
int ret;
/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}
mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;
spin_lock_irqsave(&iosf_mbi_lock, flags);
ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr);
spin_unlock_irqrestore(&iosf_mbi_lock, flags);
return ret;
}
EXPORT_SYMBOL(iosf_mbi_read);
int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
{
u32 mcr, mcrx;
unsigned long flags;
int ret;
/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}
mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;
spin_lock_irqsave(&iosf_mbi_lock, flags);
ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr);
spin_unlock_irqrestore(&iosf_mbi_lock, flags);
return ret;
}
EXPORT_SYMBOL(iosf_mbi_write);
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
{
u32 mcr, mcrx;
u32 value;
unsigned long flags;
int ret;
/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}
mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;
spin_lock_irqsave(&iosf_mbi_lock, flags);
/* Read current mdr value */
ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value);
if (ret < 0) {
spin_unlock_irqrestore(&iosf_mbi_lock, flags);
return ret;
}
/* Apply mask */
value &= ~mask;
mdr &= mask;
value |= mdr;
/* Write back */
ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value);
spin_unlock_irqrestore(&iosf_mbi_lock, flags);
return ret;
}
EXPORT_SYMBOL(iosf_mbi_modify);
bool iosf_mbi_available(void)
{
/* Mbi isn't hot-pluggable. No remove routine is provided */
return mbi_pdev;
}
EXPORT_SYMBOL(iosf_mbi_available);
#ifdef CONFIG_IOSF_MBI_DEBUG
static u32 dbg_mdr;
static u32 dbg_mcr;
static u32 dbg_mcrx;
static int mcr_get(void *data, u64 *val)
{
*val = *(u32 *)data;
return 0;
}
static int mcr_set(void *data, u64 val)
{
u8 command = ((u32)val & 0xFF000000) >> 24,
port = ((u32)val & 0x00FF0000) >> 16,
offset = ((u32)val & 0x0000FF00) >> 8;
int err;
*(u32 *)data = val;
if (!capable(CAP_SYS_RAWIO))
return -EACCES;
if (command & 1u)
err = iosf_mbi_write(port,
command,
dbg_mcrx | offset,
dbg_mdr);
else
err = iosf_mbi_read(port,
command,
dbg_mcrx | offset,
&dbg_mdr);
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n");
static struct dentry *iosf_dbg;
static void iosf_sideband_debug_init(void)
{
struct dentry *d;
iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
if (IS_ERR_OR_NULL(iosf_dbg))
return;
/* mdr */
d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
if (IS_ERR_OR_NULL(d))
goto cleanup;
/* mcrx */
debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
if (IS_ERR_OR_NULL(d))
goto cleanup;
/* mcr - initiates mailbox tranaction */
debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
if (IS_ERR_OR_NULL(d))
goto cleanup;
return;
cleanup:
debugfs_remove_recursive(d);
}
static void iosf_debugfs_init(void)
{
iosf_sideband_debug_init();
}
static void iosf_debugfs_remove(void)
{
debugfs_remove_recursive(iosf_dbg);
}
#else
static inline void iosf_debugfs_init(void) { }
static inline void iosf_debugfs_remove(void) { }
#endif /* CONFIG_IOSF_MBI_DEBUG */
static int iosf_mbi_probe(struct pci_dev *pdev,
const struct pci_device_id *unused)
{
int ret;
ret = pci_enable_device(pdev);
if (ret < 0) {
dev_err(&pdev->dev, "error: could not enable device\n");
return ret;
}
mbi_pdev = pci_dev_get(pdev);
return 0;
}
static const struct pci_device_id iosf_mbi_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) },
{ 0, },
};
MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids);
static struct pci_driver iosf_mbi_pci_driver = {
.name = "iosf_mbi_pci",
.probe = iosf_mbi_probe,
.id_table = iosf_mbi_pci_ids,
};
static int __init iosf_mbi_init(void)
{
iosf_debugfs_init();
return pci_register_driver(&iosf_mbi_pci_driver);
}
static void __exit iosf_mbi_exit(void)
{
iosf_debugfs_remove();
pci_unregister_driver(&iosf_mbi_pci_driver);
if (mbi_pdev) {
pci_dev_put(mbi_pdev);
mbi_pdev = NULL;
}
}
module_init(iosf_mbi_init);
module_exit(iosf_mbi_exit);
MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
MODULE_DESCRIPTION("IOSF Mailbox Interface accessor");
MODULE_LICENSE("GPL v2");

查看文件

@@ -139,10 +139,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Machine check polls\n");
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
@@ -211,24 +214,38 @@ u64 arch_irq_stat(void)
__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc * desc;
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
unsigned irq;
/*
* NB: Unlike exception entries, IRQ entries do not reliably
* handle context tracking in the low-level entry code. This is
* because syscall entries execute briefly with IRQs on before
* updating context tracking state, so we can take an IRQ from
* kernel mode with CONTEXT_USER. The low-level entry code only
* updates the context if we came from user mode, so we won't
* switch to CONTEXT_KERNEL. We'll fix that once the syscall
* code is cleaned up enough that we can cleanly defer enabling
* IRQs.
*/
entering_irq();
irq = __this_cpu_read(vector_irq[vector]);
/* entering_irq() tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
if (!handle_irq(irq, regs)) {
desc = __this_cpu_read(vector_irq[vector]);
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
if (irq != VECTOR_RETRIGGERED) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
if (desc != VECTOR_RETRIGGERED) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector, irq);
vector);
} else {
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
}
}
@@ -330,10 +347,10 @@ static struct cpumask affinity_new, online_new;
*/
int check_irq_vectors_for_cpu_disable(void)
{
int irq, cpu;
unsigned int this_cpu, vector, this_count, count;
struct irq_desc *desc;
struct irq_data *data;
int cpu;
this_cpu = smp_processor_id();
cpumask_copy(&online_new, cpu_online_mask);
@@ -341,47 +358,43 @@ int check_irq_vectors_for_cpu_disable(void)
this_count = 0;
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
irq = __this_cpu_read(vector_irq[vector]);
if (irq >= 0) {
desc = irq_to_desc(irq);
if (!desc)
continue;
/*
* Protect against concurrent action removal,
* affinity changes etc.
*/
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new, data->affinity);
cpumask_clear_cpu(this_cpu, &affinity_new);
/* Do not count inactive or per-cpu irqs. */
if (!irq_has_action(irq) || irqd_is_per_cpu(data)) {
raw_spin_unlock(&desc->lock);
continue;
}
desc = __this_cpu_read(vector_irq[vector]);
if (IS_ERR_OR_NULL(desc))
continue;
/*
* Protect against concurrent action removal, affinity
* changes etc.
*/
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new,
irq_data_get_affinity_mask(data));
cpumask_clear_cpu(this_cpu, &affinity_new);
/* Do not count inactive or per-cpu irqs. */
if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) {
raw_spin_unlock(&desc->lock);
/*
* A single irq may be mapped to multiple
* cpu's vector_irq[] (for example IOAPIC cluster
* mode). In this case we have two
* possibilities:
*
* 1) the resulting affinity mask is empty; that is
* this the down'd cpu is the last cpu in the irq's
* affinity mask, or
*
* 2) the resulting affinity mask is no longer
* a subset of the online cpus but the affinity
* mask is not zero; that is the down'd cpu is the
* last online cpu in a user set affinity mask.
*/
if (cpumask_empty(&affinity_new) ||
!cpumask_subset(&affinity_new, &online_new))
this_count++;
continue;
}
raw_spin_unlock(&desc->lock);
/*
* A single irq may be mapped to multiple cpu's
* vector_irq[] (for example IOAPIC cluster mode). In
* this case we have two possibilities:
*
* 1) the resulting affinity mask is empty; that is
* this the down'd cpu is the last cpu in the irq's
* affinity mask, or
*
* 2) the resulting affinity mask is no longer a
* subset of the online cpus but the affinity mask is
* not zero; that is the down'd cpu is the last online
* cpu in a user set affinity mask.
*/
if (cpumask_empty(&affinity_new) ||
!cpumask_subset(&affinity_new, &online_new))
this_count++;
}
count = 0;
@@ -400,8 +413,8 @@ int check_irq_vectors_for_cpu_disable(void)
for (vector = FIRST_EXTERNAL_VECTOR;
vector < first_system_vector; vector++) {
if (!test_bit(vector, used_vectors) &&
per_cpu(vector_irq, cpu)[vector] < 0)
count++;
IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
count++;
}
}
@@ -437,7 +450,7 @@ void fixup_irqs(void)
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
affinity = data->affinity;
affinity = irq_data_get_affinity_mask(data);
if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
@@ -505,14 +518,13 @@ void fixup_irqs(void)
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irr;
if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED)
if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
continue;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1 << (vector % 32))) {
irq = __this_cpu_read(vector_irq[vector]);
desc = __this_cpu_read(vector_irq[vector]);
desc = irq_to_desc(irq);
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
chip = irq_data_get_irq_chip(data);
@@ -523,7 +535,7 @@ void fixup_irqs(void)
raw_spin_unlock(&desc->lock);
}
if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
}
}
#endif

查看文件

@@ -148,21 +148,21 @@ void do_softirq_own_stack(void)
call_on_stack(__do_softirq, isp);
}
bool handle_irq(unsigned irq, struct pt_regs *regs)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
struct irq_desc *desc;
unsigned int irq;
int overflow;
overflow = check_stack_overflow();
desc = irq_to_desc(irq);
if (unlikely(!desc))
if (IS_ERR_OR_NULL(desc))
return false;
irq = irq_desc_get_irq(desc);
if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
generic_handle_irq_desc(irq, desc);
}
return true;

查看文件

@@ -68,16 +68,13 @@ static inline void stack_overflow_check(struct pt_regs *regs)
#endif
}
bool handle_irq(unsigned irq, struct pt_regs *regs)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
struct irq_desc *desc;
stack_overflow_check(regs);
desc = irq_to_desc(irq);
if (unlikely(!desc))
if (unlikely(IS_ERR_OR_NULL(desc)))
return false;
generic_handle_irq_desc(irq, desc);
generic_handle_irq_desc(irq_desc_get_irq(desc), desc);
return true;
}

查看文件

@@ -52,7 +52,7 @@ static struct irqaction irq2 = {
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
int vector_used_by_percpu_irq(unsigned int vector)
@@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
int cpu;
for_each_online_cpu(cpu) {
if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED)
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
return 1;
}
@@ -94,7 +94,7 @@ void __init init_IRQ(void)
* irq's migrate etc.
*/
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i;
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
x86_init.irqs.intr_init();
}

查看文件

@@ -223,9 +223,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
memset(&params->hd0_info, 0, sizeof(params->hd0_info));
memset(&params->hd1_info, 0, sizeof(params->hd1_info));
/* Default sysdesc table */
params->sys_desc_table.length = 0;
if (image->type == KEXEC_TYPE_CRASH) {
ret = crash_setup_memmap_entries(image, params);
if (ret)

查看文件

@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
a->handler, whole_msecs, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *a;
@@ -213,7 +213,7 @@ static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
/* check to see if anyone registered against these types of errors */
if (nmi_handle(NMI_SERR, regs, false))
if (nmi_handle(NMI_SERR, regs))
return;
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
unsigned long i;
/* check to see if anyone registered against these types of errors */
if (nmi_handle(NMI_IO_CHECK, regs, false))
if (nmi_handle(NMI_IO_CHECK, regs))
return;
pr_emerg(
@@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
* as only the first one is ever run (unless it can actually determine
* if it caused the NMI)
*/
handled = nmi_handle(NMI_UNKNOWN, regs, false);
handled = nmi_handle(NMI_UNKNOWN, regs);
if (handled) {
__this_cpu_add(nmi_stats.unknown, handled);
return;
@@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs)
__this_cpu_write(last_nmi_rip, regs->ip);
handled = nmi_handle(NMI_LOCAL, regs, b2b);
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
if (handled) {
/*

查看文件

@@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.read_tscp = native_read_tscp,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,

查看文件

@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
@@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc);
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {

查看文件

@@ -1,371 +0,0 @@
/*
* Intel Atom SOC Power Management Controller Driver
* Copyright (c) 2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/io.h>
#include <asm/pmc_atom.h>
struct pmc_dev {
u32 base_addr;
void __iomem *regmap;
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
};
static struct pmc_dev pmc_device;
static u32 acpi_base_addr;
struct pmc_bit_map {
const char *name;
u32 bit_mask;
};
static const struct pmc_bit_map dev_map[] = {
{"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA},
{"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1},
{"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2},
{"3 - LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1},
{"4 - LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2},
{"5 - LPSS1_F5_SPI", BIT_LPSS1_F5_SPI},
{"6 - LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX},
{"7 - LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX},
{"8 - SCC_EMMC", BIT_SCC_EMMC},
{"9 - SCC_SDIO", BIT_SCC_SDIO},
{"10 - SCC_SDCARD", BIT_SCC_SDCARD},
{"11 - SCC_MIPI", BIT_SCC_MIPI},
{"12 - HDA", BIT_HDA},
{"13 - LPE", BIT_LPE},
{"14 - OTG", BIT_OTG},
{"15 - USH", BIT_USH},
{"16 - GBE", BIT_GBE},
{"17 - SATA", BIT_SATA},
{"18 - USB_EHCI", BIT_USB_EHCI},
{"19 - SEC", BIT_SEC},
{"20 - PCIE_PORT0", BIT_PCIE_PORT0},
{"21 - PCIE_PORT1", BIT_PCIE_PORT1},
{"22 - PCIE_PORT2", BIT_PCIE_PORT2},
{"23 - PCIE_PORT3", BIT_PCIE_PORT3},
{"24 - LPSS2_F0_DMA", BIT_LPSS2_F0_DMA},
{"25 - LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1},
{"26 - LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2},
{"27 - LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3},
{"28 - LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4},
{"29 - LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5},
{"30 - LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6},
{"31 - LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7},
{"32 - SMB", BIT_SMB},
{"33 - OTG_SS_PHY", BIT_OTG_SS_PHY},
{"34 - USH_SS_PHY", BIT_USH_SS_PHY},
{"35 - DFX", BIT_DFX},
};
static const struct pmc_bit_map pss_map[] = {
{"0 - GBE", PMC_PSS_BIT_GBE},
{"1 - SATA", PMC_PSS_BIT_SATA},
{"2 - HDA", PMC_PSS_BIT_HDA},
{"3 - SEC", PMC_PSS_BIT_SEC},
{"4 - PCIE", PMC_PSS_BIT_PCIE},
{"5 - LPSS", PMC_PSS_BIT_LPSS},
{"6 - LPE", PMC_PSS_BIT_LPE},
{"7 - DFX", PMC_PSS_BIT_DFX},
{"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL},
{"9 - USH_SUS", PMC_PSS_BIT_USH_SUS},
{"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS},
{"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA},
{"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL},
{"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS},
{"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK},
{"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA},
{"16 - USB", PMC_PSS_BIT_USB},
{"17 - USB_SUS", PMC_PSS_BIT_USB_SUS},
};
static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
{
return readl(pmc->regmap + reg_offset);
}
static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val)
{
writel(val, pmc->regmap + reg_offset);
}
static void pmc_power_off(void)
{
u16 pm1_cnt_port;
u32 pm1_cnt_value;
pr_info("Preparing to enter system sleep state S5\n");
pm1_cnt_port = acpi_base_addr + PM1_CNT;
pm1_cnt_value = inl(pm1_cnt_port);
pm1_cnt_value &= SLEEP_TYPE_MASK;
pm1_cnt_value |= SLEEP_TYPE_S5;
pm1_cnt_value |= SLEEP_ENABLE;
outl(pm1_cnt_value, pm1_cnt_port);
}
static void pmc_hw_reg_setup(struct pmc_dev *pmc)
{
/*
* Disable PMC S0IX_WAKE_EN events coming from:
* - LPC clock run
* - GPIO_SUS ored dedicated IRQs
* - GPIO_SCORE ored dedicated IRQs
* - GPIO_SUS shared IRQ
* - GPIO_SCORE shared IRQ
*/
pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING);
}
#ifdef CONFIG_DEBUG_FS
static int pmc_dev_state_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u32 func_dis, func_dis_2, func_dis_index;
u32 d3_sts_0, d3_sts_1, d3_sts_index;
int dev_num, dev_index, reg_index;
func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS);
func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2);
d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0);
d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1);
dev_num = ARRAY_SIZE(dev_map);
for (dev_index = 0; dev_index < dev_num; dev_index++) {
reg_index = dev_index / PMC_REG_BIT_WIDTH;
if (reg_index) {
func_dis_index = func_dis_2;
d3_sts_index = d3_sts_1;
} else {
func_dis_index = func_dis;
d3_sts_index = d3_sts_0;
}
seq_printf(s, "Dev: %-32s\tState: %s [%s]\n",
dev_map[dev_index].name,
dev_map[dev_index].bit_mask & func_dis_index ?
"Disabled" : "Enabled ",
dev_map[dev_index].bit_mask & d3_sts_index ?
"D3" : "D0");
}
return 0;
}
static int pmc_dev_state_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_dev_state_show, inode->i_private);
}
static const struct file_operations pmc_dev_state_ops = {
.open = pmc_dev_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int pmc_pss_state_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u32 pss = pmc_reg_read(pmc, PMC_PSS);
int pss_index;
for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) {
seq_printf(s, "Island: %-32s\tState: %s\n",
pss_map[pss_index].name,
pss_map[pss_index].bit_mask & pss ? "Off" : "On");
}
return 0;
}
static int pmc_pss_state_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_pss_state_show, inode->i_private);
}
static const struct file_operations pmc_pss_state_ops = {
.open = pmc_pss_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr;
s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT;
s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT;
s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT;
s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT;
s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT;
seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr);
seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr);
seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr);
seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr);
seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr);
return 0;
}
static int pmc_sleep_tmr_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_sleep_tmr_show, inode->i_private);
}
static const struct file_operations pmc_sleep_tmr_ops = {
.open = pmc_sleep_tmr_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
{
debugfs_remove_recursive(pmc->dbgfs_dir);
}
static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
{
struct dentry *dir, *f;
dir = debugfs_create_dir("pmc_atom", NULL);
if (!dir)
return -ENOMEM;
pmc->dbgfs_dir = dir;
f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_dev_state_ops);
if (!f) {
dev_err(&pdev->dev, "dev_state register failed\n");
goto err;
}
f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_pss_state_ops);
if (!f) {
dev_err(&pdev->dev, "pss_state register failed\n");
goto err;
}
f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_sleep_tmr_ops);
if (!f) {
dev_err(&pdev->dev, "sleep_state register failed\n");
goto err;
}
return 0;
err:
pmc_dbgfs_unregister(pmc);
return -ENODEV;
}
#else
static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */
static int pmc_setup_dev(struct pci_dev *pdev)
{
struct pmc_dev *pmc = &pmc_device;
int ret;
/* Obtain ACPI base address */
pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr);
acpi_base_addr &= ACPI_BASE_ADDR_MASK;
/* Install power off function */
if (acpi_base_addr != 0 && pm_power_off == NULL)
pm_power_off = pmc_power_off;
pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr);
pmc->base_addr &= PMC_BASE_ADDR_MASK;
pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN);
if (!pmc->regmap) {
dev_err(&pdev->dev, "error: ioremap failed\n");
return -ENOMEM;
}
/* PMC hardware registers setup */
pmc_hw_reg_setup(pmc);
ret = pmc_dbgfs_register(pmc, pdev);
if (ret) {
iounmap(pmc->regmap);
}
return ret;
}
/*
* Data for PCI driver interface
*
* This data only exists for exporting the supported
* PCI ids via MODULE_DEVICE_TABLE. We do not actually
* register a pci_driver, because lpc_ich will register
* a driver on the same PCI id.
*/
static const struct pci_device_id pmc_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_VLV_PMC) },
{ 0, },
};
MODULE_DEVICE_TABLE(pci, pmc_pci_ids);
static int __init pmc_atom_init(void)
{
struct pci_dev *pdev = NULL;
const struct pci_device_id *ent;
/* We look for our device - PCU PMC
* we assume that there is max. one device.
*
* We can't use plain pci_driver mechanism,
* as the device is really a multiple function device,
* main driver that binds to the pci_device is lpc_ich
* and have to find & bind to the device this way.
*/
for_each_pci_dev(pdev) {
ent = pci_match_id(pmc_pci_ids, pdev);
if (ent)
return pmc_setup_dev(pdev);
}
/* Device not found. */
return -ENODEV;
}
module_init(pmc_atom_init);
/* no module_exit, this driver shouldn't be unloaded */
MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>");
MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface");
MODULE_LICENSE("GPL v2");

查看文件

@@ -29,6 +29,8 @@
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -110,6 +112,8 @@ void exit_thread(void)
kfree(bp);
}
free_vm86(t);
fpu__drop(fpu);
}
@@ -319,6 +323,7 @@ void stop_this_cpu(void *dummy)
*/
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
for (;;)
halt();

查看文件

@@ -53,6 +53,7 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");

查看文件

@@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
@@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task)
dead_task->mm->context.ldt->size);
BUG();
}
#endif
}
}
@@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
__USER_CS, __USER_DS, 0);
}
#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
start_thread_common(regs, new_ip, new_sp,
test_thread_flag(TIF_X32)

查看文件

@@ -37,12 +37,10 @@
#include <asm/proto.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/syscall.h>
#include "tls.h"
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
@@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target,
return ret;
}
static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
unsigned long addr = caddr;
unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;
switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
ret = put_user(val, (__u32 __user *)datap);
break;
case PTRACE_POKEUSR:
ret = putreg32(child, addr, data);
break;
case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct32),
datap);
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_GENERAL, 0,
sizeof(struct user_regs_struct32),
datap);
case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_FP, 0,
sizeof(struct user_i387_ia32_struct),
datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(
child, &user_x86_32_view, REGSET_FP,
0, sizeof(struct user_i387_ia32_struct), datap);
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
case PTRACE_GET_THREAD_AREA:
case PTRACE_SET_THREAD_AREA:
return arch_ptrace(child, request, addr, data);
default:
return compat_ptrace_request(child, request, addr, data);
}
return ret;
}
#endif /* CONFIG_IA32_EMULATION */
#ifdef CONFIG_X86_X32_ABI
static long x32_arch_ptrace(struct task_struct *child,
compat_long_t request, compat_ulong_t caddr,
@@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child,
}
#endif
#ifdef CONFIG_COMPAT
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
unsigned long addr = caddr;
unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;
#ifdef CONFIG_X86_X32_ABI
if (!is_ia32_task())
return x32_arch_ptrace(child, request, caddr, cdata);
#endif
switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
ret = put_user(val, (__u32 __user *)datap);
break;
case PTRACE_POKEUSR:
ret = putreg32(child, addr, data);
break;
case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct32),
datap);
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_GENERAL, 0,
sizeof(struct user_regs_struct32),
datap);
case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_FP, 0,
sizeof(struct user_i387_ia32_struct),
datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(
child, &user_x86_32_view, REGSET_FP,
0, sizeof(struct user_i387_ia32_struct), datap);
case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);
case PTRACE_GET_THREAD_AREA:
case PTRACE_SET_THREAD_AREA:
return arch_ptrace(child, request, addr, data);
default:
return compat_ptrace_request(child, request, addr, data);
}
return ret;
#ifdef CONFIG_IA32_EMULATION
return ia32_arch_ptrace(child, request, caddr, cdata);
#else
return 0;
#endif
}
#endif /* CONFIG_IA32_EMULATION */
#endif /* CONFIG_COMPAT */
#ifdef CONFIG_X86_64
@@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
}
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
audit_syscall_entry(regs->orig_ax, regs->di,
regs->si, regs->dx, regs->r10);
} else
#endif
{
audit_syscall_entry(regs->orig_ax, regs->bx,
regs->cx, regs->dx, regs->si);
}
}
/*
* We can return 0 to resume the syscall or anything else to go to phase
* 2. If we resume the syscall, we need to put something appropriate in
* regs->orig_ax.
*
* NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
* are fully functional.
*
* For phase 2's benefit, our return value is:
* 0: resume the syscall
* 1: go to phase 2; no seccomp phase 2 needed
* anything else: go to phase 2; pass return value to seccomp
*/
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
unsigned long ret = 0;
u32 work;
BUG_ON(regs != task_pt_regs(current));
work = ACCESS_ONCE(current_thread_info()->flags) &
_TIF_WORK_SYSCALL_ENTRY;
/*
* If TIF_NOHZ is set, we are required to call user_exit() before
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
user_exit();
work &= ~_TIF_NOHZ;
}
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
* code, and keeping seccomp fast is probably more valuable
* than the rest of this.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;
sd.arch = arch;
sd.nr = regs->orig_ax;
sd.instruction_pointer = regs->ip;
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
sd.args[0] = regs->di;
sd.args[1] = regs->si;
sd.args[2] = regs->dx;
sd.args[3] = regs->r10;
sd.args[4] = regs->r8;
sd.args[5] = regs->r9;
} else
#endif
{
sd.args[0] = regs->bx;
sd.args[1] = regs->cx;
sd.args[2] = regs->dx;
sd.args[3] = regs->si;
sd.args[4] = regs->di;
sd.args[5] = regs->bp;
}
BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
ret = seccomp_phase1(&sd);
if (ret == SECCOMP_PHASE1_SKIP) {
regs->orig_ax = -1;
ret = 0;
} else if (ret != SECCOMP_PHASE1_OK) {
return ret; /* Go directly to phase 2 */
}
work &= ~_TIF_SECCOMP;
}
#endif
/* Do our best to finish without phase 2. */
if (work == 0)
return ret; /* seccomp and/or nohz only (ret == 0 here) */
#ifdef CONFIG_AUDITSYSCALL
if (work == _TIF_SYSCALL_AUDIT) {
/*
* If there is no more work to be done except auditing,
* then audit in phase 1. Phase 2 always audits, so, if
* we audit here, then we can't go on to phase 2.
*/
do_audit_syscall_entry(regs, arch);
return 0;
}
#endif
return 1; /* Something is enabled that we can't handle in phase 1 */
}
/* Returns the syscall nr to run (which should match regs->orig_ax). */
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
unsigned long phase1_result)
{
long ret = 0;
u32 work = ACCESS_ONCE(current_thread_info()->flags) &
_TIF_WORK_SYSCALL_ENTRY;
BUG_ON(regs != task_pt_regs(current));
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
* If user-mode had set TF itself, then it's still clear from
* do_debug() and we need to set it again to restore the user
* state. If we entered on the slow path, TF was already set.
*/
if (work & _TIF_SINGLESTEP)
regs->flags |= X86_EFLAGS_TF;
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
* they can see any changes made by a seccomp tracer.
*/
if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
/* seccomp failures shouldn't expose any additional code. */
return -1;
}
#endif
if (unlikely(work & _TIF_SYSCALL_EMU))
ret = -1L;
if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
tracehook_report_syscall_entry(regs))
ret = -1L;
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
do_audit_syscall_entry(regs, arch);
return ret ?: regs->orig_ax;
}
long syscall_trace_enter(struct pt_regs *regs)
{
u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
if (phase1_result == 0)
return regs->orig_ax;
else
return syscall_trace_enter_phase2(regs, arch, phase1_result);
}
void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
/*
* We may come here right after calling schedule_user()
* or do_notify_resume(), in which case we can be in RCU
* user mode.
*/
user_exit();
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
* We already reported this syscall instruction in
* syscall_trace_enter().
*/
step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
!test_thread_flag(TIF_SYSCALL_EMU);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
user_enter();
}

查看文件

@@ -916,11 +916,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
apm_info.bios = boot_params.apm_bios_info;
ist_info = boot_params.ist_info;
if (boot_params.sys_desc_table.length != 0) {
machine_id = boot_params.sys_desc_table.table[0];
machine_submodel_id = boot_params.sys_desc_table.table[1];
BIOS_revision = boot_params.sys_desc_table.table[2];
}
#endif
saved_video_mode = boot_params.hdr.vid_mode;
bootloader_type = boot_params.hdr.type_of_loader;

查看文件

@@ -31,11 +31,11 @@
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
#include <asm/vm86.h>
#ifdef CONFIG_X86_64
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/sys_ia32.h>
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
@@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
bool stepping, failed;
struct fpu *fpu = &current->thread.fpu;
if (v8086_mode(regs))
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
static void do_signal(struct pt_regs *regs)
void do_signal(struct pt_regs *regs)
{
struct ksignal ksig;
@@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
/*
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
__visible void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
user_exit();
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;

查看文件

@@ -0,0 +1,95 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
int err = 0;
bool ia32 = test_thread_flag(TIF_IA32);
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
put_user_try {
/* If you change siginfo_t structure, please make sure that
this code is fixed accordingly.
It should never copy any pad contained in the structure
to avoid security leaks, but must copy the generic
3 ints plus the relevant union member. */
put_user_ex(from->si_signo, &to->si_signo);
put_user_ex(from->si_errno, &to->si_errno);
put_user_ex((short)from->si_code, &to->si_code);
if (from->si_code < 0) {
put_user_ex(from->si_pid, &to->si_pid);
put_user_ex(from->si_uid, &to->si_uid);
put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
} else {
/*
* First 32bits of unions are always present:
* si_pid === si_band === si_tid === si_addr(LS half)
*/
put_user_ex(from->_sifields._pad[0],
&to->_sifields._pad[0]);
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
break;
case __SI_SYS >> 16:
put_user_ex(from->si_syscall, &to->si_syscall);
put_user_ex(from->si_arch, &to->si_arch);
break;
case __SI_CHLD >> 16:
if (ia32) {
put_user_ex(from->si_utime, &to->si_utime);
put_user_ex(from->si_stime, &to->si_stime);
} else {
put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
}
put_user_ex(from->si_status, &to->si_status);
/* FALL THROUGH */
default:
case __SI_KILL >> 16:
put_user_ex(from->si_uid, &to->si_uid);
break;
case __SI_POLL >> 16:
put_user_ex(from->si_fd, &to->si_fd);
break;
case __SI_TIMER >> 16:
put_user_ex(from->si_overrun, &to->si_overrun);
put_user_ex(ptr_to_compat(from->si_ptr),
&to->si_ptr);
break;
/* This is not generated by the kernel as of now. */
case __SI_RT >> 16:
case __SI_MESGQ >> 16:
put_user_ex(from->si_uid, &to->si_uid);
put_user_ex(from->si_int, &to->si_int);
break;
}
}
} put_user_catch(err);
return err;
}
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
int err = 0;
u32 ptr32;
if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
return -EFAULT;
get_user_try {
get_user_ex(to->si_signo, &from->si_signo);
get_user_ex(to->si_errno, &from->si_errno);
get_user_ex(to->si_code, &from->si_code);
get_user_ex(to->si_pid, &from->si_pid);
get_user_ex(to->si_uid, &from->si_uid);
get_user_ex(ptr32, &from->si_ptr);
to->si_ptr = compat_ptr(ptr32);
} get_user_catch(err);
return err;
}

查看文件

@@ -30,6 +30,7 @@
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
finish:
local_irq_save(flags);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
local_irq_restore(flags);
}

查看文件

@@ -97,8 +97,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -146,16 +144,11 @@ static void smp_callin(void)
/*
* If waken up by an INIT in an 82489DX configuration
* we may get here before an INIT-deassert IPI reaches
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*
* Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
* cpu_callout_mask guarantees we don't get here before
* an INIT_deassert IPI reaches our local APIC, so it is
* now safe to touch our local APIC.
*/
cpuid = smp_processor_id();
if (apic->wait_for_init_deassert && cpuid)
while (!atomic_read(&init_deasserted))
cpu_relax();
/*
* (This works even if the APIC is not enabled.)
@@ -620,7 +613,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
send_status = safe_apic_wait_icr_idle();
mb();
atomic_set(&init_deasserted, 1);
/*
* Should we send STARTUP IPIs ?
@@ -665,7 +657,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(300);
if (init_udelay)
udelay(300);
pr_debug("Startup point 1\n");
@@ -675,7 +668,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
if (init_udelay)
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -859,8 +853,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* the targeted processor.
*/
atomic_set(&init_deasserted, 0);
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
pr_debug("Setting warm reset code and vector.\n");
@@ -898,7 +890,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
if (!boot_error) {
/*
* Wait 10s total for a response from AP
* Wait 10s total for first sign of life from AP
*/
boot_error = -1;
timeout = jiffies + 10*HZ;
@@ -911,7 +903,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
boot_error = 0;
break;
}
udelay(100);
schedule();
}
}
@@ -927,7 +918,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* for the MTRR work(triggered by the AP coming online)
* to be completed in the stop machine context.
*/
udelay(100);
schedule();
}
}
@@ -1358,7 +1348,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
}
static void __ref remove_cpu_from_maps(int cpu)
static void remove_cpu_from_maps(int cpu)
{
set_cpu_online(cpu, false);
cpumask_clear_cpu(cpu, cpu_callout_mask);

查看文件

@@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
return addr;
}
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* We'll assume that the code segments in the GDT
* are all zero-based. That is largely true: the
@@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
}
mutex_unlock(&child->mm->context.lock);
}
#endif
return addr;
}

查看文件

@@ -57,7 +57,7 @@ __setup("cpu0_hotplug", enable_cpu0_hotplug);
*
* This is only called for debugging CPU offline/online feature.
*/
int __ref _debug_hotplug_cpu(int cpu, int action)
int _debug_hotplug_cpu(int cpu, int action)
{
struct device *dev = get_cpu_device(cpu);
int ret;
@@ -104,7 +104,7 @@ static int __init debug_hotplug_cpu(void)
late_initcall_sync(debug_hotplug_cpu);
#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
int __ref arch_register_cpu(int num)
int arch_register_cpu(int num)
{
struct cpuinfo_x86 *c = &cpu_data(num);

查看文件

@@ -12,10 +12,5 @@
*/
u64 notrace trace_clock_x86_tsc(void)
{
u64 ret;
rdtsc_barrier();
rdtscll(ret);
return ret;
return rdtsc_ordered();
}

查看文件

@@ -62,6 +62,7 @@
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
preempt_count_dec();
}
enum ctx_state ist_enter(struct pt_regs *regs)
void ist_enter(struct pt_regs *regs)
{
enum ctx_state prev_state;
if (user_mode(regs)) {
/* Other than that, we're just an exception. */
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
} else {
/*
* We might have interrupted pretty much anything. In
@@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs)
* but we need to notify RCU.
*/
rcu_nmi_enter();
prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
}
/*
* We are atomic because we're on the IST stack (or we're on x86_32,
* in which case we still shouldn't schedule).
*
* This must be after exception_enter(), because exception_enter()
* won't do anything if in_interrupt() returns true.
* We are atomic because we're on the IST stack; or we're on
* x86_32, in which case we still shouldn't schedule; or we're
* on x86_64 and entered from user mode, in which case we're
* still atomic unless ist_begin_non_atomic is called.
*/
preempt_count_add(HARDIRQ_OFFSET);
/* This code is a bit fragile. Test it. */
rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
return prev_state;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
}
void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
void ist_exit(struct pt_regs *regs)
{
/* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET);
if (user_mode(regs))
return exception_exit(prev_state);
else
if (!user_mode(regs))
rcu_nmi_exit();
}
@@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
* begins a non-atomic section within an ist_enter()/ist_exit() region.
* Callers are responsible for enabling interrupts themselves inside
* the non-atomic section, and callers must call is_end_non_atomic()
* the non-atomic section, and callers must call ist_end_non_atomic()
* before ist_exit().
*/
void ist_begin_non_atomic(struct pt_regs *regs)
@@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap);
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
unsigned long trapnr, int signr)
{
enum ctx_state prev_state = exception_enter();
siginfo_t info;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
conditional_sti(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
exception_exit(prev_state);
}
#define DO_ERROR(trapnr, signr, str, name) \
@@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
}
#endif
ist_enter(regs); /* Discard prev_state because we won't return. */
ist_enter(regs);
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
tsk->thread.error_code = error_code;
@@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
const struct bndcsr *bndcsr;
siginfo_t *info;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
goto exit;
return;
conditional_sti(regs);
if (!user_mode(regs))
@@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
die("bounds", regs, error_code);
}
exit:
exception_exit(prev_state);
return;
exit_trap:
/*
* This path out is for all the cases where we could not
@@ -447,35 +435,33 @@ exit_trap:
* time..
*/
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
exception_exit(prev_state);
}
dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
enum ctx_state prev_state;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
conditional_sti(regs);
if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit;
return;
}
tsk = current;
if (!user_mode(regs)) {
if (fixup_exception(regs))
goto exit;
return;
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die("general protection fault", regs, error_code);
goto exit;
return;
}
tsk->thread.error_code = error_code;
@@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
exit:
exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_general_protection);
/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
#ifdef CONFIG_DYNAMIC_FTRACE
/*
* ftrace must be first, everything else may cause a recursive crash.
@@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
if (poke_int3_handler(regs))
return;
prev_state = ist_enter(regs);
ist_enter(regs);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
@@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
debug_stack_usage_dec();
exit:
ist_exit(regs, prev_state);
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_int3);
@@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
enum ctx_state prev_state;
int user_icebp = 0;
unsigned long dr6;
int si_code;
prev_state = ist_enter(regs);
ist_enter(regs);
get_debugreg(dr6, 6);
@@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
ist_exit(regs, prev_state);
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(prev_state);
}
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
math_error(regs, error_code, X86_TRAP_XF);
exception_exit(prev_state);
}
dotraplinkage void
@@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
enum ctx_state prev_state;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
@@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
info.regs = regs;
math_emulate(&info);
exception_exit(prev_state);
return;
}
#endif
@@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_device_not_available);
@@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available);
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;
enum ctx_state prev_state;
prev_state = exception_enter();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
local_irq_enable();
info.si_signo = SIGILL;
@@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
}
exception_exit(prev_state);
}
#endif

查看文件

@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
data = cyc2ns_write_begin(cpu);
rdtscll(tsc_now);
tsc_now = rdtsc();
ns_now = cycles_2_ns(tsc_now);
/*
@@ -290,12 +290,20 @@ u64 native_sched_clock(void)
}
/* read the Time Stamp Counter: */
rdtscll(tsc_now);
tsc_now = rdtsc();
/* return the value in ns */
return cycles_2_ns(tsc_now);
}
/*
* Generate a sched_clock if you already have a TSC value.
*/
u64 native_sched_clock_from_tsc(u64 tsc)
{
return cycles_2_ns(tsc);
}
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
@@ -308,12 +316,6 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
unsigned long long native_read_tsc(void)
{
return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);
int check_tsc_unstable(void)
{
return tsc_unstable;
@@ -976,7 +978,7 @@ static struct clocksource clocksource_tsc;
*/
static cycle_t read_tsc(struct clocksource *cs)
{
return (cycle_t)get_cycles();
return (cycle_t)rdtsc_ordered();
}
/*

查看文件

@@ -39,16 +39,15 @@ static cycles_t max_warp;
static int nr_warps;
/*
* TSC-warp measurement loop running on both CPUs:
* TSC-warp measurement loop running on both CPUs. This is not called
* if there is no TSC.
*/
static void check_tsc_warp(unsigned int timeout)
{
cycles_t start, now, prev, end;
int i;
rdtsc_barrier();
start = get_cycles();
rdtsc_barrier();
start = rdtsc_ordered();
/*
* The measurement runs for 'timeout' msecs:
*/
@@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout)
*/
arch_spin_lock(&sync_lock);
prev = last_tsc;
rdtsc_barrier();
now = get_cycles();
rdtsc_barrier();
now = rdtsc_ordered();
last_tsc = now;
arch_spin_unlock(&sync_lock);
@@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu)
/*
* No need to check if we already know that the TSC is not
* synchronized:
* synchronized or if we have no TSC.
*/
if (unsynchronized_tsc())
return;
@@ -190,6 +187,7 @@ void check_tsc_sync_target(void)
{
int cpus = 2;
/* Also aborts if there is no TSC. */
if (unsynchronized_tsc() || tsc_clocksource_reliable)
return;

查看文件

@@ -985,3 +985,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return -1;
}
bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
struct pt_regs *regs)
{
if (ctx == RP_CHECK_CALL) /* sp was just decremented by "call" insn */
return regs->sp < ret->stack;
else
return regs->sp <= ret->stack;
}

查看文件

@@ -44,11 +44,14 @@
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
/*
* Known problems:
@@ -66,10 +69,6 @@
*/
#define KVM86 ((struct kernel_vm86_struct *)regs)
#define VMPI KVM86->vm86plus
/*
* 8- and 16-bit register defines..
*/
@@ -81,8 +80,8 @@
/*
* virtual flags (16 and 32-bit versions)
*/
#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
#define VEFLAGS (current->thread.v86flags)
#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags))
#define VEFLAGS (current->thread.vm86->veflags)
#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))
@@ -90,46 +89,13 @@
#define SAFE_MASK (0xDD5)
#define RETURN_MASK (0xDFF)
/* convert kernel_vm86_regs to vm86_regs */
static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
const struct kernel_vm86_regs *regs)
{
int ret = 0;
/*
* kernel_vm86_regs is missing gs, so copy everything up to
* (but not including) orig_eax, and then rest including orig_eax.
*/
ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
sizeof(struct kernel_vm86_regs) -
offsetof(struct kernel_vm86_regs, pt.orig_ax));
return ret;
}
/* convert vm86_regs to kernel_vm86_regs */
static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
const struct vm86_regs __user *user,
unsigned extra)
{
int ret = 0;
/* copy ax-fs inclusive */
ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
/* copy orig_ax-__gsh+extra */
ret += copy_from_user(&regs->pt.orig_ax, &user->orig_eax,
sizeof(struct kernel_vm86_regs) -
offsetof(struct kernel_vm86_regs, pt.orig_ax) +
extra);
return ret;
}
struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
struct tss_struct *tss;
struct pt_regs *ret;
unsigned long tmp;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
long err = 0;
/*
* This gets called from entry.S with interrupts disabled, but
@@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
*/
local_irq_enable();
if (!current->thread.vm86_info) {
pr_alert("no vm86_info: BAD\n");
if (!vm86 || !vm86->user_vm86) {
pr_alert("no user_vm86: BAD\n");
do_exit(SIGSEGV);
}
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
if (tmp) {
pr_alert("could not access userspace vm86_info\n");
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
sizeof(struct vm86plus_struct) :
sizeof(struct vm86_struct))) {
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
}
put_user_try {
put_user_ex(regs->pt.bx, &user->regs.ebx);
put_user_ex(regs->pt.cx, &user->regs.ecx);
put_user_ex(regs->pt.dx, &user->regs.edx);
put_user_ex(regs->pt.si, &user->regs.esi);
put_user_ex(regs->pt.di, &user->regs.edi);
put_user_ex(regs->pt.bp, &user->regs.ebp);
put_user_ex(regs->pt.ax, &user->regs.eax);
put_user_ex(regs->pt.ip, &user->regs.eip);
put_user_ex(regs->pt.cs, &user->regs.cs);
put_user_ex(regs->pt.flags, &user->regs.eflags);
put_user_ex(regs->pt.sp, &user->regs.esp);
put_user_ex(regs->pt.ss, &user->regs.ss);
put_user_ex(regs->es, &user->regs.es);
put_user_ex(regs->ds, &user->regs.ds);
put_user_ex(regs->fs, &user->regs.fs);
put_user_ex(regs->gs, &user->regs.gs);
put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
} put_user_catch(err);
if (err) {
pr_alert("could not access userspace vm86 info\n");
do_exit(SIGSEGV);
}
tss = &per_cpu(cpu_tss, get_cpu());
current->thread.sp0 = current->thread.saved_sp0;
current->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &current->thread);
current->thread.saved_sp0 = 0;
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &tsk->thread);
vm86->saved_sp0 = 0;
put_cpu();
ret = KVM86->regs32;
memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
ret->fs = current->thread.saved_fs;
set_user_gs(ret, current->thread.saved_gs);
lazy_load_gs(vm86->regs32.gs);
return ret;
regs->pt.ax = retval;
}
static void mark_screen_rdonly(struct mm_struct *mm)
@@ -200,45 +192,16 @@ out:
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86)
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
* return to 32 bit user space.
*/
struct task_struct *tsk = current;
int tmp;
if (tsk->thread.saved_sp0)
return -EPERM;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, vm86plus) -
sizeof(info.regs));
if (tmp)
return -EFAULT;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
info.regs32 = current_pt_regs();
tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk);
return 0; /* we never return here */
return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
}
SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
* return to 32 bit user space.
*/
struct task_struct *tsk;
int tmp;
struct vm86plus_struct __user *v86;
tsk = current;
switch (cmd) {
case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ:
@@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
}
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
if (tsk->thread.saved_sp0)
return -EPERM;
v86 = (struct vm86plus_struct __user *)arg;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs));
if (tmp)
return -EFAULT;
info.regs32 = current_pt_regs();
info.vm86plus.is_vm86pus = 1;
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
do_sys_vm86(&info, tsk);
return 0; /* we never return here */
return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
}
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
struct tss_struct *tss;
/*
* make sure the vm86() system call doesn't try to do anything silly
*/
info->regs.pt.ds = 0;
info->regs.pt.es = 0;
info->regs.pt.fs = 0;
#ifndef CONFIG_X86_32_LAZY_GS
info->regs.pt.gs = 0;
#endif
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;
if (!vm86) {
if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
return -ENOMEM;
tsk->thread.vm86 = vm86;
}
if (vm86->saved_sp0)
return -EPERM;
if (!access_ok(VERIFY_READ, user_vm86, plus ?
sizeof(struct vm86_struct) :
sizeof(struct vm86plus_struct)))
return -EFAULT;
memset(&vm86regs, 0, sizeof(vm86regs));
get_user_try {
unsigned short seg;
get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
get_user_ex(seg, &user_vm86->regs.cs);
vm86regs.pt.cs = seg;
get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
get_user_ex(seg, &user_vm86->regs.ss);
vm86regs.pt.ss = seg;
get_user_ex(vm86regs.es, &user_vm86->regs.es);
get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
get_user_ex(vm86regs.gs, &user_vm86->regs.gs);
get_user_ex(vm86->flags, &user_vm86->flags);
get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
} get_user_catch(err);
if (err)
return err;
if (copy_from_user(&vm86->int_revectored,
&user_vm86->int_revectored,
sizeof(struct revectored_struct)))
return -EFAULT;
if (copy_from_user(&vm86->int21_revectored,
&user_vm86->int21_revectored,
sizeof(struct revectored_struct)))
return -EFAULT;
if (plus) {
if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
sizeof(struct vm86plus_info_struct)))
return -EFAULT;
vm86->vm86plus.is_vm86pus = 1;
} else
memset(&vm86->vm86plus, 0,
sizeof(struct vm86plus_info_struct));
memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
vm86->user_vm86 = user_vm86;
/*
* The flags register is also special: we cannot trust that the user
* has set it up safely, so this makes sure interrupt etc flags are
* inherited from protected mode.
*/
VEFLAGS = info->regs.pt.flags;
info->regs.pt.flags &= SAFE_MASK;
info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
info->regs.pt.flags |= X86_VM_MASK;
VEFLAGS = vm86regs.pt.flags;
vm86regs.pt.flags &= SAFE_MASK;
vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
vm86regs.pt.flags |= X86_VM_MASK;
switch (info->cpu_type) {
vm86regs.pt.orig_ax = regs->orig_ax;
switch (vm86->cpu_type) {
case CPU_286:
tsk->thread.v86mask = 0;
vm86->veflags_mask = 0;
break;
case CPU_386:
tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
break;
case CPU_486:
tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
break;
default:
tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
break;
}
/*
* Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
* Save old state
*/
info->regs32->ax = VM86_SIGNAL;
tsk->thread.saved_sp0 = tsk->thread.sp0;
tsk->thread.saved_fs = info->regs32->fs;
tsk->thread.saved_gs = get_user_gs(info->regs32);
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);
tss = &per_cpu(cpu_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
if (cpu_has_sep)
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
put_cpu();
tsk->thread.screen_bitmap = info->screen_bitmap;
if (info->flags & VM86_SCREEN_BITMAP)
if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
/*call __audit_syscall_exit since we do not exit via the normal paths */
#ifdef CONFIG_AUDITSYSCALL
if (unlikely(current->audit_context))
__audit_syscall_exit(1, 0);
#endif
__asm__ __volatile__(
"movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
#ifdef CONFIG_X86_32_LAZY_GS
"mov %2, %%gs\n\t"
#endif
"jmp resume_userspace"
: /* no outputs */
:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
/* we never return here */
}
static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
{
struct pt_regs *regs32;
regs32 = save_v86_state(regs16);
regs32->ax = retval;
__asm__ __volatile__("movl %0,%%esp\n\t"
"movl %1,%%ebp\n\t"
"jmp resume_userspace"
: : "r" (regs32), "r" (current_thread_info()));
memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
force_iret();
return regs->ax;
}
static inline void set_IF(struct kernel_vm86_regs *regs)
{
VEFLAGS |= X86_EFLAGS_VIF;
if (VEFLAGS & X86_EFLAGS_VIP)
return_to_32bit(regs, VM86_STI);
}
static inline void clear_IF(struct kernel_vm86_regs *regs)
@@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs)
static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
set_flags(VEFLAGS, flags, current->thread.v86mask);
set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
set_flags(regs->pt.flags, flags, SAFE_MASK);
if (flags & X86_EFLAGS_IF)
set_IF(regs);
@@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs
static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
set_flags(VFLAGS, flags, current->thread.v86mask);
set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
set_flags(regs->pt.flags, flags, SAFE_MASK);
if (flags & X86_EFLAGS_IF)
set_IF(regs);
@@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
if (VEFLAGS & X86_EFLAGS_VIF)
flags |= X86_EFLAGS_IF;
flags |= X86_EFLAGS_IOPL;
return flags | (VEFLAGS & current->thread.v86mask);
return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
}
static inline int is_revectored(int nr, struct revectored_struct *bitmap)
@@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
{
unsigned long __user *intr_ptr;
unsigned long segoffs;
struct vm86 *vm86 = current->thread.vm86;
if (regs->pt.cs == BIOSSEG)
goto cannot_handle;
if (is_revectored(i, &KVM86->int_revectored))
if (is_revectored(i, &vm86->int_revectored))
goto cannot_handle;
if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
goto cannot_handle;
intr_ptr = (unsigned long __user *) (i << 2);
if (get_user(segoffs, intr_ptr))
@@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
return;
cannot_handle:
return_to_32bit(regs, VM86_INTx + (i << 8));
save_v86_state(regs, VM86_INTx + (i << 8));
}
int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
if (VMPI.is_vm86pus) {
struct vm86 *vm86 = current->thread.vm86;
if (vm86->vm86plus.is_vm86pus) {
if ((trapno == 3) || (trapno == 1)) {
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
/* setting this flag forces the code in entry_32.S to
the path where we call save_v86_state() and change
the stack pointer to KVM86->regs32 */
set_thread_flag(TIF_NOTIFY_RESUME);
save_v86_state(regs, VM86_TRAP + (trapno << 8));
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
@@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
unsigned char __user *ssp;
unsigned short ip, sp, orig_flags;
int data32, pref_done;
struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;
#define CHECK_IF_IN_TRAP \
if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
newflags |= X86_EFLAGS_TF
#define VM86_FAULT_RETURN do { \
if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
return_to_32bit(regs, VM86_PICRETURN); \
if (orig_flags & X86_EFLAGS_TF) \
handle_vm86_trap(regs, 0, 1); \
return; } while (0)
orig_flags = *(unsigned short *)&regs->pt.flags;
@@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
SP(regs) -= 2;
}
IP(regs) = ip;
VM86_FAULT_RETURN;
goto vm86_fault_return;
/* popf */
case 0x9d:
@@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
else
set_vflags_short(newflags, regs);
VM86_FAULT_RETURN;
goto check_vip;
}
/* int xx */
case 0xcd: {
int intno = popb(csp, ip, simulate_sigsegv);
IP(regs) = ip;
if (VMPI.vm86dbg_active) {
if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
return_to_32bit(regs, VM86_INTx + (intno << 8));
if (vmpi->vm86dbg_active) {
if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
save_v86_state(regs, VM86_INTx + (intno << 8));
return;
}
}
do_int(regs, intno, ssp, sp);
return;
@@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
} else {
set_vflags_short(newflags, regs);
}
VM86_FAULT_RETURN;
goto check_vip;
}
/* cli */
case 0xfa:
IP(regs) = ip;
clear_IF(regs);
VM86_FAULT_RETURN;
goto vm86_fault_return;
/* sti */
/*
@@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
case 0xfb:
IP(regs) = ip;
set_IF(regs);
VM86_FAULT_RETURN;
goto check_vip;
default:
return_to_32bit(regs, VM86_UNKNOWN);
save_v86_state(regs, VM86_UNKNOWN);
}
return;
check_vip:
if (VEFLAGS & X86_EFLAGS_VIP) {
save_v86_state(regs, VM86_STI);
return;
}
vm86_fault_return:
if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
save_v86_state(regs, VM86_PICRETURN);
return;
}
if (orig_flags & X86_EFLAGS_TF)
handle_vm86_trap(regs, 0, X86_TRAP_DB);
return;
simulate_sigsegv:
/* FIXME: After a long discussion with Stas we finally
* agreed, that this is wrong. Here we should
@@ -720,7 +713,7 @@ simulate_sigsegv:
* should be a mixture of the two, but how do we
* get the information? [KD]
*/
return_to_32bit(regs, VM86_UNKNOWN);
save_v86_state(regs, VM86_UNKNOWN);
}
/* ---------------- vm86 special IRQ passing stuff ----------------- */