Merge branch 'linus' into x86/urgent, to be able to merge a dependent fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace

obj-y := process_$(BITS).o signal.o
obj-$(CONFIG_COMPAT) += signal_compat.o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += time.o ioport.o dumpstack.o nmi.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
@@ -107,8 +109,6 @@ obj-$(CONFIG_EFI) += sysfb_efi.o

obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o
obj-$(CONFIG_PMC_ATOM) += pmc_atom.o

###
# 64 bit specific files

@@ -710,7 +710,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
#endif
}

static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
int cpu;

@@ -726,12 +726,6 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
*pcpu = cpu;
return 0;
}

/* wrapper to silence section mismatch warning */
int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
return _acpi_map_lsapic(handle, physid, pcpu);
}
EXPORT_SYMBOL(acpi_map_cpu);

int acpi_unmap_cpu(int cpu)

@@ -263,7 +263,7 @@ static int apbt_clocksource_register(void)

/* Verify whether apbt counter works */
t1 = dw_apb_clocksource_read(clocksource_apbt);
rdtscll(start);
start = rdtsc();

/*
* We don't know the TSC frequency yet, but waiting for
@@ -273,7 +273,7 @@ static int apbt_clocksource_register(void)
*/
do {
rep_nop();
rdtscll(now);
now = rdtsc();
} while ((now - start) < 200000UL);

/* APBT is the only always on clocksource, it has to work! */
@@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void)
old = dw_apb_clocksource_read(clocksource_apbt);
old += loop;

t1 = __native_read_tsc();
t1 = rdtsc();

do {
new = dw_apb_clocksource_read(clocksource_apbt);
} while (new < old);

t2 = __native_read_tsc();
t2 = rdtsc();

shift = 5;
if (unlikely(loop >> shift == 0)) {

@@ -457,45 +457,45 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;

rdtscll(tsc);
tsc = rdtsc();
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
}

/*
* Setup the lapic timer in periodic or oneshot mode
*/
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt)
static int lapic_timer_shutdown(struct clock_event_device *evt)
{
unsigned long flags;
unsigned int v;

/* Lapic used as dummy for broadcast ? */
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return;
return 0;

local_irq_save(flags);
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0);
return 0;
}

switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
case CLOCK_EVT_MODE_ONESHOT:
__setup_APIC_LVTT(lapic_timer_frequency,
mode != CLOCK_EVT_MODE_PERIODIC, 1);
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
break;
}
static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
/* Lapic used as dummy for broadcast ? */
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;

local_irq_restore(flags);
__setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
return 0;
}

static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
return lapic_timer_set_periodic_oneshot(evt, false);
}

static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
return lapic_timer_set_periodic_oneshot(evt, true);
}

/*
@@ -513,15 +513,18 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
* The local apic timer can be used for any function which is CPU local.
*/
static struct clock_event_device lapic_clockevent = {
.name = "lapic",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
.shift = 32,
.set_mode = lapic_timer_setup,
.set_next_event = lapic_next_event,
.broadcast = lapic_timer_broadcast,
.rating = 100,
.irq = -1,
.name = "lapic",
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
| CLOCK_EVT_FEAT_DUMMY,
.shift = 32,
.set_state_shutdown = lapic_timer_shutdown,
.set_state_periodic = lapic_timer_set_periodic,
.set_state_oneshot = lapic_timer_set_oneshot,
.set_next_event = lapic_next_event,
.broadcast = lapic_timer_broadcast,
.rating = 100,
.irq = -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

@@ -592,7 +595,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
unsigned long pm = acpi_pm_read_early();

if (cpu_has_tsc)
rdtscll(tsc);
tsc = rdtsc();

switch (lapic_cal_loops++) {
case 0:
@@ -778,7 +781,7 @@ static int __init calibrate_APIC_clock(void)
* Setup the apic timer manually
*/
levt->event_handler = lapic_cal_handler;
lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
lapic_timer_set_periodic(levt);
lapic_cal_loops = -1;

/* Let the interrupts run */
@@ -788,7 +791,8 @@ static int __init calibrate_APIC_clock(void)
cpu_relax();

/* Stop the lapic timer */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
local_irq_disable();
lapic_timer_shutdown(levt);

/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
@@ -799,8 +803,8 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
else
levt->features |= CLOCK_EVT_FEAT_DUMMY;
} else
local_irq_enable();
}
local_irq_enable();

if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
pr_warning("APIC timer disabled due to verification failure\n");
@@ -878,7 +882,7 @@ static void local_apic_timer_interrupt(void)
if (!evt->event_handler) {
pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
/* Switch it off */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
lapic_timer_shutdown(evt);
return;
}

@@ -1209,7 +1213,7 @@ void setup_local_APIC(void)
long long max_loops = cpu_khz ? cpu_khz : 1000000;

if (cpu_has_tsc)
rdtscll(tsc);
tsc = rdtsc();

if (disable_apic) {
disable_ioapic_support();
@@ -1293,7 +1297,7 @@ void setup_local_APIC(void)
}
if (queued) {
if (cpu_has_tsc && cpu_khz) {
rdtscll(ntsc);
ntsc = rdtsc();
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
max_loops--;

@@ -191,7 +191,6 @@ static struct apic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,

.wait_for_init_deassert = false,
.inquire_remote_apic = default_inquire_remote_apic,

.read = native_apic_mem_read,
@@ -299,7 +298,6 @@ static struct apic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,

.wait_for_init_deassert = false,
.inquire_remote_apic = default_inquire_remote_apic,

.read = native_apic_mem_read,

@@ -152,7 +152,6 @@ struct apic apic_noop = {

.wakeup_secondary_cpu = noop_wakeup_secondary_cpu,

.wait_for_init_deassert = false,
.inquire_remote_apic = NULL,

.read = noop_apic_read,

@@ -92,7 +92,6 @@ static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)

write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);

atomic_set(&init_deasserted, 1);
return 0;
}

@@ -235,7 +234,6 @@ static const struct apic apic_numachip __refconst = {
.send_IPI_self = numachip_send_IPI_self,

.wakeup_secondary_cpu = numachip_wakeup_secondary,
.wait_for_init_deassert = false,
.inquire_remote_apic = NULL, /* REMRD not supported */

.read = native_apic_mem_read,

@@ -186,7 +186,6 @@ static struct apic apic_bigsmp = {
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,

.wait_for_init_deassert = true,
.inquire_remote_apic = default_inquire_remote_apic,

.read = native_apic_mem_read,

@@ -2541,7 +2541,7 @@ void __init setup_ioapic_dest(void)
* Honour affinities which have been set in early boot
*/
if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
mask = idata->affinity;
mask = irq_data_get_affinity_mask(idata);
else
mask = apic->target_cpus();

@@ -264,7 +264,7 @@ static inline int hpet_dev_id(struct irq_domain *domain)

static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
hpet_msi_write(data->handler_data, msg);
hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
}

static struct irq_chip hpet_msi_controller = {

@@ -111,7 +111,6 @@ static struct apic apic_default = {
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,

.wait_for_init_deassert = true,
.inquire_remote_apic = default_inquire_remote_apic,

.read = native_apic_mem_read,

@@ -169,8 +169,7 @@ next:
|
||||
goto next;
|
||||
|
||||
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
|
||||
if (per_cpu(vector_irq, new_cpu)[vector] >
|
||||
VECTOR_UNDEFINED)
|
||||
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
|
||||
goto next;
|
||||
}
|
||||
/* Found one! */
|
||||
@@ -182,7 +181,7 @@ next:
|
||||
cpumask_intersects(d->old_domain, cpu_online_mask);
|
||||
}
|
||||
for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
|
||||
per_cpu(vector_irq, new_cpu)[vector] = irq;
|
||||
per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
|
||||
d->cfg.vector = vector;
|
||||
cpumask_copy(d->domain, vector_cpumask);
|
||||
err = 0;
|
||||
@@ -224,15 +223,16 @@ static int assign_irq_vector_policy(int irq, int node,
|
||||
|
||||
static void clear_irq_vector(int irq, struct apic_chip_data *data)
|
||||
{
|
||||
int cpu, vector;
|
||||
struct irq_desc *desc;
|
||||
unsigned long flags;
|
||||
int cpu, vector;
|
||||
|
||||
raw_spin_lock_irqsave(&vector_lock, flags);
|
||||
BUG_ON(!data->cfg.vector);
|
||||
|
||||
vector = data->cfg.vector;
|
||||
for_each_cpu_and(cpu, data->domain, cpu_online_mask)
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
|
||||
|
||||
data->cfg.vector = 0;
|
||||
cpumask_clear(data->domain);
|
||||
@@ -242,12 +242,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
|
||||
return;
|
||||
}
|
||||
|
||||
desc = irq_to_desc(irq);
|
||||
for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
|
||||
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
|
||||
vector++) {
|
||||
if (per_cpu(vector_irq, cpu)[vector] != irq)
|
||||
if (per_cpu(vector_irq, cpu)[vector] != desc)
|
||||
continue;
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -296,7 +297,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
|
||||
struct irq_alloc_info *info = arg;
|
||||
struct apic_chip_data *data;
|
||||
struct irq_data *irq_data;
|
||||
int i, err;
|
||||
int i, err, node;
|
||||
|
||||
if (disable_apic)
|
||||
return -ENXIO;
|
||||
@@ -308,12 +309,13 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
|
||||
for (i = 0; i < nr_irqs; i++) {
|
||||
irq_data = irq_domain_get_irq_data(domain, virq + i);
|
||||
BUG_ON(!irq_data);
|
||||
node = irq_data_get_node(irq_data);
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i])
|
||||
data = legacy_irq_data[virq + i];
|
||||
else
|
||||
#endif
|
||||
data = alloc_apic_chip_data(irq_data->node);
|
||||
data = alloc_apic_chip_data(node);
|
||||
if (!data) {
|
||||
err = -ENOMEM;
|
||||
goto error;
|
||||
@@ -322,8 +324,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
|
||||
irq_data->chip = &lapic_controller;
|
||||
irq_data->chip_data = data;
|
||||
irq_data->hwirq = virq + i;
|
||||
err = assign_irq_vector_policy(virq + i, irq_data->node, data,
|
||||
info);
|
||||
err = assign_irq_vector_policy(virq + i, node, data, info);
|
||||
if (err)
|
||||
goto error;
|
||||
}
|
||||
@@ -403,32 +404,32 @@ int __init arch_early_irq_init(void)
|
||||
return arch_early_ioapic_init();
|
||||
}
|
||||
|
||||
/* Initialize vector_irq on a new cpu */
|
||||
static void __setup_vector_irq(int cpu)
|
||||
{
|
||||
/* Initialize vector_irq on a new cpu */
|
||||
int irq, vector;
|
||||
struct apic_chip_data *data;
|
||||
struct irq_desc *desc;
|
||||
int irq, vector;
|
||||
|
||||
/* Mark the inuse vectors */
|
||||
for_each_active_irq(irq) {
|
||||
data = apic_chip_data(irq_get_irq_data(irq));
|
||||
if (!data)
|
||||
continue;
|
||||
for_each_irq_desc(irq, desc) {
|
||||
struct irq_data *idata = irq_desc_get_irq_data(desc);
|
||||
|
||||
if (!cpumask_test_cpu(cpu, data->domain))
|
||||
data = apic_chip_data(idata);
|
||||
if (!data || !cpumask_test_cpu(cpu, data->domain))
|
||||
continue;
|
||||
vector = data->cfg.vector;
|
||||
per_cpu(vector_irq, cpu)[vector] = irq;
|
||||
per_cpu(vector_irq, cpu)[vector] = desc;
|
||||
}
|
||||
/* Mark the free vectors */
|
||||
for (vector = 0; vector < NR_VECTORS; ++vector) {
|
||||
irq = per_cpu(vector_irq, cpu)[vector];
|
||||
if (irq <= VECTOR_UNDEFINED)
|
||||
desc = per_cpu(vector_irq, cpu)[vector];
|
||||
if (IS_ERR_OR_NULL(desc))
|
||||
continue;
|
||||
|
||||
data = apic_chip_data(irq_get_irq_data(irq));
|
||||
data = apic_chip_data(irq_desc_get_irq_data(desc));
|
||||
if (!cpumask_test_cpu(cpu, data->domain))
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
|
||||
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -448,7 +449,7 @@ void setup_vector_irq(int cpu)
|
||||
* legacy vector to irq mapping:
|
||||
*/
|
||||
for (irq = 0; irq < nr_legacy_irqs(); irq++)
|
||||
per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq;
|
||||
per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq);
|
||||
|
||||
__setup_vector_irq(cpu);
|
||||
}
|
||||
@@ -490,7 +491,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
|
||||
if (err) {
|
||||
struct irq_data *top = irq_get_irq_data(irq);
|
||||
|
||||
if (assign_irq_vector(irq, data, top->affinity))
|
||||
if (assign_irq_vector(irq, data,
|
||||
irq_data_get_affinity_mask(top)))
|
||||
pr_err("Failed to recover vector for irq %d\n", irq);
|
||||
return err;
|
||||
}
|
||||
@@ -538,27 +540,30 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
|
||||
|
||||
entering_ack_irq();
|
||||
|
||||
/* Prevent vectors vanishing under us */
|
||||
raw_spin_lock(&vector_lock);
|
||||
|
||||
me = smp_processor_id();
|
||||
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
||||
int irq;
|
||||
unsigned int irr;
|
||||
struct irq_desc *desc;
|
||||
struct apic_chip_data *data;
|
||||
struct irq_desc *desc;
|
||||
unsigned int irr;
|
||||
|
||||
irq = __this_cpu_read(vector_irq[vector]);
|
||||
|
||||
if (irq <= VECTOR_UNDEFINED)
|
||||
retry:
|
||||
desc = __this_cpu_read(vector_irq[vector]);
|
||||
if (IS_ERR_OR_NULL(desc))
|
||||
continue;
|
||||
|
||||
desc = irq_to_desc(irq);
|
||||
if (!desc)
|
||||
continue;
|
||||
if (!raw_spin_trylock(&desc->lock)) {
|
||||
raw_spin_unlock(&vector_lock);
|
||||
cpu_relax();
|
||||
raw_spin_lock(&vector_lock);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
data = apic_chip_data(&desc->irq_data);
|
||||
data = apic_chip_data(irq_desc_get_irq_data(desc));
|
||||
if (!data)
|
||||
continue;
|
||||
|
||||
raw_spin_lock(&desc->lock);
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* Check if the irq migration is in progress. If so, we
|
||||
@@ -583,11 +588,13 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
|
||||
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
||||
goto unlock;
|
||||
}
|
||||
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
|
||||
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
|
||||
unlock:
|
||||
raw_spin_unlock(&desc->lock);
|
||||
}
|
||||
|
||||
raw_spin_unlock(&vector_lock);
|
||||
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
|
@@ -182,7 +182,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
return notifier_from_errno(err);
|
||||
}
|
||||
|
||||
static struct notifier_block __refdata x2apic_cpu_notifier = {
|
||||
static struct notifier_block x2apic_cpu_notifier = {
|
||||
.notifier_call = update_clusterinfo,
|
||||
};
|
||||
|
||||
@@ -272,7 +272,6 @@ static struct apic apic_x2apic_cluster = {
|
||||
.send_IPI_all = x2apic_send_IPI_all,
|
||||
.send_IPI_self = x2apic_send_IPI_self,
|
||||
|
||||
.wait_for_init_deassert = false,
|
||||
.inquire_remote_apic = NULL,
|
||||
|
||||
.read = native_apic_msr_read,
|
||||
|
@@ -128,7 +128,6 @@ static struct apic apic_x2apic_phys = {
|
||||
.send_IPI_all = x2apic_send_IPI_all,
|
||||
.send_IPI_self = x2apic_send_IPI_self,
|
||||
|
||||
.wait_for_init_deassert = false,
|
||||
.inquire_remote_apic = NULL,
|
||||
|
||||
.read = native_apic_msr_read,
|
||||
|
@@ -248,7 +248,6 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
|
||||
APIC_DM_STARTUP;
|
||||
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
|
||||
|
||||
atomic_set(&init_deasserted, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -414,7 +413,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
|
||||
.send_IPI_self = uv_send_IPI_self,
|
||||
|
||||
.wakeup_secondary_cpu = uv_wakeup_secondary,
|
||||
.wait_for_init_deassert = false,
|
||||
.inquire_remote_apic = NULL,
|
||||
|
||||
.read = native_apic_msr_read,
|
||||
|
@@ -919,7 +919,7 @@ recalc:
|
||||
} else if (jiffies_since_last_check > idle_period) {
|
||||
unsigned int idle_percentage;
|
||||
|
||||
idle_percentage = stime - last_stime;
|
||||
idle_percentage = cputime_to_jiffies(stime - last_stime);
|
||||
idle_percentage *= 100;
|
||||
idle_percentage /= jiffies_since_last_check;
|
||||
use_apm_idle = (idle_percentage > idle_threshold);
|
||||
|
@@ -1,4 +1,4 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/workqueue.h>
|
||||
@@ -163,6 +163,5 @@ static int start_periodic_check_for_corruption(void)
|
||||
schedule_delayed_work(&bios_check_work, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(start_periodic_check_for_corruption);
|
||||
device_initcall(start_periodic_check_for_corruption);
|
||||
|
||||
|
@@ -46,6 +46,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
|
||||
perf_event_intel_uncore_snb.o \
|
||||
perf_event_intel_uncore_snbep.o \
|
||||
perf_event_intel_uncore_nhmex.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
|
||||
endif
|
||||
|
||||
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/delay.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/mmconfig.h>
|
||||
@@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
const int K6_BUG_LOOP = 1000000;
|
||||
int n;
|
||||
void (*f_vide)(void);
|
||||
unsigned long d, d2;
|
||||
u64 d, d2;
|
||||
|
||||
printk(KERN_INFO "AMD K6 stepping B detected - ");
|
||||
|
||||
@@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
|
||||
n = K6_BUG_LOOP;
|
||||
f_vide = vide;
|
||||
rdtscl(d);
|
||||
d = rdtsc();
|
||||
while (n--)
|
||||
f_vide();
|
||||
rdtscl(d2);
|
||||
d2 = rdtsc();
|
||||
d = d2-d;
|
||||
|
||||
if (d > 20*K6_BUG_LOOP)
|
||||
@@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
||||
/* A random value per boot for bit slice [12:upper_bit) */
|
||||
va_align.bits = get_random_int() & va_align.mask;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_MWAITX))
|
||||
use_mwaitx_delay();
|
||||
}
|
||||
|
||||
static void early_init_amd(struct cpuinfo_x86 *c)
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#include <linux/kgdb.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/stackprotector.h>
|
||||
#include <asm/perf_event.h>
|
||||
@@ -1185,10 +1186,10 @@ void syscall_init(void)
|
||||
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
||||
*/
|
||||
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
||||
wrmsrl(MSR_LSTAR, entry_SYSCALL_64);
|
||||
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
wrmsrl(MSR_CSTAR, entry_SYSCALL_compat);
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
|
||||
/*
|
||||
* This only works on Intel CPUs.
|
||||
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
|
||||
@@ -1199,7 +1200,7 @@ void syscall_init(void)
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, ignore_sysret);
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
|
||||
@@ -1488,3 +1489,20 @@ inline bool __static_cpu_has_safe(u16 bit)
|
||||
return boot_cpu_has(bit);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
|
||||
|
||||
static void bsp_resume(void)
|
||||
{
|
||||
if (this_cpu->c_bsp_resume)
|
||||
this_cpu->c_bsp_resume(&boot_cpu_data);
|
||||
}
|
||||
|
||||
static struct syscore_ops cpu_syscore_ops = {
|
||||
.resume = bsp_resume,
|
||||
};
|
||||
|
||||
static int __init init_cpu_syscore(void)
|
||||
{
|
||||
register_syscore_ops(&cpu_syscore_ops);
|
||||
return 0;
|
||||
}
|
||||
core_initcall(init_cpu_syscore);
|
||||
|
@@ -13,6 +13,7 @@ struct cpu_dev {
|
||||
void (*c_init)(struct cpuinfo_x86 *);
|
||||
void (*c_identify)(struct cpuinfo_x86 *);
|
||||
void (*c_detect_tlb)(struct cpuinfo_x86 *);
|
||||
void (*c_bsp_resume)(struct cpuinfo_x86 *);
|
||||
int c_x86_vendor;
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Optional vendor specific routine to obtain the cache size. */
|
||||
|
@@ -371,6 +371,36 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 epb;
|
||||
|
||||
/*
|
||||
* Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
|
||||
* (x86_energy_perf_policy(8) is available to change it at run-time.)
|
||||
*/
|
||||
if (!cpu_has(c, X86_FEATURE_EPB))
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
|
||||
return;
|
||||
|
||||
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
||||
pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
|
||||
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
|
||||
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
}
|
||||
|
||||
static void intel_bsp_resume(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/*
|
||||
* MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume,
|
||||
* so reinitialize it properly like during bootup:
|
||||
*/
|
||||
init_intel_energy_perf(c);
|
||||
}
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int l2 = 0;
|
||||
@@ -478,21 +508,7 @@ static void init_intel(struct cpuinfo_x86 *c)
|
||||
if (cpu_has(c, X86_FEATURE_VMX))
|
||||
detect_vmx_virtcap(c);
|
||||
|
||||
/*
|
||||
* Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not.
|
||||
* x86_energy_perf_policy(8) is available to change it at run-time
|
||||
*/
|
||||
if (cpu_has(c, X86_FEATURE_EPB)) {
|
||||
u64 epb;
|
||||
|
||||
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
|
||||
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
||||
pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
|
||||
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
|
||||
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
|
||||
}
|
||||
}
|
||||
init_intel_energy_perf(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@@ -747,6 +763,7 @@ static const struct cpu_dev intel_cpu_dev = {
|
||||
.c_detect_tlb = intel_detect_tlb,
|
||||
.c_early_init = early_init_intel,
|
||||
.c_init = init_intel,
|
||||
.c_bsp_resume = intel_bsp_resume,
|
||||
.c_x86_vendor = X86_VENDOR_INTEL,
|
||||
};
|
||||
|
||||
|
@@ -25,32 +25,11 @@
|
||||
*/
|
||||
#define TOPA_PMI_MARGIN 512
|
||||
|
||||
/*
|
||||
* Table of Physical Addresses bits
|
||||
*/
|
||||
enum topa_sz {
|
||||
TOPA_4K = 0,
|
||||
TOPA_8K,
|
||||
TOPA_16K,
|
||||
TOPA_32K,
|
||||
TOPA_64K,
|
||||
TOPA_128K,
|
||||
TOPA_256K,
|
||||
TOPA_512K,
|
||||
TOPA_1MB,
|
||||
TOPA_2MB,
|
||||
TOPA_4MB,
|
||||
TOPA_8MB,
|
||||
TOPA_16MB,
|
||||
TOPA_32MB,
|
||||
TOPA_64MB,
|
||||
TOPA_128MB,
|
||||
TOPA_SZ_END,
|
||||
};
|
||||
#define TOPA_SHIFT 12
|
||||
|
||||
static inline unsigned int sizes(enum topa_sz tsz)
|
||||
static inline unsigned int sizes(unsigned int tsz)
|
||||
{
|
||||
return 1 << (tsz + 12);
|
||||
return 1 << (tsz + TOPA_SHIFT);
|
||||
};
|
||||
|
||||
struct topa_entry {
|
||||
@@ -66,20 +45,26 @@ struct topa_entry {
|
||||
u64 rsvd4 : 16;
|
||||
};
|
||||
|
||||
#define TOPA_SHIFT 12
|
||||
#define PT_CPUID_LEAVES 2
|
||||
#define PT_CPUID_LEAVES 2
|
||||
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
|
||||
|
||||
enum pt_capabilities {
|
||||
PT_CAP_max_subleaf = 0,
|
||||
PT_CAP_cr3_filtering,
|
||||
PT_CAP_psb_cyc,
|
||||
PT_CAP_mtc,
|
||||
PT_CAP_topa_output,
|
||||
PT_CAP_topa_multiple_entries,
|
||||
PT_CAP_single_range_output,
|
||||
PT_CAP_payloads_lip,
|
||||
PT_CAP_mtc_periods,
|
||||
PT_CAP_cycle_thresholds,
|
||||
PT_CAP_psb_periods,
|
||||
};
|
||||
|
||||
struct pt_pmu {
|
||||
struct pmu pmu;
|
||||
u32 caps[4 * PT_CPUID_LEAVES];
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -1,4 +1,4 @@
|
||||
obj-y = mce.o mce-severity.o
|
||||
obj-y = mce.o mce-severity.o mce-genpool.o
|
||||
|
||||
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
|
||||
|
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
|
||||
|
||||
m.addr = mem_err->physical_addr;
|
||||
mce_log(&m);
|
||||
mce_notify_irq();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
|
||||
|
||||
|
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* MCE event pool management in MCE context
|
||||
*
|
||||
* Copyright (C) 2015 Intel Corp.
|
||||
* Author: Chen, Gong <gong.chen@linux.intel.com>
|
||||
*
|
||||
* This file is licensed under GPLv2.
|
||||
*/
|
||||
#include <linux/smp.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/genalloc.h>
|
||||
#include <linux/llist.h>
|
||||
#include "mce-internal.h"
|
||||
|
||||
/*
|
||||
* printk() is not safe in MCE context. This is a lock-less memory allocator
|
||||
* used to save error information organized in a lock-less list.
|
||||
*
|
||||
* This memory pool is only to be used to save MCE records in MCE context.
|
||||
* MCE events are rare, so a fixed size memory pool should be enough. Use
|
||||
* 2 pages to save MCE events for now (~80 MCE records at most).
|
||||
*/
|
||||
#define MCE_POOLSZ (2 * PAGE_SIZE)
|
||||
|
||||
static struct gen_pool *mce_evt_pool;
|
||||
static LLIST_HEAD(mce_event_llist);
|
||||
static char gen_pool_buf[MCE_POOLSZ];
|
||||
|
||||
void mce_gen_pool_process(void)
|
||||
{
|
||||
struct llist_node *head;
|
||||
struct mce_evt_llist *node;
|
||||
struct mce *mce;
|
||||
|
||||
head = llist_del_all(&mce_event_llist);
|
||||
if (!head)
|
||||
return;
|
||||
|
||||
head = llist_reverse_order(head);
|
||||
llist_for_each_entry(node, head, llnode) {
|
||||
mce = &node->mce;
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
|
||||
}
|
||||
}
|
||||
|
||||
bool mce_gen_pool_empty(void)
|
||||
{
|
||||
return llist_empty(&mce_event_llist);
|
||||
}
|
||||
|
||||
int mce_gen_pool_add(struct mce *mce)
|
||||
{
|
||||
struct mce_evt_llist *node;
|
||||
|
||||
if (!mce_evt_pool)
|
||||
return -EINVAL;
|
||||
|
||||
node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
|
||||
if (!node) {
|
||||
pr_warn_ratelimited("MCE records pool full!\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memcpy(&node->mce, mce, sizeof(*mce));
|
||||
llist_add(&node->llnode, &mce_event_llist);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mce_gen_pool_create(void)
|
||||
{
|
||||
struct gen_pool *tmpp;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
|
||||
if (!tmpp)
|
||||
goto out;
|
||||
|
||||
ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
|
||||
if (ret) {
|
||||
gen_pool_destroy(tmpp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
mce_evt_pool = tmpp;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mce_gen_pool_init(void)
|
||||
{
|
||||
/* Just init mce_gen_pool once. */
|
||||
if (mce_evt_pool)
|
||||
return 0;
|
||||
|
||||
return mce_gen_pool_create();
|
||||
}
|
@@ -13,6 +13,8 @@ enum severity_level {
|
||||
MCE_PANIC_SEVERITY,
|
||||
};
|
||||
|
||||
extern struct atomic_notifier_head x86_mce_decoder_chain;
|
||||
|
||||
#define ATTR_LEN 16
|
||||
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
|
||||
|
||||
@@ -24,6 +26,16 @@ struct mce_bank {
|
||||
char attrname[ATTR_LEN]; /* attribute name */
|
||||
};
|
||||
|
||||
struct mce_evt_llist {
|
||||
struct llist_node llnode;
|
||||
struct mce mce;
|
||||
};
|
||||
|
||||
void mce_gen_pool_process(void);
|
||||
bool mce_gen_pool_empty(void);
|
||||
int mce_gen_pool_add(struct mce *mce);
|
||||
int mce_gen_pool_init(void);
|
||||
|
||||
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
|
||||
struct dentry *mce_get_debugfs_dir(void);
|
||||
|
||||
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
void mce_inject_log(struct mce *m);
|
||||
|
@@ -52,11 +52,11 @@
|
||||
|
||||
static DEFINE_MUTEX(mce_chrdev_read_mutex);
|
||||
|
||||
#define rcu_dereference_check_mce(p) \
|
||||
#define mce_log_get_idx_check(p) \
|
||||
({ \
|
||||
rcu_lockdep_assert(rcu_read_lock_sched_held() || \
|
||||
lockdep_is_held(&mce_chrdev_read_mutex), \
|
||||
"suspicious rcu_dereference_check_mce() usage"); \
|
||||
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
|
||||
!lockdep_is_held(&mce_chrdev_read_mutex), \
|
||||
"suspicious mce_log_get_idx_check() usage"); \
|
||||
smp_load_acquire(&(p)); \
|
||||
})
|
||||
|
||||
@@ -110,22 +110,24 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
|
||||
*/
|
||||
mce_banks_t mce_banks_ce_disabled;
|
||||
|
||||
static DEFINE_PER_CPU(struct work_struct, mce_work);
|
||||
static struct work_struct mce_work;
|
||||
static struct irq_work mce_irq_work;
|
||||
|
||||
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
|
||||
static int mce_usable_address(struct mce *m);
|
||||
|
||||
/*
|
||||
* CPU/chipset specific EDAC code can register a notifier call here to print
|
||||
* MCE errors in a human-readable form.
|
||||
*/
|
||||
static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||
|
||||
/* Do initial initialization of a struct mce */
|
||||
void mce_setup(struct mce *m)
|
||||
{
|
||||
memset(m, 0, sizeof(struct mce));
|
||||
m->cpu = m->extcpu = smp_processor_id();
|
||||
rdtscll(m->tsc);
|
||||
m->tsc = rdtsc();
|
||||
/* We hope get_seconds stays lockless */
|
||||
m->time = get_seconds();
|
||||
m->cpuvendor = boot_cpu_data.x86_vendor;
|
||||
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
|
||||
/* Emit the trace record: */
|
||||
trace_mce_record(mce);
|
||||
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||
if (!mce_gen_pool_add(mce))
|
||||
irq_work_queue(&mce_irq_work);
|
||||
|
||||
mce->finished = 0;
|
||||
wmb();
|
||||
for (;;) {
|
||||
entry = rcu_dereference_check_mce(mcelog.next);
|
||||
entry = mce_log_get_idx_check(mcelog.next);
|
||||
for (;;) {
|
||||
|
||||
/*
|
||||
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
|
||||
set_bit(0, &mce_need_notify);
|
||||
}
|
||||
|
||||
static void drain_mcelog_buffer(void)
|
||||
void mce_inject_log(struct mce *m)
|
||||
{
|
||||
unsigned int next, i, prev = 0;
|
||||
|
||||
next = ACCESS_ONCE(mcelog.next);
|
||||
|
||||
do {
|
||||
struct mce *m;
|
||||
|
||||
/* drain what was logged during boot */
|
||||
for (i = prev; i < next; i++) {
|
||||
unsigned long start = jiffies;
|
||||
unsigned retries = 1;
|
||||
|
||||
m = &mcelog.entry[i];
|
||||
|
||||
while (!m->finished) {
|
||||
if (time_after_eq(jiffies, start + 2*retries))
|
||||
retries++;
|
||||
|
||||
cpu_relax();
|
||||
|
||||
if (!m->finished && retries >= 4) {
|
||||
pr_err("skipping error being logged currently!\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
smp_rmb();
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
|
||||
}
|
||||
|
||||
memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
|
||||
prev = next;
|
||||
next = cmpxchg(&mcelog.next, prev, 0);
|
||||
} while (next != prev);
|
||||
mutex_lock(&mce_chrdev_read_mutex);
|
||||
mce_log(m);
|
||||
mutex_unlock(&mce_chrdev_read_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_inject_log);
|
||||
|
||||
static struct notifier_block mce_srao_nb;
|
||||
|
||||
void mce_register_decode_chain(struct notifier_block *nb)
|
||||
{
|
||||
/* Ensure SRAO notifier has the highest priority in the decode chain. */
|
||||
if (nb != &mce_srao_nb && nb->priority == INT_MAX)
|
||||
nb->priority -= 1;
|
||||
|
||||
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
|
||||
drain_mcelog_buffer();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
|
||||
|
||||
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple lockless ring to communicate PFNs from the exception handler with the
|
||||
* process context work function. This is vastly simplified because there's
|
||||
* only a single reader and a single writer.
|
||||
*/
|
||||
#define MCE_RING_SIZE 16 /* we use one entry less */
|
||||
|
||||
struct mce_ring {
|
||||
unsigned short start;
|
||||
unsigned short end;
|
||||
unsigned long ring[MCE_RING_SIZE];
|
||||
};
|
||||
static DEFINE_PER_CPU(struct mce_ring, mce_ring);
|
||||
|
||||
/* Runs with CPU affinity in workqueue */
|
||||
static int mce_ring_empty(void)
|
||||
{
|
||||
struct mce_ring *r = this_cpu_ptr(&mce_ring);
|
||||
|
||||
return r->start == r->end;
|
||||
}
|
||||
|
||||
static int mce_ring_get(unsigned long *pfn)
|
||||
{
|
||||
struct mce_ring *r;
|
||||
int ret = 0;
|
||||
|
||||
*pfn = 0;
|
||||
get_cpu();
|
||||
r = this_cpu_ptr(&mce_ring);
|
||||
if (r->start == r->end)
|
||||
goto out;
|
||||
*pfn = r->ring[r->start];
|
||||
r->start = (r->start + 1) % MCE_RING_SIZE;
|
||||
ret = 1;
|
||||
out:
|
||||
put_cpu();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Always runs in MCE context with preempt off */
|
||||
static int mce_ring_add(unsigned long pfn)
|
||||
{
|
||||
struct mce_ring *r = this_cpu_ptr(&mce_ring);
|
||||
unsigned next;
|
||||
|
||||
next = (r->end + 1) % MCE_RING_SIZE;
|
||||
if (next == r->start)
|
||||
return -1;
|
||||
r->ring[r->end] = pfn;
|
||||
wmb();
|
||||
r->end = next;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mce_available(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mca_cfg.disabled)
|
||||
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
|
||||
|
||||
static void mce_schedule_work(void)
|
||||
{
|
||||
if (!mce_ring_empty())
|
||||
schedule_work(this_cpu_ptr(&mce_work));
|
||||
if (!mce_gen_pool_empty() && keventd_up())
|
||||
schedule_work(&mce_work);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
|
||||
|
||||
static void mce_irq_work_cb(struct irq_work *entry)
|
||||
{
|
||||
mce_notify_irq();
|
||||
@@ -551,9 +472,30 @@ static void mce_report_event(struct pt_regs *regs)
|
||||
return;
|
||||
}
|
||||
|
||||
irq_work_queue(this_cpu_ptr(&mce_irq_work));
|
||||
irq_work_queue(&mce_irq_work);
|
||||
}
|
||||
|
||||
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
struct mce *mce = (struct mce *)data;
|
||||
unsigned long pfn;
|
||||
|
||||
if (!mce)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
|
||||
pfn = mce->addr >> PAGE_SHIFT;
|
||||
memory_failure(pfn, MCE_VECTOR, 0);
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
static struct notifier_block mce_srao_nb = {
|
||||
.notifier_call = srao_decode_notifier,
|
||||
.priority = INT_MAX,
|
||||
};
|
||||
|
||||
/*
|
||||
* Read ADDR and MISC registers.
|
||||
*/
|
||||
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
*/
|
||||
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
|
||||
if (m.status & MCI_STATUS_ADDRV) {
|
||||
mce_ring_add(m.addr >> PAGE_SHIFT);
|
||||
mce_schedule_work();
|
||||
m.severity = severity;
|
||||
m.usable_addr = mce_usable_address(&m);
|
||||
|
||||
if (!mce_gen_pool_add(&m))
|
||||
mce_schedule_work();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1029,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
struct mce m, *final;
|
||||
enum ctx_state prev_state;
|
||||
int i;
|
||||
int worst = 0;
|
||||
int severity;
|
||||
@@ -1055,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
int lmce = 0;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
this_cpu_inc(mce_exception_count);
|
||||
|
||||
@@ -1143,15 +1087,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
mce_read_aux(&m, i);
|
||||
|
||||
/*
|
||||
* Action optional error. Queue address for later processing.
|
||||
* When the ring overflows we just ignore the AO error.
|
||||
* RED-PEN add some logging mechanism when
|
||||
* usable_address or mce_add_ring fails.
|
||||
* RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
|
||||
*/
|
||||
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
|
||||
mce_ring_add(m.addr >> PAGE_SHIFT);
|
||||
/* assuming valid severity level != 0 */
|
||||
m.severity = severity;
|
||||
m.usable_addr = mce_usable_address(&m);
|
||||
|
||||
mce_log(&m);
|
||||
|
||||
@@ -1227,7 +1165,7 @@ out:
|
||||
local_irq_disable();
|
||||
ist_end_non_atomic();
|
||||
done:
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||
|
||||
@@ -1247,14 +1185,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
|
||||
/*
|
||||
* Action optional processing happens here (picking up
|
||||
* from the list of faulting pages that do_machine_check()
|
||||
* placed into the "ring").
|
||||
* placed into the genpool).
|
||||
*/
|
||||
static void mce_process_work(struct work_struct *dummy)
|
||||
{
|
||||
unsigned long pfn;
|
||||
|
||||
while (mce_ring_get(&pfn))
|
||||
memory_failure(pfn, MCE_VECTOR, 0);
|
||||
mce_gen_pool_process();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
@@ -1678,6 +1613,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
|
||||
{
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_INTEL:
|
||||
mce_intel_feature_clear(c);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void mce_start_timer(unsigned int cpu, struct timer_list *t)
|
||||
{
|
||||
unsigned long iv = check_interval * HZ;
|
||||
@@ -1731,13 +1677,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
|
||||
return;
|
||||
}
|
||||
|
||||
if (mce_gen_pool_init()) {
|
||||
mca_cfg.disabled = true;
|
||||
pr_emerg("Couldn't allocate MCE records pool!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
machine_check_vector = do_machine_check;
|
||||
|
||||
__mcheck_cpu_init_generic();
|
||||
__mcheck_cpu_init_vendor(c);
|
||||
__mcheck_cpu_init_timer();
|
||||
INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
|
||||
init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called for each booted CPU to clear some machine checks opt-ins
|
||||
*/
|
||||
void mcheck_cpu_clear(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mca_cfg.disabled)
|
||||
return;
|
||||
|
||||
if (!mce_available(c))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Possibly to clear general settings generic to x86
|
||||
* __mcheck_cpu_clear_generic(c);
|
||||
*/
|
||||
__mcheck_cpu_clear_vendor(c);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1784,7 +1753,7 @@ static void collect_tscs(void *data)
|
||||
{
|
||||
unsigned long *cpu_tsc = (unsigned long *)data;
|
||||
|
||||
rdtscll(cpu_tsc[smp_processor_id()]);
|
||||
cpu_tsc[smp_processor_id()] = rdtsc();
|
||||
}
|
||||
|
||||
static int mce_apei_read_done;
|
||||
@@ -1850,7 +1819,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
|
||||
goto out;
|
||||
}
|
||||
|
||||
next = rcu_dereference_check_mce(mcelog.next);
|
||||
next = mce_log_get_idx_check(mcelog.next);
|
||||
|
||||
/* Only supports full reads right now */
|
||||
err = -EINVAL;
|
||||
@@ -2056,8 +2025,12 @@ __setup("mce", mcheck_enable);
|
||||
int __init mcheck_init(void)
|
||||
{
|
||||
mcheck_intel_therm_init();
|
||||
mce_register_decode_chain(&mce_srao_nb);
|
||||
mcheck_vendor_init_severity();
|
||||
|
||||
INIT_WORK(&mce_work, mce_process_work);
|
||||
init_irq_work(&mce_irq_work, mce_irq_work_cb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2591,5 +2564,20 @@ static int __init mcheck_debugfs_init(void)
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(mcheck_debugfs_init);
|
||||
#else
|
||||
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
|
||||
#endif
|
||||
|
||||
static int __init mcheck_late_init(void)
|
||||
{
|
||||
mcheck_debugfs_init();
|
||||
|
||||
/*
|
||||
* Flush out everything that has been logged during early boot, now that
|
||||
* everything has been initialized (workqueues, decoders, ...).
|
||||
*/
|
||||
mce_schedule_work();
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(mcheck_late_init);
|
||||
|
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
|
||||
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
|
||||
}
|
||||
|
||||
static void cmci_toggle_interrupt_mode(bool on)
|
||||
{
|
||||
unsigned long flags, *owned;
|
||||
int bank;
|
||||
u64 val;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
owned = this_cpu_ptr(mce_banks_owned);
|
||||
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
|
||||
if (on)
|
||||
val |= MCI_CTL2_CMCI_EN;
|
||||
else
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
unsigned long cmci_intel_adjust_timer(unsigned long interval)
|
||||
{
|
||||
if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
|
||||
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
|
||||
*/
|
||||
if (!atomic_read(&cmci_storm_on_cpus)) {
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
|
||||
cmci_reenable();
|
||||
cmci_toggle_interrupt_mode(true);
|
||||
cmci_recheck();
|
||||
}
|
||||
return CMCI_POLL_INTERVAL;
|
||||
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
|
||||
}
|
||||
}
|
||||
|
||||
static void cmci_storm_disable_banks(void)
|
||||
{
|
||||
unsigned long flags, *owned;
|
||||
int bank;
|
||||
u64 val;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
owned = this_cpu_ptr(mce_banks_owned);
|
||||
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
static bool cmci_storm_detect(void)
|
||||
{
|
||||
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
|
||||
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
|
||||
if (cnt <= CMCI_STORM_THRESHOLD)
|
||||
return false;
|
||||
|
||||
cmci_storm_disable_banks();
|
||||
cmci_toggle_interrupt_mode(false);
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
|
||||
r = atomic_add_return(1, &cmci_storm_on_cpus);
|
||||
mce_timer_kick(CMCI_STORM_INTERVAL);
|
||||
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
|
||||
return;
|
||||
|
||||
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
|
||||
mce_notify_irq();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
|
||||
cmci_recheck();
|
||||
}
|
||||
|
||||
void intel_init_lmce(void)
|
||||
static void intel_init_lmce(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
|
||||
wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
|
||||
}
|
||||
|
||||
static void intel_clear_lmce(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (!lmce_supported())
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
|
||||
val &= ~MCG_EXT_CTL_LMCE_EN;
|
||||
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
|
||||
}
|
||||
|
||||
void mce_intel_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
intel_init_thermal(c);
|
||||
intel_init_cmci();
|
||||
intel_init_lmce();
|
||||
}
|
||||
|
||||
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
|
||||
{
|
||||
intel_clear_lmce();
|
||||
}
|
||||
|
@@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly;
|
||||
/* Machine check handler for Pentium class Intel CPUs: */
|
||||
static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
u32 loaddr, hi, lotype;
|
||||
|
||||
prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
@@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
|
@@ -15,12 +15,12 @@
|
||||
/* Machine check handler for WinChip C6: */
|
||||
static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
enum ctx_state prev_state = ist_enter(regs);
|
||||
ist_enter(regs);
|
||||
|
||||
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs, prev_state);
|
||||
ist_exit(regs);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting on the Winchip C6 series */
|
||||
|
@@ -377,17 +377,16 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
|
||||
static void mc_device_remove(struct device *dev, struct subsys_interface *sif)
|
||||
{
|
||||
int cpu = dev->id;
|
||||
|
||||
if (!cpu_online(cpu))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
pr_debug("CPU%d removed\n", cpu);
|
||||
microcode_fini_cpu(cpu);
|
||||
sysfs_remove_group(&dev->kobj, &mc_attr_group);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct subsys_interface mc_cpu_interface = {
|
||||
@@ -460,7 +459,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __refdata mc_cpu_notifier = {
|
||||
static struct notifier_block mc_cpu_notifier = {
|
||||
.notifier_call = mc_cpu_callback,
|
||||
};
|
||||
|
||||
|
@@ -390,7 +390,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static void __ref show_saved_mc(void)
|
||||
static void show_saved_mc(void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned int sig, pf, rev, total_size, data_size, date;
|
||||
|
@@ -18,6 +18,7 @@
|
||||
#include <linux/efi.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/hyperv.h>
|
||||
@@ -28,10 +29,14 @@
|
||||
#include <asm/i8259.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
struct ms_hyperv_info ms_hyperv;
|
||||
EXPORT_SYMBOL_GPL(ms_hyperv);
|
||||
|
||||
static void (*hv_kexec_handler)(void);
|
||||
static void (*hv_crash_handler)(struct pt_regs *regs);
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
static void (*vmbus_handler)(void);
|
||||
|
||||
@@ -67,8 +72,47 @@ void hv_remove_vmbus_irq(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
|
||||
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
|
||||
|
||||
void hv_setup_kexec_handler(void (*handler)(void))
|
||||
{
|
||||
hv_kexec_handler = handler;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
|
||||
|
||||
void hv_remove_kexec_handler(void)
|
||||
{
|
||||
hv_kexec_handler = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
|
||||
|
||||
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
|
||||
{
|
||||
hv_crash_handler = handler;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
|
||||
|
||||
void hv_remove_crash_handler(void)
|
||||
{
|
||||
hv_crash_handler = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
|
||||
#endif
|
||||
|
||||
static void hv_machine_shutdown(void)
|
||||
{
|
||||
if (kexec_in_progress && hv_kexec_handler)
|
||||
hv_kexec_handler();
|
||||
native_machine_shutdown();
|
||||
}
|
||||
|
||||
static void hv_machine_crash_shutdown(struct pt_regs *regs)
|
||||
{
|
||||
if (hv_crash_handler)
|
||||
hv_crash_handler(regs);
|
||||
native_machine_crash_shutdown(regs);
|
||||
}
|
||||
|
||||
|
||||
static uint32_t __init ms_hyperv_platform(void)
|
||||
{
|
||||
u32 eax;
|
||||
@@ -114,6 +158,7 @@ static void __init ms_hyperv_init_platform(void)
|
||||
* Extract the features and hints
|
||||
*/
|
||||
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
|
||||
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
|
||||
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
|
||||
|
||||
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
|
||||
@@ -141,6 +186,9 @@ static void __init ms_hyperv_init_platform(void)
|
||||
no_timer_check = 1;
|
||||
#endif
|
||||
|
||||
machine_ops.shutdown = hv_machine_shutdown;
|
||||
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
|
||||
mark_tsc_unstable("running on Hyper-V");
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
||||
|
@@ -448,7 +448,6 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
|
||||
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
|
||||
increment);
|
||||
}
|
||||
EXPORT_SYMBOL(mtrr_add);
|
||||
|
||||
/**
|
||||
* mtrr_del_page - delete a memory type region
|
||||
@@ -537,7 +536,6 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
|
||||
return -EINVAL;
|
||||
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
|
||||
}
|
||||
EXPORT_SYMBOL(mtrr_del);
|
||||
|
||||
/**
|
||||
* arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
|
||||
|
@@ -1551,7 +1551,7 @@ static void __init filter_events(struct attribute **attrs)
|
||||
}
|
||||
|
||||
/* Merge two pointer arrays */
|
||||
static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
|
||||
__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
|
||||
{
|
||||
struct attribute **new;
|
||||
int j, i;
|
||||
@@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment)
|
||||
int idx = segment >> 3;
|
||||
|
||||
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
if (idx > LDT_ENTRIES)
|
||||
@@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment)
|
||||
return 0;
|
||||
|
||||
desc = &ldt->entries[idx];
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
} else {
|
||||
if (idx > GDT_ENTRIES)
|
||||
return 0;
|
||||
@@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment)
|
||||
return get_desc_base(desc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
||||
#include <asm/compat.h>
|
||||
|
||||
|
@@ -165,7 +165,7 @@ struct intel_excl_cntrs {
|
||||
unsigned core_id; /* per-core: core id */
|
||||
};
|
||||
|
||||
#define MAX_LBR_ENTRIES 16
|
||||
#define MAX_LBR_ENTRIES 32
|
||||
|
||||
enum {
|
||||
X86_PERF_KFREE_SHARED = 0,
|
||||
@@ -594,6 +594,7 @@ struct x86_pmu {
|
||||
struct event_constraint *pebs_constraints;
|
||||
void (*pebs_aliases)(struct perf_event *event);
|
||||
int max_pebs_events;
|
||||
unsigned long free_running_flags;
|
||||
|
||||
/*
|
||||
* Intel LBR
|
||||
@@ -624,6 +625,7 @@ struct x86_pmu {
|
||||
struct x86_perf_task_context {
|
||||
u64 lbr_from[MAX_LBR_ENTRIES];
|
||||
u64 lbr_to[MAX_LBR_ENTRIES];
|
||||
u64 lbr_info[MAX_LBR_ENTRIES];
|
||||
int lbr_callstack_users;
|
||||
int lbr_stack_state;
|
||||
};
|
||||
@@ -793,6 +795,8 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
|
||||
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
|
||||
ssize_t intel_event_sysfs_show(char *page, u64 config);
|
||||
|
||||
struct attribute **merge_attr(struct attribute **a, struct attribute **b);
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
@@ -808,20 +812,6 @@ static inline int amd_pmu_init(void)
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
|
||||
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
|
||||
{
|
||||
/* user explicitly requested branch sampling */
|
||||
if (has_branch_stack(event))
|
||||
return true;
|
||||
|
||||
/* implicit branch sampling to correct PEBS skid */
|
||||
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
|
||||
x86_pmu.intel_cap.pebs_format < 2)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool intel_pmu_has_bts(struct perf_event *event)
|
||||
{
|
||||
if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
|
||||
@@ -873,6 +863,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_hsw_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_skl_pebs_event_constraints[];
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_enable(struct perf_event *event);
|
||||
@@ -911,6 +903,8 @@ void intel_pmu_lbr_init_snb(void);
|
||||
|
||||
void intel_pmu_lbr_init_hsw(void);
|
||||
|
||||
void intel_pmu_lbr_init_skl(void);
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||
|
||||
void intel_pt_interrupt(void);
|
||||
@@ -934,6 +928,7 @@ static inline int is_ht_workaround_enabled(void)
|
||||
{
|
||||
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CPU_SUP_INTEL */
|
||||
|
||||
static inline void reserve_ds_buffers(void)
|
||||
|
@@ -177,6 +177,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
struct event_constraint intel_skl_event_constraints[] = {
|
||||
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
|
||||
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
|
||||
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
|
||||
INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
|
||||
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
|
||||
@@ -193,6 +201,13 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
|
||||
@@ -244,6 +259,200 @@ static u64 intel_pmu_event_map(int hw_event)
|
||||
return intel_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
/*
|
||||
* Notes on the events:
|
||||
* - data reads do not include code reads (comparable to earlier tables)
|
||||
* - data counts include speculative execution (except L1 write, dtlb, bpu)
|
||||
* - remote node access includes remote memory, remote cache, remote mmio.
|
||||
* - prefetches are not included in the counts.
|
||||
* - icache miss does not include decoded icache
|
||||
*/
|
||||
|
||||
#define SKL_DEMAND_DATA_RD BIT_ULL(0)
|
||||
#define SKL_DEMAND_RFO BIT_ULL(1)
|
||||
#define SKL_ANY_RESPONSE BIT_ULL(16)
|
||||
#define SKL_SUPPLIER_NONE BIT_ULL(17)
|
||||
#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
|
||||
#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
|
||||
#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
|
||||
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
|
||||
#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
|
||||
SKL_L3_MISS_REMOTE_HOP0_DRAM| \
|
||||
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
|
||||
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
|
||||
#define SKL_SPL_HIT BIT_ULL(30)
|
||||
#define SKL_SNOOP_NONE BIT_ULL(31)
|
||||
#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
|
||||
#define SKL_SNOOP_MISS BIT_ULL(33)
|
||||
#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
|
||||
#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
|
||||
#define SKL_SNOOP_HITM BIT_ULL(36)
|
||||
#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
|
||||
#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
|
||||
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
|
||||
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
|
||||
SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
|
||||
#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
|
||||
#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
|
||||
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
|
||||
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
|
||||
SKL_SNOOP_HITM|SKL_SPL_HIT)
|
||||
#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
|
||||
#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
|
||||
#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
|
||||
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
|
||||
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
|
||||
|
||||
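The extra-reg tables further below encode each generic LL/NODE cache event as an OFFCORE_RESPONSE event (0x1b7) whose MSR value is OR-ed together from these request, supplier and snoop bits; for instance the LL read-miss entry is SKL_DEMAND_READ|SKL_L3_MISS|SKL_ANY_SNOOP|SKL_SUPPLIER_NONE. A standalone sketch that recomputes that value from the bit positions defined above, purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)	(1ULL << (n))

int main(void)
{
	/* Demand data read that missed L3, any snoop outcome, no supplier. */
	uint64_t ll_read_miss = BIT_ULL(0)			/* SKL_DEMAND_READ (== SKL_DEMAND_DATA_RD) */
			      | BIT_ULL(17)			/* SKL_SUPPLIER_NONE */
			      | (BIT_ULL(26) | BIT_ULL(27) |
				 BIT_ULL(28) | BIT_ULL(29))	/* SKL_L3_MISS */
			      | (BIT_ULL(30) | BIT_ULL(31) |
				 BIT_ULL(32) | BIT_ULL(33) |
				 BIT_ULL(34) | BIT_ULL(35) |
				 BIT_ULL(36) | BIT_ULL(37));	/* SKL_ANY_SNOOP */

	printf("OFFCORE_RSP value for LL read miss = 0x%llx\n",
	       (unsigned long long)ll_read_miss);
	return 0;
}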
static __initconst const u64 skl_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
|
||||
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
|
||||
[ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
|
||||
[ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
|
||||
[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
|
||||
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static __initconst const u64 skl_hw_cache_extra_regs
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
|
||||
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
|
||||
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
|
||||
SKL_L3_MISS|SKL_ANY_SNOOP|
|
||||
SKL_SUPPLIER_NONE,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
|
||||
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
|
||||
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
|
||||
SKL_L3_MISS|SKL_ANY_SNOOP|
|
||||
SKL_SUPPLIER_NONE,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
|
||||
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
|
||||
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
|
||||
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
|
||||
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
|
||||
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
|
||||
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#define SNB_DMND_DATA_RD (1ULL << 0)
|
||||
#define SNB_DMND_RFO (1ULL << 1)
|
||||
#define SNB_DMND_IFETCH (1ULL << 2)
|
||||
@@ -1114,7 +1323,7 @@ static struct extra_reg intel_slm_extra_regs[] __read_mostly =
|
||||
{
|
||||
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
|
||||
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
@@ -1594,6 +1803,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
|
||||
|
||||
loops = 0;
|
||||
again:
|
||||
intel_pmu_lbr_read();
|
||||
intel_pmu_ack_status(status);
|
||||
if (++loops > 100) {
|
||||
static bool warned = false;
|
||||
@@ -1608,16 +1818,16 @@ again:
|
||||
|
||||
inc_irq_stat(apic_perf_irqs);
|
||||
|
||||
intel_pmu_lbr_read();
|
||||
|
||||
/*
|
||||
* CondChgd bit 63 doesn't mean any overflow status. Ignore
|
||||
* and clear the bit.
|
||||
* Ignore a range of extra bits in status that do not indicate
|
||||
* overflow by themselves.
|
||||
*/
|
||||
if (__test_and_clear_bit(63, (unsigned long *)&status)) {
|
||||
if (!status)
|
||||
goto done;
|
||||
}
|
||||
status &= ~(GLOBAL_STATUS_COND_CHG |
|
||||
GLOBAL_STATUS_ASIF |
|
||||
GLOBAL_STATUS_LBRS_FROZEN);
|
||||
if (!status)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* PEBS overflow sets bit 62 in the global status register
|
||||
@@ -1699,18 +1909,22 @@ intel_bts_constraints(struct perf_event *event)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int intel_alt_er(int idx)
|
||||
static int intel_alt_er(int idx, u64 config)
|
||||
{
|
||||
int alt_idx;
|
||||
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
|
||||
return idx;
|
||||
|
||||
if (idx == EXTRA_REG_RSP_0)
|
||||
return EXTRA_REG_RSP_1;
|
||||
alt_idx = EXTRA_REG_RSP_1;
|
||||
|
||||
if (idx == EXTRA_REG_RSP_1)
|
||||
return EXTRA_REG_RSP_0;
|
||||
alt_idx = EXTRA_REG_RSP_0;
|
||||
|
||||
return idx;
|
||||
if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
|
||||
return idx;
|
||||
|
||||
return alt_idx;
|
||||
}
|
||||
|
||||
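intel_alt_er() now takes the event's config and only swaps to the sibling OFFCORE_RSP MSR when the request is representable in that register's valid_mask; on Skylake the two masks are identical, but the Silvermont table above now lists different masks for RSP_0 (0x768005ffffull) and RSP_1 (0x368005ffffull). A standalone sketch of that decision using the Silvermont values; this is illustrative userspace code, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define RSP_0 0
#define RSP_1 1

/* Stand-ins for the two OFFCORE_RSP valid masks from the Silvermont table. */
static const uint64_t valid_mask[2] = {
	0x768005ffffULL,	/* RSP_0 */
	0x368005ffffULL,	/* RSP_1 */
};

/* Mirror of the new intel_alt_er() logic: swap only if the config still fits. */
static int alt_er(int idx, uint64_t config)
{
	int alt = (idx == RSP_0) ? RSP_1 : RSP_0;

	if (config & ~valid_mask[alt])
		return idx;	/* sibling MSR cannot encode this request */
	return alt;
}

int main(void)
{
	/* Bit 38 is valid in RSP_0 but not in RSP_1 with these masks. */
	printf("%d\n", alt_er(RSP_0, 1ULL << 38));	/* stays on RSP_0 */
	printf("%d\n", alt_er(RSP_1, 1ULL << 16));	/* may move to RSP_0 */
	return 0;
}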
static void intel_fixup_er(struct perf_event *event, int idx)
|
||||
@@ -1799,7 +2013,7 @@ again:
|
||||
*/
|
||||
c = NULL;
|
||||
} else {
|
||||
idx = intel_alt_er(idx);
|
||||
idx = intel_alt_er(idx, reg->config);
|
||||
if (idx != reg->idx) {
|
||||
raw_spin_unlock_irqrestore(&era->lock, flags);
|
||||
goto again;
|
||||
@@ -2253,6 +2467,15 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
|
||||
{
|
||||
unsigned long flags = x86_pmu.free_running_flags;
|
||||
|
||||
if (event->attr.use_clockid)
|
||||
flags &= ~PERF_SAMPLE_TIME;
|
||||
return flags;
|
||||
}
|
||||
|
||||
static int intel_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = x86_pmu_hw_config(event);
|
||||
@@ -2263,7 +2486,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
if (event->attr.precise_ip) {
|
||||
if (!event->attr.freq) {
|
||||
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
|
||||
if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
|
||||
if (!(event->attr.sample_type &
|
||||
~intel_pmu_free_running_flags(event)))
|
||||
event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
|
||||
}
|
||||
if (x86_pmu.pebs_aliases)
|
||||
@@ -2694,6 +2918,8 @@ static __initconst const struct x86_pmu core_pmu = {
|
||||
.event_map = intel_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
|
||||
.apic = 1,
|
||||
.free_running_flags = PEBS_FREERUNNING_FLAGS,
|
||||
|
||||
/*
|
||||
* Intel PMCs cannot be accessed sanely above 32-bit width,
|
||||
* so we install an artificial 1<<31 period regardless of
|
||||
@@ -2732,6 +2958,7 @@ static __initconst const struct x86_pmu intel_pmu = {
|
||||
.event_map = intel_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
|
||||
.apic = 1,
|
||||
.free_running_flags = PEBS_FREERUNNING_FLAGS,
|
||||
/*
|
||||
* Intel PMCs cannot be accessed sanely above 32 bit width,
|
||||
* so we install an artificial 1<<31 period regardless of
|
||||
@@ -3269,6 +3496,29 @@ __init int intel_pmu_init(void)
|
||||
pr_cont("Broadwell events, ");
|
||||
break;
|
||||
|
||||
case 78: /* 14nm Skylake Mobile */
|
||||
case 94: /* 14nm Skylake Desktop */
|
||||
x86_pmu.late_ack = true;
|
||||
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
|
||||
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
|
||||
intel_pmu_lbr_init_skl();
|
||||
|
||||
x86_pmu.event_constraints = intel_skl_event_constraints;
|
||||
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
|
||||
x86_pmu.extra_regs = intel_skl_extra_regs;
|
||||
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
|
||||
/* all extra regs are per-cpu when HT is on */
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
|
||||
|
||||
x86_pmu.hw_config = hsw_hw_config;
|
||||
x86_pmu.get_event_constraints = hsw_get_event_constraints;
|
||||
x86_pmu.cpu_events = hsw_events_attrs;
|
||||
WARN_ON(!x86_pmu.format_attrs);
|
||||
x86_pmu.cpu_events = hsw_events_attrs;
|
||||
pr_cont("Skylake events, ");
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (x86_pmu.version) {
|
||||
case 1:
|
||||
@@ -3338,7 +3588,7 @@ __init int intel_pmu_init(void)
|
||||
*/
|
||||
if (x86_pmu.extra_regs) {
|
||||
for (er = x86_pmu.extra_regs; er->msr; er++) {
|
||||
er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
|
||||
er->extra_msr_access = check_msr(er->msr, 0x11UL);
|
||||
/* Disable LBR select mapping */
|
||||
if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
|
||||
x86_pmu.lbr_sel_map = NULL;
|
||||
|
@@ -62,9 +62,6 @@ struct bts_buffer {
|
||||
|
||||
struct pmu bts_pmu;
|
||||
|
||||
void intel_pmu_enable_bts(u64 config);
|
||||
void intel_pmu_disable_bts(void);
|
||||
|
||||
static size_t buf_size(struct page *page)
|
||||
{
|
||||
return 1 << (PAGE_SHIFT + page_private(page));
|
||||
|
@@ -224,6 +224,19 @@ union hsw_tsx_tuning {
|
||||
|
||||
#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
|
||||
|
||||
/* Same as HSW, plus TSC */
|
||||
|
||||
struct pebs_record_skl {
|
||||
u64 flags, ip;
|
||||
u64 ax, bx, cx, dx;
|
||||
u64 si, di, bp, sp;
|
||||
u64 r8, r9, r10, r11;
|
||||
u64 r12, r13, r14, r15;
|
||||
u64 status, dla, dse, lat;
|
||||
u64 real_ip, tsx_tuning;
|
||||
u64 tsc;
|
||||
};
|
||||
|
||||
void init_debug_store_on_cpu(int cpu)
|
||||
{
|
||||
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
||||
@@ -675,6 +688,28 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
struct event_constraint intel_skl_pebs_event_constraints[] = {
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
|
||||
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
|
||||
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
|
||||
/* Allow all events as PEBS with no flags */
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c;
|
||||
@@ -754,6 +789,11 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct debug_store *ds = cpuc->ds;
|
||||
bool large_pebs = ds->pebs_interrupt_threshold >
|
||||
ds->pebs_buffer_base + x86_pmu.pebs_record_size;
|
||||
|
||||
if (large_pebs)
|
||||
intel_pmu_drain_pebs_buffer();
|
||||
|
||||
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
||||
|
||||
@@ -762,12 +802,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
||||
cpuc->pebs_enabled &= ~(1ULL << 63);
|
||||
|
||||
if (ds->pebs_interrupt_threshold >
|
||||
ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
|
||||
intel_pmu_drain_pebs_buffer();
|
||||
if (!pebs_is_enabled(cpuc))
|
||||
perf_sched_cb_dec(event->ctx->pmu);
|
||||
}
|
||||
if (large_pebs && !pebs_is_enabled(cpuc))
|
||||
perf_sched_cb_dec(event->ctx->pmu);
|
||||
|
||||
if (cpuc->enabled)
|
||||
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
||||
@@ -885,7 +921,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
|
||||
static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
|
||||
{
|
||||
if (pebs->tsx_tuning) {
|
||||
union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
|
||||
@@ -894,7 +930,7 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
|
||||
static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
|
||||
{
|
||||
u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
|
||||
|
||||
@@ -918,7 +954,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
|
||||
* unconditionally access the 'extra' entries.
|
||||
*/
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct pebs_record_hsw *pebs = __pebs;
|
||||
struct pebs_record_skl *pebs = __pebs;
|
||||
u64 sample_type;
|
||||
int fll, fst, dsrc;
|
||||
int fl = event->hw.flags;
|
||||
@@ -1016,6 +1052,16 @@ static void setup_pebs_sample_data(struct perf_event *event,
|
||||
data->txn = intel_hsw_transaction(pebs);
|
||||
}
|
||||
|
||||
/*
|
||||
* v3 supplies an accurate time stamp, so we use that
|
||||
* for the time stamp.
|
||||
*
|
||||
* We can only do this for the default trace clock.
|
||||
*/
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3 &&
|
||||
event->attr.use_clockid == 0)
|
||||
data->time = native_sched_clock_from_tsc(pebs->tsc);
|
||||
|
||||
if (has_branch_stack(event))
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
}
|
||||
@@ -1142,6 +1188,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
||||
|
||||
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
|
||||
struct pebs_record_nhm *p = at;
|
||||
u64 pebs_status;
|
||||
|
||||
/* PEBS v3 has accurate status bits */
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3) {
|
||||
@@ -1152,12 +1199,17 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
||||
continue;
|
||||
}
|
||||
|
||||
bit = find_first_bit((unsigned long *)&p->status,
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
|
||||
|
||||
bit = find_first_bit((unsigned long *)&pebs_status,
|
||||
x86_pmu.max_pebs_events);
|
||||
if (bit >= x86_pmu.max_pebs_events)
|
||||
continue;
|
||||
if (!test_bit(bit, cpuc->active_mask))
|
||||
if (WARN(bit >= x86_pmu.max_pebs_events,
|
||||
"PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
|
||||
(unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
|
||||
*(unsigned long long *)cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The PEBS hardware does not deal well with the situation
|
||||
* when events happen near to each other and multiple bits
|
||||
@@ -1172,27 +1224,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
||||
* one, and it's not possible to reconstruct all events
|
||||
* that caused the PEBS record. It's called collision.
|
||||
* If collision happened, the record will be dropped.
|
||||
*
|
||||
*/
|
||||
if (p->status != (1 << bit)) {
|
||||
u64 pebs_status;
|
||||
|
||||
/* slow path */
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
|
||||
if (pebs_status != (1 << bit)) {
|
||||
for_each_set_bit(i, (unsigned long *)&pebs_status,
|
||||
MAX_PEBS_EVENTS)
|
||||
error[i]++;
|
||||
continue;
|
||||
}
|
||||
if (p->status != (1ULL << bit)) {
|
||||
for_each_set_bit(i, (unsigned long *)&pebs_status,
|
||||
x86_pmu.max_pebs_events)
|
||||
error[i]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
counts[bit]++;
|
||||
}
|
||||
|
||||
for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
|
||||
if ((counts[bit] == 0) && (error[bit] == 0))
|
||||
continue;
|
||||
|
||||
event = cpuc->events[bit];
|
||||
WARN_ON_ONCE(!event);
|
||||
WARN_ON_ONCE(!event->attr.precise_ip);
|
||||
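The rewritten drain loop above first masks the hardware status word with the counters actually enabled for PEBS and with the number of PEBS-capable counters, and only then picks the lowest set bit; a record whose masked status is empty is dropped with a warning instead of being misattributed. A small userspace-style sketch of that selection, with __builtin_ctzll standing in for the kernel's find_first_bit() and made-up counter numbers:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical setup: 4 PEBS-capable counters. */
#define MAX_PEBS_EVENTS	4

static int pick_pebs_counter(uint64_t hw_status, uint64_t pebs_enabled)
{
	uint64_t status = hw_status & pebs_enabled;

	status &= (1ULL << MAX_PEBS_EVENTS) - 1;	/* ignore non-counter bits */
	if (!status)
		return -1;				/* record without an owning event: skip */
	return __builtin_ctzll(status);			/* lowest set bit, like find_first_bit() */
}

int main(void)
{
	printf("%d\n", pick_pebs_counter(0x5, 0x5));	/* counters 0 and 2 fired, pick 0 */
	printf("%d\n", pick_pebs_counter(0x2, 0x5));	/* status bit is not ours: -1 */
	return 0;
}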
@@ -1245,6 +1291,14 @@ void __init intel_ds_init(void)
|
||||
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
pr_cont("PEBS fmt3%c, ", pebs_type);
|
||||
x86_pmu.pebs_record_size =
|
||||
sizeof(struct pebs_record_skl);
|
||||
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
|
||||
x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
|
||||
x86_pmu.pebs = 0;
@@ -13,7 +13,8 @@ enum {
|
||||
LBR_FORMAT_EIP = 0x02,
|
||||
LBR_FORMAT_EIP_FLAGS = 0x03,
|
||||
LBR_FORMAT_EIP_FLAGS2 = 0x04,
|
||||
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
|
||||
LBR_FORMAT_INFO = 0x05,
|
||||
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
|
||||
};
|
||||
|
||||
static enum {
|
||||
@@ -139,6 +140,13 @@ static void __intel_pmu_lbr_enable(bool pmi)
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
u64 debugctl, lbr_select = 0, orig_debugctl;
|
||||
|
||||
/*
|
||||
* No need to unfreeze manually, as v4 can do that as part
|
||||
* of the GLOBAL_STATUS ack.
|
||||
*/
|
||||
if (pmi && x86_pmu.version >= 4)
|
||||
return;
|
||||
|
||||
/*
|
||||
* No need to reprogram LBR_SELECT in a PMI, as it
|
||||
* did not change.
|
||||
@@ -186,6 +194,8 @@ static void intel_pmu_lbr_reset_64(void)
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
wrmsrl(x86_pmu.lbr_from + i, 0);
|
||||
wrmsrl(x86_pmu.lbr_to + i, 0);
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
|
||||
wrmsrl(MSR_LBR_INFO_0 + i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,10 +240,12 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
|
||||
|
||||
mask = x86_pmu.lbr_nr - 1;
|
||||
tos = intel_pmu_lbr_tos();
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
for (i = 0; i < tos; i++) {
|
||||
lbr_idx = (tos - i) & mask;
|
||||
wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
|
||||
wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
|
||||
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
|
||||
}
|
||||
task_ctx->lbr_stack_state = LBR_NONE;
|
||||
}
|
||||
@@ -251,10 +263,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
|
||||
|
||||
mask = x86_pmu.lbr_nr - 1;
|
||||
tos = intel_pmu_lbr_tos();
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
for (i = 0; i < tos; i++) {
|
||||
lbr_idx = (tos - i) & mask;
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
|
||||
rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
|
||||
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
|
||||
}
|
||||
task_ctx->lbr_stack_state = LBR_VALID;
|
||||
}
|
||||
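With the call-stack feature only the entries up to the hardware top-of-stack are meaningful, so the save/restore loops above (and, for call-stack mode, intel_pmu_lbr_read_64() below) walk tos entries backwards through the LBR ring rather than all lbr_nr slots. A toy illustration of the (tos - i) & mask indexing, assuming an 8-entry ring:

#include <stdio.h>

int main(void)
{
	const unsigned int lbr_nr = 8;		/* assumed ring size, a power of two */
	const unsigned int mask = lbr_nr - 1;
	unsigned int tos = 3;			/* hardware top-of-stack index */

	/* Walk from the newest entry backwards, wrapping around the ring. */
	for (unsigned int i = 0; i < tos; i++)
		printf("slot %u\n", (tos - i) & mask);	/* prints 3, 2, 1 */
	return 0;
}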
@@ -411,16 +425,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
|
||||
u64 tos = intel_pmu_lbr_tos();
|
||||
int i;
|
||||
int out = 0;
|
||||
int num = x86_pmu.lbr_nr;
|
||||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
|
||||
num = tos;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
unsigned long lbr_idx = (tos - i) & mask;
|
||||
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
|
||||
int skip = 0;
|
||||
u16 cycles = 0;
|
||||
int lbr_flags = lbr_desc[lbr_format];
|
||||
|
||||
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
|
||||
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
|
||||
|
||||
if (lbr_format == LBR_FORMAT_INFO) {
|
||||
u64 info;
|
||||
|
||||
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
|
||||
mis = !!(info & LBR_INFO_MISPRED);
|
||||
pred = !mis;
|
||||
in_tx = !!(info & LBR_INFO_IN_TX);
|
||||
abort = !!(info & LBR_INFO_ABORT);
|
||||
cycles = (info & LBR_INFO_CYCLES);
|
||||
}
|
||||
if (lbr_flags & LBR_EIP_FLAGS) {
|
||||
mis = !!(from & LBR_FROM_FLAG_MISPRED);
|
||||
pred = !mis;
|
||||
@@ -450,6 +479,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
|
||||
cpuc->lbr_entries[out].predicted = pred;
|
||||
cpuc->lbr_entries[out].in_tx = in_tx;
|
||||
cpuc->lbr_entries[out].abort = abort;
|
||||
cpuc->lbr_entries[out].cycles = cycles;
|
||||
cpuc->lbr_entries[out].reserved = 0;
|
||||
out++;
|
||||
}
|
||||
@@ -947,6 +977,26 @@ void intel_pmu_lbr_init_hsw(void)
|
||||
pr_cont("16-deep LBR, ");
|
||||
}
|
||||
|
||||
/* skylake */
|
||||
__init void intel_pmu_lbr_init_skl(void)
|
||||
{
|
||||
x86_pmu.lbr_nr = 32;
|
||||
x86_pmu.lbr_tos = MSR_LBR_TOS;
|
||||
x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
|
||||
x86_pmu.lbr_to = MSR_LBR_NHM_TO;
|
||||
|
||||
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
|
||||
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
|
||||
|
||||
/*
|
||||
* SW branch filter usage:
|
||||
* - support syscall, sysret capture.
|
||||
* That requires LBR_FAR but that means far
|
||||
* jmp need to be filtered out
|
||||
*/
|
||||
pr_cont("32-deep LBR, ");
|
||||
}
|
||||
|
||||
/* atom */
|
||||
void __init intel_pmu_lbr_init_atom(void)
|
||||
{
|
||||
|
@@ -65,15 +65,21 @@ static struct pt_cap_desc {
|
||||
} pt_caps[] = {
|
||||
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
|
||||
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
|
||||
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
|
||||
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
|
||||
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
|
||||
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
|
||||
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
|
||||
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
|
||||
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
|
||||
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
|
||||
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
|
||||
};
|
||||
|
||||
static u32 pt_cap_get(enum pt_capabilities cap)
|
||||
{
|
||||
struct pt_cap_desc *cd = &pt_caps[cap];
|
||||
u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
|
||||
u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
|
||||
unsigned int shift = __ffs(cd->mask);
|
||||
|
||||
return (c & cd->mask) >> shift;
|
||||
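pt_cap_get() isolates one capability field from the cached CPUID words by masking it and shifting it down to bit 0 using __ffs() of the mask. The same extraction in isolation, using the mtc_periods mask from the table above (bits 31:16 of CPUID leaf 0x14, subleaf 1, EAX) and a made-up register value:

#include <stdint.h>
#include <stdio.h>

/* Extract a capability field: mask it out, then shift down to bit 0. */
static uint32_t cap_get(uint32_t reg, uint32_t mask)
{
	return (reg & mask) >> __builtin_ctz(mask);	/* __ffs() equivalent for nonzero masks */
}

int main(void)
{
	/* Hypothetical CPUID.(EAX=0x14, ECX=1):EAX value. */
	uint32_t eax = 0x02490000;

	printf("mtc_periods bitmap = 0x%x\n", cap_get(eax, 0xffff0000));	/* prints 0x249 */
	return 0;
}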
@@ -94,12 +100,22 @@ static struct attribute_group pt_cap_group = {
|
||||
.name = "caps",
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(cyc, "config:1" );
|
||||
PMU_FORMAT_ATTR(mtc, "config:9" );
|
||||
PMU_FORMAT_ATTR(tsc, "config:10" );
|
||||
PMU_FORMAT_ATTR(noretcomp, "config:11" );
|
||||
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
|
||||
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
|
||||
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
|
||||
|
||||
static struct attribute *pt_formats_attr[] = {
|
||||
&format_attr_cyc.attr,
|
||||
&format_attr_mtc.attr,
|
||||
&format_attr_tsc.attr,
|
||||
&format_attr_noretcomp.attr,
|
||||
&format_attr_mtc_period.attr,
|
||||
&format_attr_cyc_thresh.attr,
|
||||
&format_attr_psb_period.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -129,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
|
||||
|
||||
for (i = 0; i < PT_CPUID_LEAVES; i++) {
|
||||
cpuid_count(20, i,
|
||||
&pt_pmu.caps[CR_EAX + i*4],
|
||||
&pt_pmu.caps[CR_EBX + i*4],
|
||||
&pt_pmu.caps[CR_ECX + i*4],
|
||||
&pt_pmu.caps[CR_EDX + i*4]);
|
||||
&pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
|
||||
&pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
|
||||
&pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
|
||||
&pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
@@ -170,15 +186,65 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC)
|
||||
#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
|
||||
RTIT_CTL_CYC_THRESH | \
|
||||
RTIT_CTL_PSB_FREQ)
|
||||
|
||||
#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
|
||||
RTIT_CTL_MTC_RANGE)
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
|
||||
RTIT_CTL_DISRETC | \
|
||||
RTIT_CTL_CYC_PSB | \
|
||||
RTIT_CTL_MTC)
|
||||
|
||||
static bool pt_event_valid(struct perf_event *event)
|
||||
{
|
||||
u64 config = event->attr.config;
|
||||
u64 allowed, requested;
|
||||
|
||||
if ((config & PT_CONFIG_MASK) != config)
|
||||
return false;
|
||||
|
||||
if (config & RTIT_CTL_CYC_PSB) {
|
||||
if (!pt_cap_get(PT_CAP_psb_cyc))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_psb_periods);
|
||||
requested = (config & RTIT_CTL_PSB_FREQ) >>
|
||||
RTIT_CTL_PSB_FREQ_OFFSET;
|
||||
if (requested && (!(allowed & BIT(requested))))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_cycle_thresholds);
|
||||
requested = (config & RTIT_CTL_CYC_THRESH) >>
|
||||
RTIT_CTL_CYC_THRESH_OFFSET;
|
||||
if (requested && (!(allowed & BIT(requested))))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config & RTIT_CTL_MTC) {
|
||||
/*
|
||||
* In the unlikely case that CPUID lists valid mtc periods,
|
||||
* but not the mtc capability, drop out here.
|
||||
*
|
||||
* Spec says that setting mtc period bits while mtc bit in
|
||||
* CPUID is 0 will #GP, so better safe than sorry.
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_mtc))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_mtc_periods);
|
||||
if (!allowed)
|
||||
return false;
|
||||
|
||||
requested = (config & RTIT_CTL_MTC_RANGE) >>
|
||||
RTIT_CTL_MTC_RANGE_OFFSET;
|
||||
|
||||
if (!(allowed & BIT(requested)))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -191,6 +257,11 @@ static void pt_config(struct perf_event *event)
|
||||
{
|
||||
u64 reg;
|
||||
|
||||
if (!event->hw.itrace_started) {
|
||||
event->hw.itrace_started = 1;
|
||||
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
|
||||
}
|
||||
|
||||
reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
@@ -910,7 +981,6 @@ void intel_pt_interrupt(void)
|
||||
|
||||
pt_config_buffer(buf->cur->table, buf->cur_idx,
|
||||
buf->output_off);
|
||||
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
|
||||
pt_config(event);
|
||||
}
|
||||
}
|
||||
@@ -934,7 +1004,6 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
|
||||
pt_config_buffer(buf->cur->table, buf->cur_idx,
|
||||
buf->output_off);
|
||||
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
|
||||
pt_config(event);
|
||||
}
|
||||
|
||||
|
@@ -86,6 +86,10 @@ static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
|
||||
1<<RAPL_IDX_RAM_NRG_STAT|\
|
||||
1<<RAPL_IDX_PP1_NRG_STAT)
|
||||
|
||||
/* Knights Landing has PKG, RAM */
|
||||
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT)
|
||||
|
||||
/*
|
||||
* event code: LSB 8 bits, passed in attr->config
|
||||
* any other bit is reserved
|
||||
@@ -486,6 +490,18 @@ static struct attribute *rapl_events_hsw_attr[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_knl_attr[] = {
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_events_group = {
|
||||
.name = "events",
|
||||
.attrs = NULL, /* patched at runtime */
|
||||
@@ -730,6 +746,10 @@ static int __init rapl_pmu_init(void)
|
||||
rapl_cntr_mask = RAPL_IDX_SRV;
|
||||
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
|
||||
break;
|
||||
case 87: /* Knights Landing */
|
||||
rapl_add_quirk(rapl_hsw_server_quirk);
|
||||
rapl_cntr_mask = RAPL_IDX_KNL;
|
||||
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
break;
|
||||
|
||||
default:
|
||||
/* unsupported */
|
||||
|
@@ -911,6 +911,9 @@ static int __init uncore_pci_init(void)
|
||||
case 63: /* Haswell-EP */
|
||||
ret = hswep_uncore_pci_init();
|
||||
break;
|
||||
case 86: /* BDX-DE */
|
||||
ret = bdx_uncore_pci_init();
|
||||
break;
|
||||
case 42: /* Sandy Bridge */
|
||||
ret = snb_uncore_pci_init();
|
||||
break;
|
||||
@@ -1209,6 +1212,11 @@ static int __init uncore_cpu_init(void)
|
||||
break;
|
||||
case 42: /* Sandy Bridge */
|
||||
case 58: /* Ivy Bridge */
|
||||
case 60: /* Haswell */
|
||||
case 69: /* Haswell */
|
||||
case 70: /* Haswell */
|
||||
case 61: /* Broadwell */
|
||||
case 71: /* Broadwell */
|
||||
snb_uncore_cpu_init();
|
||||
break;
|
||||
case 45: /* Sandy Bridge-EP */
|
||||
@@ -1224,6 +1232,9 @@ static int __init uncore_cpu_init(void)
|
||||
case 63: /* Haswell-EP */
|
||||
hswep_uncore_cpu_init();
|
||||
break;
|
||||
case 86: /* BDX-DE */
|
||||
bdx_uncore_cpu_init();
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@@ -336,6 +336,8 @@ int ivbep_uncore_pci_init(void);
|
||||
void ivbep_uncore_cpu_init(void);
|
||||
int hswep_uncore_pci_init(void);
|
||||
void hswep_uncore_cpu_init(void);
|
||||
int bdx_uncore_pci_init(void);
|
||||
void bdx_uncore_cpu_init(void);
|
||||
|
||||
/* perf_event_intel_uncore_nhmex.c */
|
||||
void nhmex_uncore_cpu_init(void);
|
||||
|
@@ -45,6 +45,11 @@
|
||||
#define SNB_UNC_CBO_0_PER_CTR0 0x706
|
||||
#define SNB_UNC_CBO_MSR_OFFSET 0x10
|
||||
|
||||
/* SNB ARB register */
|
||||
#define SNB_UNC_ARB_PER_CTR0 0x3b0
|
||||
#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
|
||||
#define SNB_UNC_ARB_MSR_OFFSET 0x10
|
||||
|
||||
/* NHM global control register */
|
||||
#define NHM_UNC_PERF_GLOBAL_CTL 0x391
|
||||
#define NHM_UNC_FIXED_CTR 0x394
|
||||
@@ -115,7 +120,7 @@ static struct intel_uncore_ops snb_uncore_msr_ops = {
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct event_constraint snb_uncore_cbox_constraints[] = {
|
||||
static struct event_constraint snb_uncore_arb_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
|
||||
EVENT_CONSTRAINT_END
|
||||
@@ -134,14 +139,28 @@ static struct intel_uncore_type snb_uncore_cbox = {
|
||||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.constraints = snb_uncore_cbox_constraints,
|
||||
.ops = &snb_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
.event_descs = snb_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snb_uncore_arb = {
|
||||
.name = "arb",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 44,
|
||||
.perf_ctr = SNB_UNC_ARB_PER_CTR0,
|
||||
.event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_ARB_MSR_OFFSET,
|
||||
.constraints = snb_uncore_arb_constraints,
|
||||
.ops = &snb_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *snb_msr_uncores[] = {
|
||||
&snb_uncore_cbox,
|
||||
&snb_uncore_arb,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@@ -2215,7 +2215,7 @@ static struct intel_uncore_type *hswep_pci_uncores[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
|
||||
static const struct pci_device_id hswep_uncore_pci_ids[] = {
|
||||
{ /* Home Agent 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
|
||||
@@ -2321,3 +2321,167 @@ int hswep_uncore_pci_init(void)
|
||||
return 0;
|
||||
}
|
||||
/* end of Haswell-EP uncore support */
|
||||
|
||||
/* BDX-DE uncore support */
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_ubox = {
|
||||
.name = "ubox",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
.fixed_ctr_bits = 48,
|
||||
.perf_ctr = HSWEP_U_MSR_PMON_CTR0,
|
||||
.event_ctl = HSWEP_U_MSR_PMON_CTL0,
|
||||
.event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
|
||||
.fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
|
||||
.fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
|
||||
.num_shared_regs = 1,
|
||||
.ops = &ivbep_uncore_msr_ops,
|
||||
.format_group = &ivbep_uncore_ubox_format_group,
|
||||
};
|
||||
|
||||
static struct event_constraint bdx_uncore_cbox_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_cbox = {
|
||||
.name = "cbox",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 8,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_ctl = HSWEP_C0_MSR_PMON_CTL0,
|
||||
.perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
|
||||
.event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
|
||||
.msr_offset = HSWEP_CBO_MSR_OFFSET,
|
||||
.num_shared_regs = 1,
|
||||
.constraints = bdx_uncore_cbox_constraints,
|
||||
.ops = &hswep_uncore_cbox_ops,
|
||||
.format_group = &hswep_uncore_cbox_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *bdx_msr_uncores[] = {
|
||||
&bdx_uncore_ubox,
|
||||
&bdx_uncore_cbox,
|
||||
&hswep_uncore_pcu,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void bdx_uncore_cpu_init(void)
|
||||
{
|
||||
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
|
||||
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
|
||||
uncore_msr_uncores = bdx_msr_uncores;
|
||||
}
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_ha = {
|
||||
.name = "ha",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
SNBEP_UNCORE_PCI_COMMON_INIT(),
|
||||
};
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_imc = {
|
||||
.name = "imc",
|
||||
.num_counters = 5,
|
||||
.num_boxes = 2,
|
||||
.perf_ctr_bits = 48,
|
||||
.fixed_ctr_bits = 48,
|
||||
.fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
|
||||
.fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
|
||||
.event_descs = hswep_uncore_imc_events,
|
||||
SNBEP_UNCORE_PCI_COMMON_INIT(),
|
||||
};
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_irp = {
|
||||
.name = "irp",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = SNBEP_PCI_PMON_BOX_CTL,
|
||||
.ops = &hswep_uncore_irp_ops,
|
||||
.format_group = &snbep_uncore_format_group,
|
||||
};
|
||||
|
||||
|
||||
static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type bdx_uncore_r2pcie = {
|
||||
.name = "r2pcie",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
.constraints = bdx_uncore_r2pcie_constraints,
|
||||
SNBEP_UNCORE_PCI_COMMON_INIT(),
|
||||
};
|
||||
|
||||
enum {
|
||||
BDX_PCI_UNCORE_HA,
|
||||
BDX_PCI_UNCORE_IMC,
|
||||
BDX_PCI_UNCORE_IRP,
|
||||
BDX_PCI_UNCORE_R2PCIE,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *bdx_pci_uncores[] = {
|
||||
[BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
|
||||
[BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
|
||||
[BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
|
||||
[BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
|
||||
{ /* Home Agent 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
|
||||
},
|
||||
{ /* MC0 Channel 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* MC0 Channel 1 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
|
||||
},
|
||||
{ /* IRP */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
|
||||
},
|
||||
{ /* R2PCIe */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ }
|
||||
};
|
||||
|
||||
static struct pci_driver bdx_uncore_pci_driver = {
|
||||
.name = "bdx_uncore",
|
||||
.id_table = bdx_uncore_pci_ids,
|
||||
};
|
||||
|
||||
int bdx_uncore_pci_init(void)
|
||||
{
|
||||
int ret = snbep_pci2phy_map_init(0x6f1e);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
uncore_pci_uncores = bdx_pci_uncores;
|
||||
uncore_pci_driver = &bdx_uncore_pci_driver;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* end of BDX-DE uncore support */
arch/x86/kernel/cpu/perf_event_msr.c (new regular file, 242 lines)
@@ -0,0 +1,242 @@
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
enum perf_msr_id {
|
||||
PERF_MSR_TSC = 0,
|
||||
PERF_MSR_APERF = 1,
|
||||
PERF_MSR_MPERF = 2,
|
||||
PERF_MSR_PPERF = 3,
|
||||
PERF_MSR_SMI = 4,
|
||||
|
||||
PERF_MSR_EVENT_MAX,
|
||||
};
|
||||
|
||||
bool test_aperfmperf(int idx)
|
||||
{
|
||||
return boot_cpu_has(X86_FEATURE_APERFMPERF);
|
||||
}
|
||||
|
||||
bool test_intel(int idx)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
|
||||
boot_cpu_data.x86 != 6)
|
||||
return false;
|
||||
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 30: /* 45nm Nehalem */
|
||||
case 26: /* 45nm Nehalem-EP */
|
||||
case 46: /* 45nm Nehalem-EX */
|
||||
|
||||
case 37: /* 32nm Westmere */
|
||||
case 44: /* 32nm Westmere-EP */
|
||||
case 47: /* 32nm Westmere-EX */
|
||||
|
||||
case 42: /* 32nm SandyBridge */
|
||||
case 45: /* 32nm SandyBridge-E/EN/EP */
|
||||
|
||||
case 58: /* 22nm IvyBridge */
|
||||
case 62: /* 22nm IvyBridge-EP/EX */
|
||||
|
||||
case 60: /* 22nm Haswell Core */
|
||||
case 63: /* 22nm Haswell Server */
|
||||
case 69: /* 22nm Haswell ULT */
|
||||
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
|
||||
|
||||
case 61: /* 14nm Broadwell Core-M */
|
||||
case 86: /* 14nm Broadwell Xeon D */
|
||||
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
|
||||
case 79: /* 14nm Broadwell Server */
|
||||
|
||||
case 55: /* 22nm Atom "Silvermont" */
|
||||
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
|
||||
case 76: /* 14nm Atom "Airmont" */
|
||||
if (idx == PERF_MSR_SMI)
|
||||
return true;
|
||||
break;
|
||||
|
||||
case 78: /* 14nm Skylake Mobile */
|
||||
case 94: /* 14nm Skylake Desktop */
|
||||
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct perf_msr {
|
||||
u64 msr;
|
||||
struct perf_pmu_events_attr *attr;
|
||||
bool (*test)(int idx);
|
||||
};
|
||||
|
||||
PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
|
||||
PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
|
||||
PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
|
||||
PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
|
||||
PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
|
||||
|
||||
static struct perf_msr msr[] = {
|
||||
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
|
||||
[PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
|
||||
[PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
|
||||
[PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
|
||||
[PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
|
||||
};
|
||||
|
||||
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group events_attr_group = {
|
||||
.name = "events",
|
||||
.attrs = events_attrs,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-63");
|
||||
static struct attribute *format_attrs[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
static struct attribute_group format_attr_group = {
|
||||
.name = "format",
|
||||
.attrs = format_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *attr_groups[] = {
|
||||
&events_attr_group,
|
||||
&format_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int msr_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
if (cfg >= PERF_MSR_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
if (!msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
|
||||
event->hw.idx = -1;
|
||||
event->hw.event_base = msr[cfg].msr;
|
||||
event->hw.config = cfg;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 msr_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 now;
|
||||
|
||||
if (event->hw.event_base)
|
||||
rdmsrl(event->hw.event_base, now);
|
||||
else
|
||||
rdtscll(now);
|
||||
|
||||
return now;
|
||||
}
|
||||
static void msr_event_update(struct perf_event *event)
|
||||
{
|
||||
u64 prev, now;
|
||||
s64 delta;
|
||||
|
||||
/* Careful, an NMI might modify the previous event value. */
|
||||
again:
|
||||
prev = local64_read(&event->hw.prev_count);
|
||||
now = msr_read_counter(event);
|
||||
|
||||
if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
|
||||
goto again;
|
||||
|
||||
delta = now - prev;
|
||||
if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
|
||||
delta <<= 32;
|
||||
delta >>= 32; /* sign extend */
|
||||
}
|
||||
local64_add(delta, &event->count);
|
||||
}
|
||||
|
||||
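MSR_SMI_COUNT is a 32-bit counter read into a 64-bit variable, so after a wrap the raw subtraction in msr_event_update() would look like an enormous positive delta; shifting the difference up and back down by 32 bits sign-extends the low half so only the true increment is accumulated. The arithmetic in isolation, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Sign-extend a 32-bit counter delta that was computed in 64 bits. */
static int64_t smi_delta(uint64_t prev, uint64_t now)
{
	int64_t delta = now - prev;

	/* Keep only the low 32 bits and sign-extend them, as the code above does. */
	delta = (int64_t)((uint64_t)delta << 32) >> 32;
	return delta;
}

int main(void)
{
	/* Counter wrapped from 0xfffffffe to 1: the real increment is 3. */
	printf("%lld\n", (long long)smi_delta(0xfffffffeULL, 0x1ULL));
	return 0;
}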
static void msr_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
u64 now;
|
||||
|
||||
now = msr_read_counter(event);
|
||||
local64_set(&event->hw.prev_count, now);
|
||||
}
|
||||
|
||||
static void msr_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
msr_event_update(event);
|
||||
}
|
||||
|
||||
static void msr_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
msr_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static int msr_event_add(struct perf_event *event, int flags)
|
||||
{
|
||||
if (flags & PERF_EF_START)
|
||||
msr_event_start(event, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pmu pmu_msr = {
|
||||
.task_ctx_nr = perf_sw_context,
|
||||
.attr_groups = attr_groups,
|
||||
.event_init = msr_event_init,
|
||||
.add = msr_event_add,
|
||||
.del = msr_event_del,
|
||||
.start = msr_event_start,
|
||||
.stop = msr_event_stop,
|
||||
.read = msr_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
|
||||
};
|
||||
|
||||
static int __init msr_init(void)
|
||||
{
|
||||
int i, j = 0;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TSC)) {
|
||||
pr_cont("no MSR PMU driver.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Probe the MSRs. */
|
||||
for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
|
||||
u64 val;
|
||||
|
||||
/*
|
||||
* Virt sucks arse; you cannot tell if a R/O MSR is present :/
|
||||
*/
|
||||
if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
|
||||
msr[i].attr = NULL;
|
||||
}
|
||||
|
||||
/* List remaining MSRs in the sysfs attrs. */
|
||||
for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
|
||||
if (msr[i].attr)
|
||||
events_attrs[j++] = &msr[i].attr->attr.attr;
|
||||
}
|
||||
events_attrs[j] = NULL;
|
||||
|
||||
perf_pmu_register(&pmu_msr, "msr", -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(msr_init);
|
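The new PMU registers under the name "msr" with the event encodings listed in the attribute strings above (tsc=0x00, aperf=0x01, mperf=0x02, pperf=0x03, smi=0x04). A rough userspace sketch, not part of the patch, that resolves the PMU's dynamic type id from the standard event_source sysfs location and reads the tsc event for the calling thread; error handling is kept minimal:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* Dynamic PMUs expose their type id in sysfs. */
	f = fopen("/sys/bus/event_source/devices/msr/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x00;	/* tsc, per the event strings above */

	/* Counting only; msr_event_init() rejects sampling and exclude_* filters. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("tsc: %llu\n", (unsigned long long)count);
	return 0;
}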
@@ -170,7 +170,7 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb,
|
||||
return notifier_from_errno(err);
|
||||
}
|
||||
|
||||
static struct notifier_block __refdata cpuid_class_cpu_notifier =
|
||||
static struct notifier_block cpuid_class_cpu_notifier =
|
||||
{
|
||||
.notifier_call = cpuid_class_cpu_callback,
|
||||
};
|
||||
|
@@ -110,7 +110,7 @@ static void init_espfix_random(void)
|
||||
*/
|
||||
if (!arch_get_random_long(&rand)) {
|
||||
/* The constant is an arbitrary large prime */
|
||||
rdtscll(rand);
|
||||
rand = rdtsc();
|
||||
rand *= 0xc345c6b72fd16123UL;
|
||||
}
|
||||
|
||||
|
@@ -226,22 +226,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
|
||||
*/
|
||||
static unsigned long hpet_freq;
|
||||
|
||||
static void hpet_legacy_set_mode(enum clock_event_mode mode,
|
||||
struct clock_event_device *evt);
|
||||
static int hpet_legacy_next_event(unsigned long delta,
|
||||
struct clock_event_device *evt);
|
||||
|
||||
/*
|
||||
* The hpet clock event device
|
||||
*/
|
||||
static struct clock_event_device hpet_clockevent = {
|
||||
.name = "hpet",
|
||||
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
|
||||
.set_mode = hpet_legacy_set_mode,
|
||||
.set_next_event = hpet_legacy_next_event,
|
||||
.irq = 0,
|
||||
.rating = 50,
|
||||
};
|
||||
static struct clock_event_device hpet_clockevent;
|
||||
|
||||
static void hpet_stop_counter(void)
|
||||
{
|
||||
@@ -306,64 +291,74 @@ static void hpet_legacy_clockevent_register(void)
|
||||
printk(KERN_DEBUG "hpet clockevent registered\n");
|
||||
}
|
||||
|
||||
static void hpet_set_mode(enum clock_event_mode mode,
|
||||
struct clock_event_device *evt, int timer)
|
||||
static int hpet_set_periodic(struct clock_event_device *evt, int timer)
|
||||
{
|
||||
unsigned int cfg, cmp, now;
|
||||
uint64_t delta;
|
||||
|
||||
switch (mode) {
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
hpet_stop_counter();
|
||||
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
|
||||
delta >>= evt->shift;
|
||||
now = hpet_readl(HPET_COUNTER);
|
||||
cmp = now + (unsigned int) delta;
|
||||
cfg = hpet_readl(HPET_Tn_CFG(timer));
|
||||
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
|
||||
HPET_TN_SETVAL | HPET_TN_32BIT;
|
||||
hpet_writel(cfg, HPET_Tn_CFG(timer));
|
||||
hpet_writel(cmp, HPET_Tn_CMP(timer));
|
||||
udelay(1);
|
||||
/*
|
||||
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
|
||||
* cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
|
||||
* bit is automatically cleared after the first write.
|
||||
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
|
||||
* Publication # 24674)
|
||||
*/
|
||||
hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer));
|
||||
hpet_start_counter();
|
||||
hpet_print_config();
|
||||
break;
|
||||
hpet_stop_counter();
|
||||
delta = ((uint64_t)(NSEC_PER_SEC / HZ)) * evt->mult;
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
hpet_writel(cmp, HPET_Tn_CMP(timer));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
* cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
* bit is automatically cleared after the first write.
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();

case CLOCK_EVT_MODE_ONESHOT:
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
return 0;
}

case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_Tn_CFG(timer));
break;
static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
{
unsigned int cfg;

case CLOCK_EVT_MODE_RESUME:
if (timer == 0) {
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
hpet_print_config();
break;
cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));

return 0;
}

static int hpet_shutdown(struct clock_event_device *evt, int timer)
{
unsigned int cfg;

cfg = hpet_readl(HPET_Tn_CFG(timer));
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_Tn_CFG(timer));

return 0;
}

static int hpet_resume(struct clock_event_device *evt, int timer)
{
if (!timer) {
hpet_enable_legacy_int();
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);

irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
hpet_print_config();

return 0;
}

static int hpet_next_event(unsigned long delta,
@@ -403,10 +398,24 @@ static int hpet_next_event(unsigned long delta,
return res < HPET_MIN_CYCLES ? -ETIME : 0;
}

static void hpet_legacy_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
static int hpet_legacy_shutdown(struct clock_event_device *evt)
{
hpet_set_mode(mode, evt, 0);
return hpet_shutdown(evt, 0);
}

static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
{
return hpet_set_oneshot(evt, 0);
}

static int hpet_legacy_set_periodic(struct clock_event_device *evt)
{
return hpet_set_periodic(evt, 0);
}

static int hpet_legacy_resume(struct clock_event_device *evt)
{
return hpet_resume(evt, 0);
}

static int hpet_legacy_next_event(unsigned long delta,
@@ -415,6 +424,22 @@ static int hpet_legacy_next_event(unsigned long delta,
return hpet_next_event(delta, evt, 0);
}

/*
* The hpet clock event device
*/
static struct clock_event_device hpet_clockevent = {
.name = "hpet",
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_periodic = hpet_legacy_set_periodic,
.set_state_oneshot = hpet_legacy_set_oneshot,
.set_state_shutdown = hpet_legacy_shutdown,
.tick_resume = hpet_legacy_resume,
.set_next_event = hpet_legacy_next_event,
.irq = 0,
.rating = 50,
};

/*
* HPET MSI Support
*/
@@ -426,7 +451,7 @@ static struct irq_domain *hpet_domain;

void hpet_msi_unmask(struct irq_data *data)
{
struct hpet_dev *hdev = data->handler_data;
struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
unsigned int cfg;

/* unmask it */
@@ -437,7 +462,7 @@ void hpet_msi_unmask(struct irq_data *data)

void hpet_msi_mask(struct irq_data *data)
{
struct hpet_dev *hdev = data->handler_data;
struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
unsigned int cfg;

/* mask it */
@@ -459,11 +484,32 @@ void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
msg->address_hi = 0;
}

static void hpet_msi_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
static int hpet_msi_shutdown(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_set_mode(mode, evt, hdev->num);

return hpet_shutdown(evt, hdev->num);
}

static int hpet_msi_set_oneshot(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);

return hpet_set_oneshot(evt, hdev->num);
}

static int hpet_msi_set_periodic(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);

return hpet_set_periodic(evt, hdev->num);
}

static int hpet_msi_resume(struct clock_event_device *evt)
{
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);

return hpet_resume(evt, hdev->num);
}

static int hpet_msi_next_event(unsigned long delta,
@@ -523,10 +569,14 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)

evt->rating = 110;
evt->features = CLOCK_EVT_FEAT_ONESHOT;
if (hdev->flags & HPET_DEV_PERI_CAP)
if (hdev->flags & HPET_DEV_PERI_CAP) {
evt->features |= CLOCK_EVT_FEAT_PERIODIC;
evt->set_state_periodic = hpet_msi_set_periodic;
}

evt->set_mode = hpet_msi_set_mode;
evt->set_state_shutdown = hpet_msi_shutdown;
evt->set_state_oneshot = hpet_msi_set_oneshot;
evt->tick_resume = hpet_msi_resume;
evt->set_next_event = hpet_msi_next_event;
evt->cpumask = cpumask_of(hdev->cpu);

@@ -735,7 +785,7 @@ static int hpet_clocksource_register(void)

/* Verify whether hpet counter works */
t1 = hpet_readl(HPET_COUNTER);
rdtscll(start);
start = rdtsc();

/*
* We don't know the TSC frequency yet, but waiting for
@@ -745,7 +795,7 @@ static int hpet_clocksource_register(void)
*/
do {
rep_nop();
rdtscll(now);
now = rdtsc();
} while ((now - start) < 200000UL);

if (t1 == hpet_readl(HPET_COUNTER)) {
@@ -32,6 +32,7 @@
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
@@ -179,7 +180,11 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
va = info->address;
len = bp->attr.bp_len;

return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
/*
* We don't need to worry about va + len - 1 overflowing:
* we already require that va is aligned to a multiple of len.
*/
return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

int arch_bp_generic_fields(int x86_len, int x86_type,
@@ -243,6 +248,20 @@ static int arch_build_bp_info(struct perf_event *bp)
info->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
* We don't allow kernel breakpoints in places that are not
* acceptable for kprobes. On non-kprobes kernels, we don't
* allow kernel breakpoints at all.
*/
if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
#ifdef CONFIG_KPROBES
if (within_kprobe_blacklist(bp->attr.bp_addr))
return -EINVAL;
#else
return -EINVAL;
#endif
}

info->type = X86_BREAKPOINT_EXECUTE;
/*
* x86 inst breakpoints need to have a specific undefined len.
@@ -276,8 +295,18 @@ static int arch_build_bp_info(struct perf_event *bp)
break;
#endif
default:
/* AMD range breakpoint */
if (!is_power_of_2(bp->attr.bp_len))
return -EINVAL;
if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
return -EINVAL;
/*
* It's impossible to use a range breakpoint to fake out
* user vs kernel detection because bp_len - 1 can't
* have the high bit set. If we ever allow range instruction
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
if (!cpu_has_bpext)
return -EOPNOTSUPP;
info->mask = bp->attr.bp_len - 1;
@@ -34,7 +34,7 @@ static int __init init_pit_clocksource(void)
* - when local APIC timer is active (PIT is switched off)
*/
if (num_possible_cpus() > 1 || is_hpet_enabled() ||
i8253_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
!clockevent_state_periodic(&i8253_clockevent))
return 0;

return clocksource_i8253_init();
@@ -1,328 +0,0 @@
/*
* IOSF-SB MailBox Interface Driver
* Copyright (c) 2013, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*
* The IOSF-SB is a fabric bus available on Atom based SOC's that uses a
* mailbox interface (MBI) to communicate with mutiple devices. This
* driver implements access to this interface for those platforms that can
* enumerate the device using PCI.
*/

#include <linux/module.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/capability.h>

#include <asm/iosf_mbi.h>

#define PCI_DEVICE_ID_BAYTRAIL 0x0F00
#define PCI_DEVICE_ID_BRASWELL 0x2280
#define PCI_DEVICE_ID_QUARK_X1000 0x0958

static DEFINE_SPINLOCK(iosf_mbi_lock);

static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
{
return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE;
}

static struct pci_dev *mbi_pdev; /* one mbi device */

static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr)
{
int result;

if (!mbi_pdev)
return -ENODEV;

if (mcrx) {
result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
mcrx);
if (result < 0)
goto fail_read;
}

result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
if (result < 0)
goto fail_read;

result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
if (result < 0)
goto fail_read;

return 0;

fail_read:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
return result;
}

static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr)
{
int result;

if (!mbi_pdev)
return -ENODEV;

result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr);
if (result < 0)
goto fail_write;

if (mcrx) {
result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET,
mcrx);
if (result < 0)
goto fail_write;
}

result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr);
if (result < 0)
goto fail_write;

return 0;

fail_write:
dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result);
return result;
}

int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
{
u32 mcr, mcrx;
unsigned long flags;
int ret;

/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}

mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;

spin_lock_irqsave(&iosf_mbi_lock, flags);
ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr);
spin_unlock_irqrestore(&iosf_mbi_lock, flags);

return ret;
}
EXPORT_SYMBOL(iosf_mbi_read);

int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
{
u32 mcr, mcrx;
unsigned long flags;
int ret;

/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}

mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;

spin_lock_irqsave(&iosf_mbi_lock, flags);
ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr);
spin_unlock_irqrestore(&iosf_mbi_lock, flags);

return ret;
}
EXPORT_SYMBOL(iosf_mbi_write);

int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
{
u32 mcr, mcrx;
u32 value;
unsigned long flags;
int ret;

/*Access to the GFX unit is handled by GPU code */
if (port == BT_MBI_UNIT_GFX) {
WARN_ON(1);
return -EPERM;
}

mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO);
mcrx = offset & MBI_MASK_HI;

spin_lock_irqsave(&iosf_mbi_lock, flags);

/* Read current mdr value */
ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value);
if (ret < 0) {
spin_unlock_irqrestore(&iosf_mbi_lock, flags);
return ret;
}

/* Apply mask */
value &= ~mask;
mdr &= mask;
value |= mdr;

/* Write back */
ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value);

spin_unlock_irqrestore(&iosf_mbi_lock, flags);

return ret;
}
EXPORT_SYMBOL(iosf_mbi_modify);

bool iosf_mbi_available(void)
{
/* Mbi isn't hot-pluggable. No remove routine is provided */
return mbi_pdev;
}
EXPORT_SYMBOL(iosf_mbi_available);

#ifdef CONFIG_IOSF_MBI_DEBUG
static u32 dbg_mdr;
static u32 dbg_mcr;
static u32 dbg_mcrx;

static int mcr_get(void *data, u64 *val)
{
*val = *(u32 *)data;
return 0;
}

static int mcr_set(void *data, u64 val)
{
u8 command = ((u32)val & 0xFF000000) >> 24,
port = ((u32)val & 0x00FF0000) >> 16,
offset = ((u32)val & 0x0000FF00) >> 8;
int err;

*(u32 *)data = val;

if (!capable(CAP_SYS_RAWIO))
return -EACCES;

if (command & 1u)
err = iosf_mbi_write(port,
command,
dbg_mcrx | offset,
dbg_mdr);
else
err = iosf_mbi_read(port,
command,
dbg_mcrx | offset,
&dbg_mdr);

return err;
}
DEFINE_SIMPLE_ATTRIBUTE(iosf_mcr_fops, mcr_get, mcr_set , "%llx\n");

static struct dentry *iosf_dbg;

static void iosf_sideband_debug_init(void)
{
struct dentry *d;

iosf_dbg = debugfs_create_dir("iosf_sb", NULL);
if (IS_ERR_OR_NULL(iosf_dbg))
return;

/* mdr */
d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr);
if (IS_ERR_OR_NULL(d))
goto cleanup;

/* mcrx */
debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
if (IS_ERR_OR_NULL(d))
goto cleanup;

/* mcr - initiates mailbox tranaction */
debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
if (IS_ERR_OR_NULL(d))
goto cleanup;

return;

cleanup:
debugfs_remove_recursive(d);
}

static void iosf_debugfs_init(void)
{
iosf_sideband_debug_init();
}

static void iosf_debugfs_remove(void)
{
debugfs_remove_recursive(iosf_dbg);
}
#else
static inline void iosf_debugfs_init(void) { }
static inline void iosf_debugfs_remove(void) { }
#endif /* CONFIG_IOSF_MBI_DEBUG */

static int iosf_mbi_probe(struct pci_dev *pdev,
const struct pci_device_id *unused)
{
int ret;

ret = pci_enable_device(pdev);
if (ret < 0) {
dev_err(&pdev->dev, "error: could not enable device\n");
return ret;
}

mbi_pdev = pci_dev_get(pdev);
return 0;
}

static const struct pci_device_id iosf_mbi_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BRASWELL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) },
{ 0, },
};
MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids);

static struct pci_driver iosf_mbi_pci_driver = {
.name = "iosf_mbi_pci",
.probe = iosf_mbi_probe,
.id_table = iosf_mbi_pci_ids,
};

static int __init iosf_mbi_init(void)
{
iosf_debugfs_init();

return pci_register_driver(&iosf_mbi_pci_driver);
}

static void __exit iosf_mbi_exit(void)
{
iosf_debugfs_remove();

pci_unregister_driver(&iosf_mbi_pci_driver);
if (mbi_pdev) {
pci_dev_put(mbi_pdev);
mbi_pdev = NULL;
}
}

module_init(iosf_mbi_init);
module_exit(iosf_mbi_exit);

MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
MODULE_DESCRIPTION("IOSF Mailbox Interface accessor");
MODULE_LICENSE("GPL v2");
@@ -139,10 +139,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Machine check polls\n");
#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
@@ -211,24 +214,38 @@ u64 arch_irq_stat(void)
__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

struct irq_desc * desc;
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
unsigned irq;

/*
* NB: Unlike exception entries, IRQ entries do not reliably
* handle context tracking in the low-level entry code. This is
* because syscall entries execute briefly with IRQs on before
* updating context tracking state, so we can take an IRQ from
* kernel mode with CONTEXT_USER. The low-level entry code only
* updates the context if we came from user mode, so we won't
* switch to CONTEXT_KERNEL. We'll fix that once the syscall
* code is cleaned up enough that we can cleanly defer enabling
* IRQs.
*/

entering_irq();

irq = __this_cpu_read(vector_irq[vector]);
/* entering_irq() tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");

if (!handle_irq(irq, regs)) {
desc = __this_cpu_read(vector_irq[vector]);

if (!handle_irq(desc, regs)) {
ack_APIC_irq();

if (irq != VECTOR_RETRIGGERED) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
if (desc != VECTOR_RETRIGGERED) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector, irq);
vector);
} else {
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
}
}

@@ -330,10 +347,10 @@ static struct cpumask affinity_new, online_new;
*/
int check_irq_vectors_for_cpu_disable(void)
{
int irq, cpu;
unsigned int this_cpu, vector, this_count, count;
struct irq_desc *desc;
struct irq_data *data;
int cpu;

this_cpu = smp_processor_id();
cpumask_copy(&online_new, cpu_online_mask);
@@ -341,47 +358,43 @@ int check_irq_vectors_for_cpu_disable(void)

this_count = 0;
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
irq = __this_cpu_read(vector_irq[vector]);
if (irq >= 0) {
desc = irq_to_desc(irq);
if (!desc)
continue;

/*
* Protect against concurrent action removal,
* affinity changes etc.
*/
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new, data->affinity);
cpumask_clear_cpu(this_cpu, &affinity_new);

/* Do not count inactive or per-cpu irqs. */
if (!irq_has_action(irq) || irqd_is_per_cpu(data)) {
raw_spin_unlock(&desc->lock);
continue;
}
desc = __this_cpu_read(vector_irq[vector]);
if (IS_ERR_OR_NULL(desc))
continue;
/*
* Protect against concurrent action removal, affinity
* changes etc.
*/
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new,
irq_data_get_affinity_mask(data));
cpumask_clear_cpu(this_cpu, &affinity_new);

/* Do not count inactive or per-cpu irqs. */
if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) {
raw_spin_unlock(&desc->lock);
/*
* A single irq may be mapped to multiple
* cpu's vector_irq[] (for example IOAPIC cluster
* mode). In this case we have two
* possibilities:
*
* 1) the resulting affinity mask is empty; that is
* this the down'd cpu is the last cpu in the irq's
* affinity mask, or
*
* 2) the resulting affinity mask is no longer
* a subset of the online cpus but the affinity
* mask is not zero; that is the down'd cpu is the
* last online cpu in a user set affinity mask.
*/
if (cpumask_empty(&affinity_new) ||
!cpumask_subset(&affinity_new, &online_new))
this_count++;
continue;
}

raw_spin_unlock(&desc->lock);
/*
* A single irq may be mapped to multiple cpu's
* vector_irq[] (for example IOAPIC cluster mode). In
* this case we have two possibilities:
*
* 1) the resulting affinity mask is empty; that is
* this the down'd cpu is the last cpu in the irq's
* affinity mask, or
*
* 2) the resulting affinity mask is no longer a
* subset of the online cpus but the affinity mask is
* not zero; that is the down'd cpu is the last online
* cpu in a user set affinity mask.
*/
if (cpumask_empty(&affinity_new) ||
!cpumask_subset(&affinity_new, &online_new))
this_count++;
}

count = 0;
@@ -400,8 +413,8 @@ int check_irq_vectors_for_cpu_disable(void)
for (vector = FIRST_EXTERNAL_VECTOR;
vector < first_system_vector; vector++) {
if (!test_bit(vector, used_vectors) &&
per_cpu(vector_irq, cpu)[vector] < 0)
count++;
IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
count++;
}
}

@@ -437,7 +450,7 @@ void fixup_irqs(void)
raw_spin_lock(&desc->lock);

data = irq_desc_get_irq_data(desc);
affinity = data->affinity;
affinity = irq_data_get_affinity_mask(data);
if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
@@ -505,14 +518,13 @@ void fixup_irqs(void)
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irr;

if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED)
if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
continue;

irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1 << (vector % 32))) {
irq = __this_cpu_read(vector_irq[vector]);
desc = __this_cpu_read(vector_irq[vector]);

desc = irq_to_desc(irq);
raw_spin_lock(&desc->lock);
data = irq_desc_get_irq_data(desc);
chip = irq_data_get_irq_chip(data);
@@ -523,7 +535,7 @@ void fixup_irqs(void)
raw_spin_unlock(&desc->lock);
}
if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
}
}
#endif
@@ -148,21 +148,21 @@ void do_softirq_own_stack(void)
call_on_stack(__do_softirq, isp);
}

bool handle_irq(unsigned irq, struct pt_regs *regs)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
struct irq_desc *desc;
unsigned int irq;
int overflow;

overflow = check_stack_overflow();

desc = irq_to_desc(irq);
if (unlikely(!desc))
if (IS_ERR_OR_NULL(desc))
return false;

irq = irq_desc_get_irq(desc);
if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
generic_handle_irq_desc(irq, desc);
}

return true;
@@ -68,16 +68,13 @@ static inline void stack_overflow_check(struct pt_regs *regs)
#endif
}

bool handle_irq(unsigned irq, struct pt_regs *regs)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
struct irq_desc *desc;

stack_overflow_check(regs);

desc = irq_to_desc(irq);
if (unlikely(!desc))
if (unlikely(IS_ERR_OR_NULL(desc)))
return false;

generic_handle_irq_desc(irq, desc);
generic_handle_irq_desc(irq_desc_get_irq(desc), desc);
return true;
}
@@ -52,7 +52,7 @@ static struct irqaction irq2 = {
};

DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};

int vector_used_by_percpu_irq(unsigned int vector)
@@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
int cpu;

for_each_online_cpu(cpu) {
if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED)
if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
return 1;
}

@@ -94,7 +94,7 @@ void __init init_IRQ(void)
* irq's migrate etc.
*/
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i;
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);

x86_init.irqs.intr_init();
}
@@ -223,9 +223,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
memset(&params->hd0_info, 0, sizeof(params->hd0_info));
memset(&params->hd1_info, 0, sizeof(params->hd1_info));

/* Default sysdesc table */
params->sys_desc_table.length = 0;

if (image->type == KEXEC_TYPE_CRASH) {
ret = crash_setup_memmap_entries(image, params);
if (ret)
@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
a->handler, whole_msecs, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *a;
@@ -213,7 +213,7 @@ static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
/* check to see if anyone registered against these types of errors */
if (nmi_handle(NMI_SERR, regs, false))
if (nmi_handle(NMI_SERR, regs))
return;

pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
unsigned long i;

/* check to see if anyone registered against these types of errors */
if (nmi_handle(NMI_IO_CHECK, regs, false))
if (nmi_handle(NMI_IO_CHECK, regs))
return;

pr_emerg(
@@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
* as only the first one is ever run (unless it can actually determine
* if it caused the NMI)
*/
handled = nmi_handle(NMI_UNKNOWN, regs, false);
handled = nmi_handle(NMI_UNKNOWN, regs);
if (handled) {
__this_cpu_add(nmi_stats.unknown, handled);
return;
@@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs)

__this_cpu_write(last_nmi_rip, regs->ip);

handled = nmi_handle(NMI_LOCAL, regs, b2b);
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled);
if (handled) {
/*
@@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.read_tscp = native_read_tscp,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");

#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
@@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc);
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
if (pv_is_native_spin_unlock()) {
@@ -1,371 +0,0 @@
/*
* Intel Atom SOC Power Management Controller Driver
* Copyright (c) 2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/io.h>

#include <asm/pmc_atom.h>

struct pmc_dev {
u32 base_addr;
void __iomem *regmap;
#ifdef CONFIG_DEBUG_FS
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
};

static struct pmc_dev pmc_device;
static u32 acpi_base_addr;

struct pmc_bit_map {
const char *name;
u32 bit_mask;
};

static const struct pmc_bit_map dev_map[] = {
{"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA},
{"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1},
{"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2},
{"3 - LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1},
{"4 - LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2},
{"5 - LPSS1_F5_SPI", BIT_LPSS1_F5_SPI},
{"6 - LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX},
{"7 - LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX},
{"8 - SCC_EMMC", BIT_SCC_EMMC},
{"9 - SCC_SDIO", BIT_SCC_SDIO},
{"10 - SCC_SDCARD", BIT_SCC_SDCARD},
{"11 - SCC_MIPI", BIT_SCC_MIPI},
{"12 - HDA", BIT_HDA},
{"13 - LPE", BIT_LPE},
{"14 - OTG", BIT_OTG},
{"15 - USH", BIT_USH},
{"16 - GBE", BIT_GBE},
{"17 - SATA", BIT_SATA},
{"18 - USB_EHCI", BIT_USB_EHCI},
{"19 - SEC", BIT_SEC},
{"20 - PCIE_PORT0", BIT_PCIE_PORT0},
{"21 - PCIE_PORT1", BIT_PCIE_PORT1},
{"22 - PCIE_PORT2", BIT_PCIE_PORT2},
{"23 - PCIE_PORT3", BIT_PCIE_PORT3},
{"24 - LPSS2_F0_DMA", BIT_LPSS2_F0_DMA},
{"25 - LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1},
{"26 - LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2},
{"27 - LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3},
{"28 - LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4},
{"29 - LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5},
{"30 - LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6},
{"31 - LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7},
{"32 - SMB", BIT_SMB},
{"33 - OTG_SS_PHY", BIT_OTG_SS_PHY},
{"34 - USH_SS_PHY", BIT_USH_SS_PHY},
{"35 - DFX", BIT_DFX},
};

static const struct pmc_bit_map pss_map[] = {
{"0 - GBE", PMC_PSS_BIT_GBE},
{"1 - SATA", PMC_PSS_BIT_SATA},
{"2 - HDA", PMC_PSS_BIT_HDA},
{"3 - SEC", PMC_PSS_BIT_SEC},
{"4 - PCIE", PMC_PSS_BIT_PCIE},
{"5 - LPSS", PMC_PSS_BIT_LPSS},
{"6 - LPE", PMC_PSS_BIT_LPE},
{"7 - DFX", PMC_PSS_BIT_DFX},
{"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL},
{"9 - USH_SUS", PMC_PSS_BIT_USH_SUS},
{"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS},
{"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA},
{"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL},
{"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS},
{"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK},
{"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA},
{"16 - USB", PMC_PSS_BIT_USB},
{"17 - USB_SUS", PMC_PSS_BIT_USB_SUS},
};

static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
{
return readl(pmc->regmap + reg_offset);
}

static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val)
{
writel(val, pmc->regmap + reg_offset);
}

static void pmc_power_off(void)
{
u16 pm1_cnt_port;
u32 pm1_cnt_value;

pr_info("Preparing to enter system sleep state S5\n");

pm1_cnt_port = acpi_base_addr + PM1_CNT;

pm1_cnt_value = inl(pm1_cnt_port);
pm1_cnt_value &= SLEEP_TYPE_MASK;
pm1_cnt_value |= SLEEP_TYPE_S5;
pm1_cnt_value |= SLEEP_ENABLE;

outl(pm1_cnt_value, pm1_cnt_port);
}

static void pmc_hw_reg_setup(struct pmc_dev *pmc)
{
/*
* Disable PMC S0IX_WAKE_EN events coming from:
* - LPC clock run
* - GPIO_SUS ored dedicated IRQs
* - GPIO_SCORE ored dedicated IRQs
* - GPIO_SUS shared IRQ
* - GPIO_SCORE shared IRQ
*/
pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING);
}

#ifdef CONFIG_DEBUG_FS
static int pmc_dev_state_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u32 func_dis, func_dis_2, func_dis_index;
u32 d3_sts_0, d3_sts_1, d3_sts_index;
int dev_num, dev_index, reg_index;

func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS);
func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2);
d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0);
d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1);

dev_num = ARRAY_SIZE(dev_map);

for (dev_index = 0; dev_index < dev_num; dev_index++) {
reg_index = dev_index / PMC_REG_BIT_WIDTH;
if (reg_index) {
func_dis_index = func_dis_2;
d3_sts_index = d3_sts_1;
} else {
func_dis_index = func_dis;
d3_sts_index = d3_sts_0;
}

seq_printf(s, "Dev: %-32s\tState: %s [%s]\n",
dev_map[dev_index].name,
dev_map[dev_index].bit_mask & func_dis_index ?
"Disabled" : "Enabled ",
dev_map[dev_index].bit_mask & d3_sts_index ?
"D3" : "D0");
}
return 0;
}

static int pmc_dev_state_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_dev_state_show, inode->i_private);
}

static const struct file_operations pmc_dev_state_ops = {
.open = pmc_dev_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};

static int pmc_pss_state_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u32 pss = pmc_reg_read(pmc, PMC_PSS);
int pss_index;

for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) {
seq_printf(s, "Island: %-32s\tState: %s\n",
pss_map[pss_index].name,
pss_map[pss_index].bit_mask & pss ? "Off" : "On");
}
return 0;
}

static int pmc_pss_state_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_pss_state_show, inode->i_private);
}

static const struct file_operations pmc_pss_state_ops = {
.open = pmc_pss_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};

static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
{
struct pmc_dev *pmc = s->private;
u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr;

s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT;
s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT;
s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT;
s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT;
s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT;

seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr);
seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr);
seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr);
seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr);
seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr);
return 0;
}

static int pmc_sleep_tmr_open(struct inode *inode, struct file *file)
{
return single_open(file, pmc_sleep_tmr_show, inode->i_private);
}

static const struct file_operations pmc_sleep_tmr_ops = {
.open = pmc_sleep_tmr_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};

static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
{
debugfs_remove_recursive(pmc->dbgfs_dir);
}

static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
{
struct dentry *dir, *f;

dir = debugfs_create_dir("pmc_atom", NULL);
if (!dir)
return -ENOMEM;

pmc->dbgfs_dir = dir;

f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_dev_state_ops);
if (!f) {
dev_err(&pdev->dev, "dev_state register failed\n");
goto err;
}

f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_pss_state_ops);
if (!f) {
dev_err(&pdev->dev, "pss_state register failed\n");
goto err;
}

f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
dir, pmc, &pmc_sleep_tmr_ops);
if (!f) {
dev_err(&pdev->dev, "sleep_state register failed\n");
goto err;
}

return 0;
err:
pmc_dbgfs_unregister(pmc);
return -ENODEV;
}
#else
static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
{
return 0;
}
#endif /* CONFIG_DEBUG_FS */

static int pmc_setup_dev(struct pci_dev *pdev)
{
struct pmc_dev *pmc = &pmc_device;
int ret;

/* Obtain ACPI base address */
pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr);
acpi_base_addr &= ACPI_BASE_ADDR_MASK;

/* Install power off function */
if (acpi_base_addr != 0 && pm_power_off == NULL)
pm_power_off = pmc_power_off;

pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr);
pmc->base_addr &= PMC_BASE_ADDR_MASK;

pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN);
if (!pmc->regmap) {
dev_err(&pdev->dev, "error: ioremap failed\n");
return -ENOMEM;
}

/* PMC hardware registers setup */
pmc_hw_reg_setup(pmc);

ret = pmc_dbgfs_register(pmc, pdev);
if (ret) {
iounmap(pmc->regmap);
}

return ret;
}

/*
* Data for PCI driver interface
*
* This data only exists for exporting the supported
* PCI ids via MODULE_DEVICE_TABLE. We do not actually
* register a pci_driver, because lpc_ich will register
* a driver on the same PCI id.
*/
static const struct pci_device_id pmc_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_VLV_PMC) },
{ 0, },
};

MODULE_DEVICE_TABLE(pci, pmc_pci_ids);

static int __init pmc_atom_init(void)
{
struct pci_dev *pdev = NULL;
const struct pci_device_id *ent;

/* We look for our device - PCU PMC
* we assume that there is max. one device.
*
* We can't use plain pci_driver mechanism,
* as the device is really a multiple function device,
* main driver that binds to the pci_device is lpc_ich
* and have to find & bind to the device this way.
*/
for_each_pci_dev(pdev) {
ent = pci_match_id(pmc_pci_ids, pdev);
if (ent)
return pmc_setup_dev(pdev);
}
/* Device not found. */
return -ENODEV;
}

module_init(pmc_atom_init);
/* no module_exit, this driver shouldn't be unloaded */

MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>");
MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface");
MODULE_LICENSE("GPL v2");
@@ -29,6 +29,8 @@
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>

/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -110,6 +112,8 @@ void exit_thread(void)
kfree(bp);
}

free_vm86(t);

fpu__drop(fpu);
}

@@ -319,6 +323,7 @@ void stop_this_cpu(void *dummy)
*/
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));

for (;;)
halt();
@@ -53,6 +53,7 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
@@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
@@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task)
dead_task->mm->context.ldt->size);
BUG();
}
#endif
}
}

@@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
__USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
start_thread_common(regs, new_ip, new_sp,
test_thread_flag(TIF_X32)
@@ -37,12 +37,10 @@
#include <asm/proto.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/syscall.h>

#include "tls.h"

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
@@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target,
return ret;
}

static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
unsigned long addr = caddr;
unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;

switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
ret = put_user(val, (__u32 __user *)datap);
break;

case PTRACE_POKEUSR:
ret = putreg32(child, addr, data);
break;

case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct32),
datap);

case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_GENERAL, 0,
sizeof(struct user_regs_struct32),
datap);

case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_FP, 0,
sizeof(struct user_i387_ia32_struct),
datap);

case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(
child, &user_x86_32_view, REGSET_FP,
0, sizeof(struct user_i387_ia32_struct), datap);

case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);

case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);

case PTRACE_GET_THREAD_AREA:
case PTRACE_SET_THREAD_AREA:
return arch_ptrace(child, request, addr, data);

default:
return compat_ptrace_request(child, request, addr, data);
}

return ret;
}
#endif /* CONFIG_IA32_EMULATION */

#ifdef CONFIG_X86_X32_ABI
static long x32_arch_ptrace(struct task_struct *child,
compat_long_t request, compat_ulong_t caddr,
@@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child,
}
#endif

#ifdef CONFIG_COMPAT
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
unsigned long addr = caddr;
unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;

#ifdef CONFIG_X86_X32_ABI
if (!is_ia32_task())
return x32_arch_ptrace(child, request, caddr, cdata);
#endif

switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
ret = put_user(val, (__u32 __user *)datap);
break;

case PTRACE_POKEUSR:
ret = putreg32(child, addr, data);
break;

case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct32),
datap);

case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_GENERAL, 0,
sizeof(struct user_regs_struct32),
datap);

case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_FP, 0,
sizeof(struct user_i387_ia32_struct),
datap);

case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(
child, &user_x86_32_view, REGSET_FP,
0, sizeof(struct user_i387_ia32_struct), datap);

case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */
return copy_regset_to_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);

case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
return copy_regset_from_user(child, &user_x86_32_view,
REGSET_XFP, 0,
sizeof(struct user32_fxsr_struct),
datap);

case PTRACE_GET_THREAD_AREA:
case PTRACE_SET_THREAD_AREA:
return arch_ptrace(child, request, addr, data);

default:
return compat_ptrace_request(child, request, addr, data);
}

return ret;
#ifdef CONFIG_IA32_EMULATION
return ia32_arch_ptrace(child, request, caddr, cdata);
#else
return 0;
#endif
}

#endif /* CONFIG_IA32_EMULATION */
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_X86_64
@@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
}

static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
audit_syscall_entry(regs->orig_ax, regs->di,
regs->si, regs->dx, regs->r10);
} else
#endif
{
audit_syscall_entry(regs->orig_ax, regs->bx,
regs->cx, regs->dx, regs->si);
}
}

/*
* We can return 0 to resume the syscall or anything else to go to phase
* 2. If we resume the syscall, we need to put something appropriate in
* regs->orig_ax.
*
* NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
* are fully functional.
*
* For phase 2's benefit, our return value is:
* 0: resume the syscall
* 1: go to phase 2; no seccomp phase 2 needed
* anything else: go to phase 2; pass return value to seccomp
*/
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
unsigned long ret = 0;
u32 work;

BUG_ON(regs != task_pt_regs(current));

work = ACCESS_ONCE(current_thread_info()->flags) &
_TIF_WORK_SYSCALL_ENTRY;

/*
* If TIF_NOHZ is set, we are required to call user_exit() before
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
user_exit();
work &= ~_TIF_NOHZ;
}

#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
* code, and keeping seccomp fast is probably more valuable
* than the rest of this.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;

sd.arch = arch;
sd.nr = regs->orig_ax;
sd.instruction_pointer = regs->ip;
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
sd.args[0] = regs->di;
sd.args[1] = regs->si;
sd.args[2] = regs->dx;
sd.args[3] = regs->r10;
sd.args[4] = regs->r8;
sd.args[5] = regs->r9;
} else
#endif
{
sd.args[0] = regs->bx;
sd.args[1] = regs->cx;
sd.args[2] = regs->dx;
sd.args[3] = regs->si;
sd.args[4] = regs->di;
sd.args[5] = regs->bp;
}

BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);

ret = seccomp_phase1(&sd);
if (ret == SECCOMP_PHASE1_SKIP) {
regs->orig_ax = -1;
ret = 0;
} else if (ret != SECCOMP_PHASE1_OK) {
return ret; /* Go directly to phase 2 */
}

work &= ~_TIF_SECCOMP;
}
#endif

/* Do our best to finish without phase 2. */
if (work == 0)
return ret; /* seccomp and/or nohz only (ret == 0 here) */

#ifdef CONFIG_AUDITSYSCALL
if (work == _TIF_SYSCALL_AUDIT) {
/*
* If there is no more work to be done except auditing,
* then audit in phase 1. Phase 2 always audits, so, if
* we audit here, then we can't go on to phase 2.
*/
do_audit_syscall_entry(regs, arch);
return 0;
}
#endif

return 1; /* Something is enabled that we can't handle in phase 1 */
}

/* Returns the syscall nr to run (which should match regs->orig_ax). */
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
unsigned long phase1_result)
{
long ret = 0;
u32 work = ACCESS_ONCE(current_thread_info()->flags) &
_TIF_WORK_SYSCALL_ENTRY;

BUG_ON(regs != task_pt_regs(current));

/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
* If user-mode had set TF itself, then it's still clear from
* do_debug() and we need to set it again to restore the user
* state. If we entered on the slow path, TF was already set.
*/
if (work & _TIF_SINGLESTEP)
regs->flags |= X86_EFLAGS_TF;

#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
* they can see any changes made by a seccomp tracer.
*/
if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
/* seccomp failures shouldn't expose any additional code. */
return -1;
}
#endif

if (unlikely(work & _TIF_SYSCALL_EMU))
ret = -1L;

if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
tracehook_report_syscall_entry(regs))
ret = -1L;

if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);

do_audit_syscall_entry(regs, arch);

return ret ?: regs->orig_ax;
}

long syscall_trace_enter(struct pt_regs *regs)
{
u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);

if (phase1_result == 0)
return regs->orig_ax;
else
return syscall_trace_enter_phase2(regs, arch, phase1_result);
}

void syscall_trace_leave(struct pt_regs *regs)
{
bool step;

/*
* We may come here right after calling schedule_user()
* or do_notify_resume(), in which case we can be in RCU
* user mode.
*/
user_exit();

audit_syscall_exit(regs);

if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);

/*
* If TIF_SYSCALL_EMU is set, we only get here because of
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
* We already reported this syscall instruction in
* syscall_trace_enter().
*/
step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
!test_thread_flag(TIF_SYSCALL_EMU);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);

user_enter();
}
@@ -916,11 +916,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
apm_info.bios = boot_params.apm_bios_info;
ist_info = boot_params.ist_info;
if (boot_params.sys_desc_table.length != 0) {
machine_id = boot_params.sys_desc_table.table[0];
machine_submodel_id = boot_params.sys_desc_table.table[1];
BIOS_revision = boot_params.sys_desc_table.table[2];
}
#endif
saved_video_mode = boot_params.hdr.vid_mode;
bootloader_type = boot_params.hdr.type_of_loader;
@@ -31,11 +31,11 @@
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
#include <asm/vm86.h>

#ifdef CONFIG_X86_64
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/sys_ia32.h>
#endif /* CONFIG_X86_64 */

#include <asm/syscall.h>
@@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
bool stepping, failed;
struct fpu *fpu = &current->thread.fpu;

if (v8086_mode(regs))
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);

/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*/
static void do_signal(struct pt_regs *regs)
void do_signal(struct pt_regs *regs)
{
struct ksignal ksig;

@@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}

/*
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
__visible void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
user_exit();

if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);

/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);

if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();

user_enter();
}

void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;

arch/x86/kernel/signal_compat.c (new file, 95 lines)
@@ -0,0 +1,95 @@
#include <linux/compat.h>
#include <linux/uaccess.h>

int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
	int err = 0;
	bool ia32 = test_thread_flag(TIF_IA32);

	if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
		return -EFAULT;

	put_user_try {
		/* If you change siginfo_t structure, please make sure that
		   this code is fixed accordingly.
		   It should never copy any pad contained in the structure
		   to avoid security leaks, but must copy the generic
		   3 ints plus the relevant union member. */
		put_user_ex(from->si_signo, &to->si_signo);
		put_user_ex(from->si_errno, &to->si_errno);
		put_user_ex((short)from->si_code, &to->si_code);

		if (from->si_code < 0) {
			put_user_ex(from->si_pid, &to->si_pid);
			put_user_ex(from->si_uid, &to->si_uid);
			put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr);
		} else {
			/*
			 * First 32bits of unions are always present:
			 * si_pid === si_band === si_tid === si_addr(LS half)
			 */
			put_user_ex(from->_sifields._pad[0],
				    &to->_sifields._pad[0]);
			switch (from->si_code >> 16) {
			case __SI_FAULT >> 16:
				break;
			case __SI_SYS >> 16:
				put_user_ex(from->si_syscall, &to->si_syscall);
				put_user_ex(from->si_arch, &to->si_arch);
				break;
			case __SI_CHLD >> 16:
				if (ia32) {
					put_user_ex(from->si_utime, &to->si_utime);
					put_user_ex(from->si_stime, &to->si_stime);
				} else {
					put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime);
					put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime);
				}
				put_user_ex(from->si_status, &to->si_status);
				/* FALL THROUGH */
			default:
			case __SI_KILL >> 16:
				put_user_ex(from->si_uid, &to->si_uid);
				break;
			case __SI_POLL >> 16:
				put_user_ex(from->si_fd, &to->si_fd);
				break;
			case __SI_TIMER >> 16:
				put_user_ex(from->si_overrun, &to->si_overrun);
				put_user_ex(ptr_to_compat(from->si_ptr),
					    &to->si_ptr);
				break;
			/* This is not generated by the kernel as of now. */
			case __SI_RT >> 16:
			case __SI_MESGQ >> 16:
				put_user_ex(from->si_uid, &to->si_uid);
				put_user_ex(from->si_int, &to->si_int);
				break;
			}
		}
	} put_user_catch(err);

	return err;
}

int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
	int err = 0;
	u32 ptr32;

	if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
		return -EFAULT;

	get_user_try {
		get_user_ex(to->si_signo, &from->si_signo);
		get_user_ex(to->si_errno, &from->si_errno);
		get_user_ex(to->si_code, &from->si_code);

		get_user_ex(to->si_pid, &from->si_pid);
		get_user_ex(to->si_uid, &from->si_uid);
		get_user_ex(ptr32, &from->si_ptr);
		to->si_ptr = compat_ptr(ptr32);
	} get_user_catch(err);

	return err;
}
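copy_siginfo_to_user32() follows the rule spelled out in its comment: always copy the three generic ints (si_signo, si_errno, si_code), then only the union member the si_code selects, and never the padding. A small self-contained sketch of that pattern with a toy record (the struct and field names below are invented for illustration, not the kernel's siginfo layout):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* toy analogue of a siginfo-like record: 3 generic ints plus a union */
struct toy_info {
	int32_t signo, err, code;
	union {
		struct { int32_t pid, uid; } kill;
		struct { int32_t band, fd; } poll;
		uint8_t pad[16];
	} u;
};

/* copy only what the code selects, so stale pad bytes never leak */
static void copy_info(struct toy_info *dst, const struct toy_info *src)
{
	memset(dst, 0, sizeof(*dst));
	dst->signo = src->signo;
	dst->err = src->err;
	dst->code = src->code;
	if (src->code == 1) {        /* "kill"-style record */
		dst->u.kill.pid = src->u.kill.pid;
		dst->u.kill.uid = src->u.kill.uid;
	} else if (src->code == 2) { /* "poll"-style record */
		dst->u.poll.band = src->u.poll.band;
		dst->u.poll.fd = src->u.poll.fd;
	}
}

int main(void)
{
	struct toy_info in = { .signo = 9, .code = 1, .u.kill = { 1234, 1000 } };
	struct toy_info out;

	copy_info(&out, &in);
	printf("sig=%d pid=%d\n", out.signo, out.u.kill.pid);
	return 0;
}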
@@ -30,6 +30,7 @@
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
/*
 * Some notes on x86 processor bugs affecting SMP operation:

@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
finish:
	local_irq_save(flags);
	disable_local_APIC();
	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
	local_irq_restore(flags);
}
@@ -97,8 +97,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

atomic_t init_deasserted;

static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
	unsigned long flags;

@@ -146,16 +144,11 @@ static void smp_callin(void)

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC. We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 *
	 * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
	 * cpu_callout_mask guarantees we don't get here before
	 * an INIT_deassert IPI reaches our local APIC, so it is
	 * now safe to touch our local APIC.
	 */
	cpuid = smp_processor_id();
	if (apic->wait_for_init_deassert && cpuid)
		while (!atomic_read(&init_deasserted))
			cpu_relax();

	/*
	 * (This works even if the APIC is not enabled.)

@@ -620,7 +613,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
	send_status = safe_apic_wait_icr_idle();

	mb();
	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?

@@ -665,7 +657,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(300);
	if (init_udelay)
		udelay(300);

	pr_debug("Startup point 1\n");

@@ -675,7 +668,8 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	if (init_udelay)
		udelay(200);

	if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);

@@ -859,8 +853,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {

		pr_debug("Setting warm reset code and vector.\n");

@@ -898,7 +890,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)

	if (!boot_error) {
		/*
		 * Wait 10s total for a response from AP
		 * Wait 10s total for first sign of life from AP
		 */
		boot_error = -1;
		timeout = jiffies + 10*HZ;

@@ -911,7 +903,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
				boot_error = 0;
				break;
			}
			udelay(100);
			schedule();
		}
	}

@@ -927,7 +918,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
			 * for the MTRR work(triggered by the AP coming online)
			 * to be completed in the stop machine context.
			 */
			udelay(100);
			schedule();
		}
	}

@@ -1358,7 +1348,7 @@ static void remove_siblinginfo(int cpu)
	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
}

static void __ref remove_cpu_from_maps(int cpu)
static void remove_cpu_from_maps(int cpu)
{
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, cpu_callout_mask);
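The do_boot_cpu() hunks above bound the wait for the AP with jiffies + 10*HZ and poll with short delays between checks. A minimal user-space sketch of the same bounded-poll shape using CLOCK_MONOTONIC (the condition being polled is a stand-in, not the kernel's cpu_initialized test):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static bool target_alive(int attempt)
{
	return attempt > 5; /* stand-in for "AP reported in" */
}

/* poll a condition for up to timeout_ms, sleeping briefly between checks */
static int wait_for(int timeout_ms)
{
	struct timespec start, now;
	int attempt = 0;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (target_alive(attempt++))
			return 0;  /* success */
		clock_gettime(CLOCK_MONOTONIC, &now);
		long elapsed_ms = (now.tv_sec - start.tv_sec) * 1000 +
				  (now.tv_nsec - start.tv_nsec) / 1000000;
		if (elapsed_ms > timeout_ms)
			return -1; /* the error code stays set, like boot_error */
		usleep(100);       /* roughly the udelay(100) + schedule() slot */
	}
}

int main(void)
{
	printf("%s\n", wait_for(10000) == 0 ? "alive" : "timed out");
	return 0;
}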
@@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
		return addr;
	}

#ifdef CONFIG_MODIFY_LDT_SYSCALL
	/*
	 * We'll assume that the code segments in the GDT
	 * are all zero-based. That is largely true: the

@@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
		}
		mutex_unlock(&child->mm->context.lock);
	}
#endif

	return addr;
}
@@ -57,7 +57,7 @@ __setup("cpu0_hotplug", enable_cpu0_hotplug);
 *
 * This is only called for debugging CPU offline/online feature.
 */
int __ref _debug_hotplug_cpu(int cpu, int action)
int _debug_hotplug_cpu(int cpu, int action)
{
	struct device *dev = get_cpu_device(cpu);
	int ret;

@@ -104,7 +104,7 @@ static int __init debug_hotplug_cpu(void)
late_initcall_sync(debug_hotplug_cpu);
#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */

int __ref arch_register_cpu(int num)
int arch_register_cpu(int num)
{
	struct cpuinfo_x86 *c = &cpu_data(num);
@@ -12,10 +12,5 @@
 */
u64 notrace trace_clock_x86_tsc(void)
{
	u64 ret;

	rdtsc_barrier();
	rdtscll(ret);

	return ret;
	return rdtsc_ordered();
}
@@ -62,6 +62,7 @@
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
#include <asm/mpx.h>
#include <asm/vm86.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>

@@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
	preempt_count_dec();
}

enum ctx_state ist_enter(struct pt_regs *regs)
void ist_enter(struct pt_regs *regs)
{
	enum ctx_state prev_state;

	if (user_mode(regs)) {
		/* Other than that, we're just an exception. */
		prev_state = exception_enter();
		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	} else {
		/*
		 * We might have interrupted pretty much anything. In

@@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs)
		 * but we need to notify RCU.
		 */
		rcu_nmi_enter();
		prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
	}

	/*
	 * We are atomic because we're on the IST stack (or we're on x86_32,
	 * in which case we still shouldn't schedule).
	 *
	 * This must be after exception_enter(), because exception_enter()
	 * won't do anything if in_interrupt() returns true.
	 * We are atomic because we're on the IST stack; or we're on
	 * x86_32, in which case we still shouldn't schedule; or we're
	 * on x86_64 and entered from user mode, in which case we're
	 * still atomic unless ist_begin_non_atomic is called.
	 */
	preempt_count_add(HARDIRQ_OFFSET);

	/* This code is a bit fragile. Test it. */
	rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");

	return prev_state;
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
}

void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
void ist_exit(struct pt_regs *regs)
{
	/* Must be before exception_exit. */
	preempt_count_sub(HARDIRQ_OFFSET);

	if (user_mode(regs))
		return exception_exit(prev_state);
	else
	if (!user_mode(regs))
		rcu_nmi_exit();
}

@@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
 * a double fault, it can be safe to schedule. ist_begin_non_atomic()
 * begins a non-atomic section within an ist_enter()/ist_exit() region.
 * Callers are responsible for enabling interrupts themselves inside
 * the non-atomic section, and callers must call is_end_non_atomic()
 * the non-atomic section, and callers must call ist_end_non_atomic()
 * before ist_exit().
 */
void ist_begin_non_atomic(struct pt_regs *regs)

@@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap);
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
			  unsigned long trapnr, int signr)
{
	enum ctx_state prev_state = exception_enter();
	siginfo_t info;

	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
			NOTIFY_STOP) {
		conditional_sti(regs);
		do_trap(trapnr, signr, str, regs, error_code,
			fill_trap_info(regs, signr, trapnr, &info));
	}

	exception_exit(prev_state);
}

#define DO_ERROR(trapnr, signr, str, name) \

@@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
	}
#endif

	ist_enter(regs); /* Discard prev_state because we won't return. */
	ist_enter(regs);
	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

	tsk->thread.error_code = error_code;

@@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)

dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;
	const struct bndcsr *bndcsr;
	siginfo_t *info;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
		goto exit;
		return;
	conditional_sti(regs);

	if (!user_mode(regs))

@@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
		die("bounds", regs, error_code);
	}

exit:
	exception_exit(prev_state);
	return;

exit_trap:
	/*
	 * This path out is for all the cases where we could not

@@ -447,35 +435,33 @@ exit_trap:
	 * time..
	 */
	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
	exception_exit(prev_state);
}

dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk;
	enum ctx_state prev_state;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	conditional_sti(regs);

	if (v8086_mode(regs)) {
		local_irq_enable();
		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
		goto exit;
		return;
	}

	tsk = current;
	if (!user_mode(regs)) {
		if (fixup_exception(regs))
			goto exit;
			return;

		tsk->thread.error_code = error_code;
		tsk->thread.trap_nr = X86_TRAP_GP;
		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
			die("general protection fault", regs, error_code);
		goto exit;
		return;
	}

	tsk->thread.error_code = error_code;
@@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
	}

	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
exit:
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_general_protection);

/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

#ifdef CONFIG_DYNAMIC_FTRACE
	/*
	 * ftrace must be first, everything else may cause a recursive crash.

@@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
	if (poke_int3_handler(regs))
		return;

	prev_state = ist_enter(regs);
	ist_enter(regs);
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
				SIGTRAP) == NOTIFY_STOP)

@@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
	preempt_conditional_cli(regs);
	debug_stack_usage_dec();
exit:
	ist_exit(regs, prev_state);
	ist_exit(regs);
}
NOKPROBE_SYMBOL(do_int3);

@@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk = current;
	enum ctx_state prev_state;
	int user_icebp = 0;
	unsigned long dr6;
	int si_code;

	prev_state = ist_enter(regs);
	ist_enter(regs);

	get_debugreg(dr6, 6);

@@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
	debug_stack_usage_dec();

exit:
	ist_exit(regs, prev_state);
	ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);

@@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)

dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	math_error(regs, error_code, X86_TRAP_MF);
	exception_exit(prev_state);
}

dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	math_error(regs, error_code, X86_TRAP_XF);
	exception_exit(prev_state);
}

dotraplinkage void

@@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	BUG_ON(use_eager_fpu());

#ifdef CONFIG_MATH_EMULATION

@@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)

		info.regs = regs;
		math_emulate(&info);
		exception_exit(prev_state);
		return;
	}
#endif

@@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
	conditional_sti(regs);
#endif
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_device_not_available);

@@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available);
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
	siginfo_t info;
	enum ctx_state prev_state;

	prev_state = exception_enter();
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	local_irq_enable();

	info.si_signo = SIGILL;

@@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
			&info);
	}
	exception_exit(prev_state);
}
#endif
@@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)

	data = cyc2ns_write_begin(cpu);

	rdtscll(tsc_now);
	tsc_now = rdtsc();
	ns_now = cycles_2_ns(tsc_now);

	/*

@@ -290,12 +290,20 @@ u64 native_sched_clock(void)
	}

	/* read the Time Stamp Counter: */
	rdtscll(tsc_now);
	tsc_now = rdtsc();

	/* return the value in ns */
	return cycles_2_ns(tsc_now);
}

/*
 * Generate a sched_clock if you already have a TSC value.
 */
u64 native_sched_clock_from_tsc(u64 tsc)
{
	return cycles_2_ns(tsc);
}

/* We need to define a real function for sched_clock, to override the
   weak default version */
#ifdef CONFIG_PARAVIRT

@@ -308,12 +316,6 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

unsigned long long native_read_tsc(void)
{
	return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);

int check_tsc_unstable(void)
{
	return tsc_unstable;

@@ -976,7 +978,7 @@ static struct clocksource clocksource_tsc;
 */
static cycle_t read_tsc(struct clocksource *cs)
{
	return (cycle_t)get_cycles();
	return (cycle_t)rdtsc_ordered();
}

/*
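Throughout this series the old rdtscll()/get_cycles() calls become rdtsc(), and rdtsc_ordered() where ordering against surrounding accesses matters. A small user-space sketch of the two flavours with inline asm (RDTSC, plus an LFENCE for the ordered variant), timing a short busy loop; this is an illustration of the idea under my own assumptions, not the kernel's implementation:

#include <stdint.h>
#include <stdio.h>

static inline uint64_t rdtsc_plain(void)
{
	uint32_t lo, hi;
	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;
}

/* like rdtsc_ordered(): keep earlier loads from drifting past the read */
static inline uint64_t rdtsc_ordered_sketch(void)
{
	uint32_t lo, hi;
	__asm__ __volatile__("lfence; rdtsc" : "=a"(lo), "=d"(hi) : : "memory");
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t start = rdtsc_ordered_sketch();
	for (volatile int i = 0; i < 1000000; i++)
		;
	uint64_t end = rdtsc_ordered_sketch();

	printf("elapsed cycles: %llu\n", (unsigned long long)(end - start));
	printf("plain read:     %llu\n", (unsigned long long)rdtsc_plain());
	return 0;
}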
@@ -39,16 +39,15 @@ static cycles_t max_warp;
static int nr_warps;

/*
 * TSC-warp measurement loop running on both CPUs:
 * TSC-warp measurement loop running on both CPUs. This is not called
 * if there is no TSC.
 */
static void check_tsc_warp(unsigned int timeout)
{
	cycles_t start, now, prev, end;
	int i;

	rdtsc_barrier();
	start = get_cycles();
	rdtsc_barrier();
	start = rdtsc_ordered();
	/*
	 * The measurement runs for 'timeout' msecs:
	 */

@@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout)
		 */
		arch_spin_lock(&sync_lock);
		prev = last_tsc;
		rdtsc_barrier();
		now = get_cycles();
		rdtsc_barrier();
		now = rdtsc_ordered();
		last_tsc = now;
		arch_spin_unlock(&sync_lock);

@@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu)

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized:
	 * synchronized or if we have no TSC.
	 */
	if (unsynchronized_tsc())
		return;

@@ -190,6 +187,7 @@ void check_tsc_sync_target(void)
{
	int cpus = 2;

	/* Also aborts if there is no TSC. */
	if (unsynchronized_tsc() || tsc_clocksource_reliable)
		return;
@@ -985,3 +985,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs

	return -1;
}

bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
				struct pt_regs *regs)
{
	if (ctx == RP_CHECK_CALL) /* sp was just decremented by "call" insn */
		return regs->sp < ret->stack;
	else
		return regs->sp <= ret->stack;
}
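arch_uretprobe_is_alive() above decides whether a pending return probe still belongs to the current stack frame: right at a call instruction the stack pointer has just been pushed below the recorded value, so a strict < is required, while everywhere else an equal sp still counts as the same frame (<=). A tiny self-contained sketch of that comparison with made-up addresses:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum rp_check_sketch { RP_CHECK_CALL_SKETCH, RP_CHECK_RET_SKETCH };

/* recorded_stack is the sp captured when the return probe was armed,
 * sp is the current stack pointer (x86 stacks grow downward) */
static bool still_alive(enum rp_check_sketch ctx, uint64_t sp, uint64_t recorded_stack)
{
	if (ctx == RP_CHECK_CALL_SKETCH) /* sp was just decremented by "call" */
		return sp < recorded_stack;
	return sp <= recorded_stack;
}

int main(void)
{
	uint64_t recorded = 0x7ffe0000;

	/* at a call, an sp equal to the recorded one means the frame is gone */
	printf("%d\n", still_alive(RP_CHECK_CALL_SKETCH, 0x7ffe0000, recorded)); /* 0 */
	printf("%d\n", still_alive(RP_CHECK_CALL_SKETCH, 0x7ffdfff8, recorded)); /* 1 */
	/* elsewhere, equality still counts as alive */
	printf("%d\n", still_alive(RP_CHECK_RET_SKETCH, 0x7ffe0000, recorded));  /* 1 */
	return 0;
}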
@@ -44,11 +44,14 @@
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>

/*
 * Known problems:

@@ -66,10 +69,6 @@
 */


#define KVM86	((struct kernel_vm86_struct *)regs)
#define VMPI	KVM86->vm86plus


/*
 * 8- and 16-bit register defines..
 */

@@ -81,8 +80,8 @@
/*
 * virtual flags (16 and 32-bit versions)
 */
#define VFLAGS	(*(unsigned short *)&(current->thread.v86flags))
#define VEFLAGS	(current->thread.v86flags)
#define VFLAGS	(*(unsigned short *)&(current->thread.vm86->veflags))
#define VEFLAGS	(current->thread.vm86->veflags)

#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))

@@ -90,46 +89,13 @@
#define SAFE_MASK	(0xDD5)
#define RETURN_MASK	(0xDFF)

/* convert kernel_vm86_regs to vm86_regs */
static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
				  const struct kernel_vm86_regs *regs)
{
	int ret = 0;

	/*
	 * kernel_vm86_regs is missing gs, so copy everything up to
	 * (but not including) orig_eax, and then rest including orig_eax.
	 */
	ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
	ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
			    sizeof(struct kernel_vm86_regs) -
			    offsetof(struct kernel_vm86_regs, pt.orig_ax));

	return ret;
}

/* convert vm86_regs to kernel_vm86_regs */
static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
				    const struct vm86_regs __user *user,
				    unsigned extra)
{
	int ret = 0;

	/* copy ax-fs inclusive */
	ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
	/* copy orig_ax-__gsh+extra */
	ret += copy_from_user(&regs->pt.orig_ax, &user->orig_eax,
			      sizeof(struct kernel_vm86_regs) -
			      offsetof(struct kernel_vm86_regs, pt.orig_ax) +
			      extra);
	return ret;
}

struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
	struct tss_struct *tss;
	struct pt_regs *ret;
	unsigned long tmp;
	struct task_struct *tsk = current;
	struct vm86plus_struct __user *user;
	struct vm86 *vm86 = current->thread.vm86;
	long err = 0;

	/*
	 * This gets called from entry.S with interrupts disabled, but
@@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
	 */
	local_irq_enable();

	if (!current->thread.vm86_info) {
		pr_alert("no vm86_info: BAD\n");
	if (!vm86 || !vm86->user_vm86) {
		pr_alert("no user_vm86: BAD\n");
		do_exit(SIGSEGV);
	}
	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
	tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
	if (tmp) {
		pr_alert("could not access userspace vm86_info\n");
	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
	user = vm86->user_vm86;

	if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
		       sizeof(struct vm86plus_struct) :
		       sizeof(struct vm86_struct))) {
		pr_alert("could not access userspace vm86 info\n");
		do_exit(SIGSEGV);
	}

	put_user_try {
		put_user_ex(regs->pt.bx, &user->regs.ebx);
		put_user_ex(regs->pt.cx, &user->regs.ecx);
		put_user_ex(regs->pt.dx, &user->regs.edx);
		put_user_ex(regs->pt.si, &user->regs.esi);
		put_user_ex(regs->pt.di, &user->regs.edi);
		put_user_ex(regs->pt.bp, &user->regs.ebp);
		put_user_ex(regs->pt.ax, &user->regs.eax);
		put_user_ex(regs->pt.ip, &user->regs.eip);
		put_user_ex(regs->pt.cs, &user->regs.cs);
		put_user_ex(regs->pt.flags, &user->regs.eflags);
		put_user_ex(regs->pt.sp, &user->regs.esp);
		put_user_ex(regs->pt.ss, &user->regs.ss);
		put_user_ex(regs->es, &user->regs.es);
		put_user_ex(regs->ds, &user->regs.ds);
		put_user_ex(regs->fs, &user->regs.fs);
		put_user_ex(regs->gs, &user->regs.gs);

		put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
	} put_user_catch(err);
	if (err) {
		pr_alert("could not access userspace vm86 info\n");
		do_exit(SIGSEGV);
	}

	tss = &per_cpu(cpu_tss, get_cpu());
	current->thread.sp0 = current->thread.saved_sp0;
	current->thread.sysenter_cs = __KERNEL_CS;
	load_sp0(tss, &current->thread);
	current->thread.saved_sp0 = 0;
	tsk->thread.sp0 = vm86->saved_sp0;
	tsk->thread.sysenter_cs = __KERNEL_CS;
	load_sp0(tss, &tsk->thread);
	vm86->saved_sp0 = 0;
	put_cpu();

	ret = KVM86->regs32;
	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));

	ret->fs = current->thread.saved_fs;
	set_user_gs(ret, current->thread.saved_gs);
	lazy_load_gs(vm86->regs32.gs);

	return ret;
	regs->pt.ax = retval;
}

static void mark_screen_rdonly(struct mm_struct *mm)
@@ -200,45 +192,16 @@ out:


static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);

SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86)
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
{
	struct kernel_vm86_struct info; /* declare this _on top_,
					 * this avoids wasting of stack space.
					 * This remains on the stack until we
					 * return to 32 bit user space.
					 */
	struct task_struct *tsk = current;
	int tmp;

	if (tsk->thread.saved_sp0)
		return -EPERM;
	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
				       offsetof(struct kernel_vm86_struct, vm86plus) -
				       sizeof(info.regs));
	if (tmp)
		return -EFAULT;
	memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
	info.regs32 = current_pt_regs();
	tsk->thread.vm86_info = v86;
	do_sys_vm86(&info, tsk);
	return 0;	/* we never return here */
	return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
}


SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
{
	struct kernel_vm86_struct info; /* declare this _on top_,
					 * this avoids wasting of stack space.
					 * This remains on the stack until we
					 * return to 32 bit user space.
					 */
	struct task_struct *tsk;
	int tmp;
	struct vm86plus_struct __user *v86;

	tsk = current;
	switch (cmd) {
	case VM86_REQUEST_IRQ:
	case VM86_FREE_IRQ:
@@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
	}

	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
	if (tsk->thread.saved_sp0)
		return -EPERM;
	v86 = (struct vm86plus_struct __user *)arg;
	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
				       offsetof(struct kernel_vm86_struct, regs32) -
				       sizeof(info.regs));
	if (tmp)
		return -EFAULT;
	info.regs32 = current_pt_regs();
	info.vm86plus.is_vm86pus = 1;
	tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
	do_sys_vm86(&info, tsk);
	return 0;	/* we never return here */
	return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
}


static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
	struct tss_struct *tss;
/*
 * make sure the vm86() system call doesn't try to do anything silly
 */
	info->regs.pt.ds = 0;
	info->regs.pt.es = 0;
	info->regs.pt.fs = 0;
#ifndef CONFIG_X86_32_LAZY_GS
	info->regs.pt.gs = 0;
#endif
	struct task_struct *tsk = current;
	struct vm86 *vm86 = tsk->thread.vm86;
	struct kernel_vm86_regs vm86regs;
	struct pt_regs *regs = current_pt_regs();
	unsigned long err = 0;

	if (!vm86) {
		if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
			return -ENOMEM;
		tsk->thread.vm86 = vm86;
	}
	if (vm86->saved_sp0)
		return -EPERM;

	if (!access_ok(VERIFY_READ, user_vm86, plus ?
		       sizeof(struct vm86_struct) :
		       sizeof(struct vm86plus_struct)))
		return -EFAULT;

	memset(&vm86regs, 0, sizeof(vm86regs));
	get_user_try {
		unsigned short seg;
		get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
		get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
		get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
		get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
		get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
		get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
		get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
		get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
		get_user_ex(seg, &user_vm86->regs.cs);
		vm86regs.pt.cs = seg;
		get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
		get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
		get_user_ex(seg, &user_vm86->regs.ss);
		vm86regs.pt.ss = seg;
		get_user_ex(vm86regs.es, &user_vm86->regs.es);
		get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
		get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
		get_user_ex(vm86regs.gs, &user_vm86->regs.gs);

		get_user_ex(vm86->flags, &user_vm86->flags);
		get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
		get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
	} get_user_catch(err);
	if (err)
		return err;

	if (copy_from_user(&vm86->int_revectored,
			   &user_vm86->int_revectored,
			   sizeof(struct revectored_struct)))
		return -EFAULT;
	if (copy_from_user(&vm86->int21_revectored,
			   &user_vm86->int21_revectored,
			   sizeof(struct revectored_struct)))
		return -EFAULT;
	if (plus) {
		if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
				   sizeof(struct vm86plus_info_struct)))
			return -EFAULT;
		vm86->vm86plus.is_vm86pus = 1;
	} else
		memset(&vm86->vm86plus, 0,
		       sizeof(struct vm86plus_info_struct));

	memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
	vm86->user_vm86 = user_vm86;

/*
 * The flags register is also special: we cannot trust that the user
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
	VEFLAGS = info->regs.pt.flags;
	info->regs.pt.flags &= SAFE_MASK;
	info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
	info->regs.pt.flags |= X86_VM_MASK;
	VEFLAGS = vm86regs.pt.flags;
	vm86regs.pt.flags &= SAFE_MASK;
	vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
	vm86regs.pt.flags |= X86_VM_MASK;

	vm86regs.pt.orig_ax = regs->orig_ax;

	switch (info->cpu_type) {
	switch (vm86->cpu_type) {
	case CPU_286:
		tsk->thread.v86mask = 0;
		vm86->veflags_mask = 0;
		break;
	case CPU_386:
		tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		break;
	case CPU_486:
		tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		break;
	default:
		tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
		break;
	}

/*
 * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
 * Save old state
 */
	info->regs32->ax = VM86_SIGNAL;
	tsk->thread.saved_sp0 = tsk->thread.sp0;
	tsk->thread.saved_fs = info->regs32->fs;
	tsk->thread.saved_gs = get_user_gs(info->regs32);
	vm86->saved_sp0 = tsk->thread.sp0;
	lazy_save_gs(vm86->regs32.gs);

	tss = &per_cpu(cpu_tss, get_cpu());
	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
	/* make room for real-mode segments */
	tsk->thread.sp0 += 16;
	if (cpu_has_sep)
		tsk->thread.sysenter_cs = 0;
	load_sp0(tss, &tsk->thread);
	put_cpu();

	tsk->thread.screen_bitmap = info->screen_bitmap;
	if (info->flags & VM86_SCREEN_BITMAP)
	if (vm86->flags & VM86_SCREEN_BITMAP)
		mark_screen_rdonly(tsk->mm);

	/*call __audit_syscall_exit since we do not exit via the normal paths */
#ifdef CONFIG_AUDITSYSCALL
	if (unlikely(current->audit_context))
		__audit_syscall_exit(1, 0);
#endif

	__asm__ __volatile__(
		"movl %0,%%esp\n\t"
		"movl %1,%%ebp\n\t"
#ifdef CONFIG_X86_32_LAZY_GS
		"mov %2, %%gs\n\t"
#endif
		"jmp resume_userspace"
		: /* no outputs */
		:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
	/* we never return here */
}

static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
{
	struct pt_regs *regs32;

	regs32 = save_v86_state(regs16);
	regs32->ax = retval;
	__asm__ __volatile__("movl %0,%%esp\n\t"
		"movl %1,%%ebp\n\t"
		"jmp resume_userspace"
		: : "r" (regs32), "r" (current_thread_info()));
	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
	force_iret();
	return regs->ax;
}

static inline void set_IF(struct kernel_vm86_regs *regs)
{
	VEFLAGS |= X86_EFLAGS_VIF;
	if (VEFLAGS & X86_EFLAGS_VIP)
		return_to_32bit(regs, VM86_STI);
}

static inline void clear_IF(struct kernel_vm86_regs *regs)
@@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs)

static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
	set_flags(VEFLAGS, flags, current->thread.v86mask);
	set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
	set_flags(regs->pt.flags, flags, SAFE_MASK);
	if (flags & X86_EFLAGS_IF)
		set_IF(regs);

@@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs

static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
	set_flags(VFLAGS, flags, current->thread.v86mask);
	set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
	set_flags(regs->pt.flags, flags, SAFE_MASK);
	if (flags & X86_EFLAGS_IF)
		set_IF(regs);

@@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
	if (VEFLAGS & X86_EFLAGS_VIF)
		flags |= X86_EFLAGS_IF;
	flags |= X86_EFLAGS_IOPL;
	return flags | (VEFLAGS & current->thread.v86mask);
	return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
}

static inline int is_revectored(int nr, struct revectored_struct *bitmap)
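The virtual-flag handling above rests on the set_flags(X, new, mask) macro defined earlier in this file: bits inside the mask are taken from the new value, bits outside it keep their old contents, which is how SAFE_MASK stops a vm86 task from touching privileged EFLAGS bits. A small stand-alone sketch of that merge (the flag values below are arbitrary examples, not real register contents):

#include <stdio.h>

/* same shape as the kernel macro: keep ~mask bits of x, take mask bits of new */
#define set_flags(x, new, mask) \
	((x) = ((x) & ~(mask)) | ((new) & (mask)))

#define SAFE_MASK_SKETCH 0x0DD5u /* illustrative mask, mirroring SAFE_MASK */

int main(void)
{
	unsigned int eflags = 0x0202;     /* pretend current flags */
	unsigned int user_flags = 0x3FFF; /* user tries to set every low bit */

	set_flags(eflags, user_flags, SAFE_MASK_SKETCH);
	/* only bits inside the mask changed; bits outside it stayed put */
	printf("merged flags: 0x%04x\n", eflags);
	return 0;
}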
@@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
{
	unsigned long __user *intr_ptr;
	unsigned long segoffs;
	struct vm86 *vm86 = current->thread.vm86;

	if (regs->pt.cs == BIOSSEG)
		goto cannot_handle;
	if (is_revectored(i, &KVM86->int_revectored))
	if (is_revectored(i, &vm86->int_revectored))
		goto cannot_handle;
	if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
	if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
		goto cannot_handle;
	intr_ptr = (unsigned long __user *) (i << 2);
	if (get_user(segoffs, intr_ptr))

@@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
	return;

cannot_handle:
	return_to_32bit(regs, VM86_INTx + (i << 8));
	save_v86_state(regs, VM86_INTx + (i << 8));
}

int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
	if (VMPI.is_vm86pus) {
	struct vm86 *vm86 = current->thread.vm86;

	if (vm86->vm86plus.is_vm86pus) {
		if ((trapno == 3) || (trapno == 1)) {
			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
			/* setting this flag forces the code in entry_32.S to
			   the path where we call save_v86_state() and change
			   the stack pointer to KVM86->regs32 */
			set_thread_flag(TIF_NOTIFY_RESUME);
			save_v86_state(regs, VM86_TRAP + (trapno << 8));
			return 0;
		}
		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
@@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
	unsigned char __user *ssp;
	unsigned short ip, sp, orig_flags;
	int data32, pref_done;
	struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;

#define CHECK_IF_IN_TRAP \
	if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
	if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
		newflags |= X86_EFLAGS_TF
#define VM86_FAULT_RETURN do { \
	if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
		return_to_32bit(regs, VM86_PICRETURN); \
	if (orig_flags & X86_EFLAGS_TF) \
		handle_vm86_trap(regs, 0, 1); \
	return; } while (0)

	orig_flags = *(unsigned short *)&regs->pt.flags;

@@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
			SP(regs) -= 2;
		}
		IP(regs) = ip;
		VM86_FAULT_RETURN;
		goto vm86_fault_return;

	/* popf */
	case 0x9d:

@@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
		else
			set_vflags_short(newflags, regs);

		VM86_FAULT_RETURN;
		goto check_vip;
		}

	/* int xx */
	case 0xcd: {
		int intno = popb(csp, ip, simulate_sigsegv);
		IP(regs) = ip;
		if (VMPI.vm86dbg_active) {
			if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
				return_to_32bit(regs, VM86_INTx + (intno << 8));
		if (vmpi->vm86dbg_active) {
			if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
				save_v86_state(regs, VM86_INTx + (intno << 8));
				return;
			}
		}
		do_int(regs, intno, ssp, sp);
		return;

@@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
		} else {
			set_vflags_short(newflags, regs);
		}
		VM86_FAULT_RETURN;
		goto check_vip;
		}

	/* cli */
	case 0xfa:
		IP(regs) = ip;
		clear_IF(regs);
		VM86_FAULT_RETURN;
		goto vm86_fault_return;

	/* sti */
	/*

@@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
	case 0xfb:
		IP(regs) = ip;
		set_IF(regs);
		VM86_FAULT_RETURN;
		goto check_vip;

	default:
		return_to_32bit(regs, VM86_UNKNOWN);
		save_v86_state(regs, VM86_UNKNOWN);
	}

	return;

check_vip:
	if (VEFLAGS & X86_EFLAGS_VIP) {
		save_v86_state(regs, VM86_STI);
		return;
	}

vm86_fault_return:
	if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
		save_v86_state(regs, VM86_PICRETURN);
		return;
	}
	if (orig_flags & X86_EFLAGS_TF)
		handle_vm86_trap(regs, 0, X86_TRAP_DB);
	return;

simulate_sigsegv:
	/* FIXME: After a long discussion with Stas we finally
	 * agreed, that this is wrong. Here we should

@@ -720,7 +713,7 @@ simulate_sigsegv:
	 * should be a mixture of the two, but how do we
	 * get the information? [KD]
	 */
	return_to_32bit(regs, VM86_UNKNOWN);
	save_v86_state(regs, VM86_UNKNOWN);
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */