Merge branch 'linus' into x86/urgent

Merge reason: we want to queue up a dependent fix.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2009-12-07 13:14:12 +01:00
1825 changed files with 54210 additions and 29806 deletions

View File

@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o

View File

@@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
struct cpuinfo_x86 *c = &cpu_data(pr->id);
pr->pdc = NULL;
if (c->x86_vendor == X86_VENDOR_INTEL)
if (c->x86_vendor == X86_VENDOR_INTEL ||
c->x86_vendor == X86_VENDOR_CENTAUR)
init_intel_pdc(pr, c);
return;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
* Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
@@ -25,10 +25,12 @@
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <asm/pci-direct.h>
#include <asm/amd_iommu_proto.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
/*
* definitions for the ACPI scanning code
@@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
#ifdef CONFIG_IOMMU_STRESS
bool amd_iommu_isolate = false;
#else
bool amd_iommu_isolate = true; /* if true, device isolation is
enabled */
#endif
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
/* Array to assign indices to IOMMUs*/
struct amd_iommu *amd_iommus[MAX_IOMMUS];
int amd_iommus_present;
/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
/*
* List of protection domains - used during resume
*/
LIST_HEAD(amd_iommu_pd_list);
spinlock_t amd_iommu_pd_lock;
/*
* Pointer to the device table which is shared by all AMD IOMMUs
* it is indexed by the PCI device id or the HT unit id and contains
@@ -156,12 +164,6 @@ u16 *amd_iommu_alias_table;
*/
struct amd_iommu **amd_iommu_rlookup_table;
/*
* The pd table (protection domain table) is used to find the protection domain
* data structure a device belongs to. Indexed with the PCI device id too.
*/
struct protection_domain **amd_iommu_pd_table;
/*
* AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap
* to know which ones are already in use.
@@ -838,7 +840,18 @@ static void __init free_iommu_all(void)
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
{
spin_lock_init(&iommu->lock);
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
iommu->index = amd_iommus_present++;
if (unlikely(iommu->index >= MAX_IOMMUS)) {
WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
return -ENOSYS;
}
/* Index is fine - add IOMMU to the array */
amd_iommus[iommu->index] = iommu;
/*
* Copy data from ACPI table entry to the iommu struct
@@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
return pci_enable_device(iommu->dev);
}
@@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
*
****************************************************************************/
static int __init iommu_setup_msi(struct amd_iommu *iommu)
static int iommu_setup_msi(struct amd_iommu *iommu)
{
int r;
@@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = {
* functions. Finally it prints some information about AMD IOMMUs and
* the driver state and enables the hardware.
*/
int __init amd_iommu_init(void)
static int __init amd_iommu_init(void)
{
int i, ret = 0;
if (no_iommu) {
printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
return 0;
}
if (!amd_iommu_detected)
return -ENODEV;
/*
* First parse ACPI tables to find the largest Bus/Dev/Func
* we need to handle. Upon this information the shared data
@@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void)
if (amd_iommu_rlookup_table == NULL)
goto free;
/*
* Protection Domain table - maps devices to protection domains
* This table has the same size as the rlookup_table
*/
amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(rlookup_table_size));
if (amd_iommu_pd_table == NULL)
goto free;
amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
GFP_KERNEL | __GFP_ZERO,
get_order(MAX_DOMAIN_ID/8));
@@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void)
*/
amd_iommu_pd_alloc_bitmap[0] = 1;
spin_lock_init(&amd_iommu_pd_lock);
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
@@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void)
if (iommu_pass_through)
goto out;
printk(KERN_INFO "AMD-Vi: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
else
printk("disabled\n");
if (amd_iommu_unmap_flush)
printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
else
printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
x86_platform.iommu_shutdown = disable_iommus;
out:
return ret;
@@ -1304,9 +1299,6 @@ free:
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
get_order(MAX_DOMAIN_ID/8));
free_pages((unsigned long)amd_iommu_pd_table,
get_order(rlookup_table_size));
free_pages((unsigned long)amd_iommu_rlookup_table,
get_order(rlookup_table_size));
@@ -1323,11 +1315,6 @@ free:
goto out;
}
void amd_iommu_shutdown(void)
{
disable_iommus();
}
/****************************************************************************
*
* Early detect code. This code runs at IOMMU detection time in the DMA
@@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
void __init amd_iommu_detect(void)
{
if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
return;
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
iommu_detected = 1;
amd_iommu_detected = 1;
#ifdef CONFIG_GART_IOMMU
gart_iommu_aperture_disabled = 1;
gart_iommu_aperture = 0;
#endif
x86_init.iommu.iommu_init = amd_iommu_init;
}
}
@@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str)
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = true;
if (strncmp(str, "share", 5) == 0)
amd_iommu_isolate = false;
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}

View File

@@ -28,6 +28,7 @@
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/k8.h>
#include <asm/x86_init.h>
int gart_iommu_aperture;
int gart_iommu_aperture_disabled __initdata;
@@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void)
iommu_detected = 1;
gart_iommu_aperture = 1;
x86_init.iommu.iommu_init = gart_iommu_init;
aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
@@ -456,7 +458,7 @@ out:
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
} else if (swiotlb && !valid_agp) {
} else if (!valid_agp) {
/* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||

View File

@@ -2,7 +2,7 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o

View File

@@ -241,28 +241,13 @@ static int modern_apic(void)
}
/*
* bare function to substitute write operation
* and it's _that_ fast :)
*/
static void native_apic_write_dummy(u32 reg, u32 v)
{
WARN_ON_ONCE((cpu_has_apic || !disable_apic));
}
static u32 native_apic_read_dummy(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
return 0;
}
/*
* right after this call apic->write/read doesn't do anything
* note that there is no restore operation it works one way
* right after this call apic become NOOP driven
* so apic->write/read doesn't do anything
*/
void apic_disable(void)
{
apic->read = native_apic_read_dummy;
apic->write = native_apic_write_dummy;
pr_info("APIC: switched to apic NOOP\n");
apic = &apic_noop;
}
void native_apic_wait_icr_idle(void)
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
apic_write(APIC_TMICT, 0xffffffff);
apic_write(APIC_TMICT, 0);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
int ret, x2apic_enabled = 0;
int dmar_table_init_ret = 0;
int dmar_table_init_ret;
#ifdef CONFIG_INTR_REMAP
dmar_table_init_ret = dmar_table_init();
if (dmar_table_init_ret)
pr_debug("dmar_table_init() failed with %d:\n",
dmar_table_init_ret);
#endif
if (dmar_table_init_ret && !x2apic_supported())
return;
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {

View File

@@ -0,0 +1,200 @@
/*
* NOOP APIC driver.
*
* Does almost nothing and should be substituted by a real apic driver via
* probe routine.
*
* Though in case if apic is disabled (for some reason) we try
* to not uglify the caller's code and allow to call (some) apic routines
* like self-ipi, etc...
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <linux/smp.h>
#include <asm/ipi.h>
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/e820.h>
static void noop_init_apic_ldr(void) { }
static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_allbutself(int vector) { }
static void noop_send_IPI_all(int vector) { }
static void noop_send_IPI_self(int vector) { }
static void noop_apic_wait_icr_idle(void) { }
static void noop_apic_icr_write(u32 low, u32 id) { }
static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
{
return -1;
}
static u32 noop_safe_apic_wait_icr_idle(void)
{
return 0;
}
static u64 noop_apic_icr_read(void)
{
return 0;
}
static int noop_cpu_to_logical_apicid(int cpu)
{
return 0;
}
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
return 0;
}
static unsigned int noop_get_apic_id(unsigned long x)
{
return 0;
}
static int noop_probe(void)
{
/*
* NOOP apic should not ever be
* enabled via probe routine
*/
return 0;
}
static int noop_apic_id_registered(void)
{
/*
* if we would be really "pedantic"
* we should pass read_apic_id() here
* but since NOOP suppose APIC ID = 0
* lets save a few cycles
*/
return physid_isset(0, phys_cpu_present_map);
}
static const struct cpumask *noop_target_cpus(void)
{
/* only BSP here */
return cpumask_of(0);
}
static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
{
return physid_isset(apicid, *map);
}
static unsigned long noop_check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
}
static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
if (cpu != 0)
pr_warning("APIC: Vector allocated for non-BSP cpu\n");
cpumask_clear(retmask);
cpumask_set_cpu(cpu, retmask);
}
int noop_apicid_to_node(int logical_apicid)
{
/* we're always on node 0 */
return 0;
}
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
return 0;
}
static void noop_apic_write(u32 reg, u32 v)
{
WARN_ON_ONCE((cpu_has_apic || !disable_apic));
}
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
.acpi_madt_oem_check = NULL,
.apic_id_registered = noop_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
.target_cpus = noop_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = noop_check_apicid_used,
.check_apicid_present = noop_check_apicid_present,
.vector_allocation_domain = noop_vector_allocation_domain,
.init_apic_ldr = noop_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = noop_apicid_to_node,
.cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = noop_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = noop_get_apic_id,
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
.cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = noop_send_IPI_mask,
.send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
.send_IPI_allbutself = noop_send_IPI_allbutself,
.send_IPI_all = noop_send_IPI_all,
.send_IPI_self = noop_send_IPI_self,
.wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
/* should be safe */
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = noop_apic_read,
.write = noop_apic_write,
.icr_read = noop_apic_icr_read,
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
.safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
};

View File

@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void)
#endif
}
static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
{
return physid_mask_of_physid(phys_apicid);
}
/* Mapping from cpu number to logical apicid */
static inline int bigsmp_cpu_to_logical_apicid(int cpu)
{
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
return cpu_physical_id(cpu);
}
static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
return physids_promote(0xFFL);
physids_promote(0xFFL, retmap);
}
static int bigsmp_check_phys_apicid_present(int phys_apicid)
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = {
.apicid_to_node = bigsmp_apicid_to_node,
.cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = bigsmp_check_phys_apicid_present,
.enable_apic_mode = NULL,

View File

@@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void)
return cpumask_of(smp_processor_id());
}
static unsigned long
es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
static unsigned long es7000_check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
@@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu)
static int cpu_id;
static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
{
physid_mask_t mask;
mask = physid_mask_of_physid(cpu_id);
physid_set_mask_of_physid(cpu_id, retmap);
++cpu_id;
return mask;
}
/* Mapping from cpu number to logical apicid */
@@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu)
#endif
}
static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
return physids_promote(0xff);
physids_promote(0xFFL, retmap);
}
static int es7000_check_phys_apicid_present(int cpu_physical_apicid)

View File

@@ -60,8 +60,6 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_irq.h>
#include <asm/apic.h>
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}
/*
* This is performance-critical, we want to do it O(1)
*
* Most irqs are mapped 1:1 with pins.
*/
struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
add_pin_to_irq_node(cfg, node, newapic, newpin);
}
static void __io_apic_modify_irq(struct irq_pin_list *entry,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
unsigned int reg, pin;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin * 2);
reg &= mask_and;
reg |= mask_or;
io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
if (final)
final(entry);
}
static void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
int pin;
struct irq_pin_list *entry;
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin * 2);
reg &= mask_and;
reg |= mask_or;
io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
if (final)
final(entry);
}
for_each_irq_pin(entry, cfg->irq_2_pin)
__io_apic_modify_irq(entry, mask_and, mask_or, final);
}
static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
{
__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
IO_APIC_REDIR_MASKED, NULL);
}
static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
{
__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
IO_APIC_REDIR_MASKED, NULL);
}
static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
struct irq_cfg *cfg = desc->chip_data;
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
int cpu, err;
cpumask_var_t tmp_mask;
if ((cfg->move_in_progress) || cfg->move_cleanup_count)
if (cfg->move_in_progress)
return -EBUSY;
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1237,8 +1227,7 @@ next:
return err;
}
static int
assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_desc *desc;
unsigned int irq;
if (apic_verbosity == APIC_QUIET)
return;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base)
{
int i;
if (apic_verbosity == APIC_QUIET)
return;
printk(KERN_DEBUG);
for (i = 0; i < 8; i++)
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
unsigned int i, v, ver, maxlvt;
u64 icr;
if (apic_verbosity == APIC_QUIET)
return;
printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk("\n");
}
__apicdebuginit(void) print_all_local_APICs(void)
__apicdebuginit(void) print_local_APICs(int maxcpu)
{
int cpu;
if (!maxcpu)
return;
preempt_disable();
for_each_online_cpu(cpu)
for_each_online_cpu(cpu) {
if (cpu >= maxcpu)
break;
smp_call_function_single(cpu, print_local_APIC, NULL, 1);
}
preempt_enable();
}
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
if (!nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
__apicdebuginit(int) print_all_ICs(void)
static int __initdata show_lapic = 1;
static __init int setup_show_lapic(char *arg)
{
int num = -1;
if (strcmp(arg, "all") == 0) {
show_lapic = CONFIG_NR_CPUS;
} else {
get_option(&arg, &num);
if (num >= 0)
show_lapic = num;
}
return 1;
}
__setup("show_lapic=", setup_show_lapic);
__apicdebuginit(int) print_ICs(void)
{
if (apic_verbosity == APIC_QUIET)
return 0;
print_PIC();
/* don't print out if apic is not there */
if (!cpu_has_apic && !apic_from_smp_config())
return 0;
print_all_local_APICs();
print_local_APICs(show_lapic);
print_IO_APIC();
return 0;
}
fs_initcall(print_all_ICs);
fs_initcall(print_ICs);
/* Where if anywhere is the i8259 connect in external int mode */
@@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
if (apic->check_apicid_used(phys_id_present_map,
if (apic->check_apicid_used(&phys_id_present_map,
mp_ioapics[apic_id].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic_id, mp_ioapics[apic_id].apicid);
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void)
mp_ioapics[apic_id].apicid = i;
} else {
physid_mask_t tmp;
tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
mp_ioapics[apic_id].apicid);
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
static void send_cleanup_vector(struct irq_cfg *cfg)
void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
cfg->move_cleanup_count = 0;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
cfg->move_cleanup_count++;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
@@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
}
}
static int
assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
/*
* Either sets desc->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that, or returns BAD_APICID and
* leaves desc->affinity untouched.
*/
static unsigned int
unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
@@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
cfg = irq_cfg(irq);
spin_lock(&desc->lock);
if (!cfg->move_cleanup_count)
goto unlock;
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
@@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
goto unlock;
}
__get_cpu_var(vector_irq)[vector] = -1;
cfg->move_cleanup_count--;
unlock:
spin_unlock(&desc->lock);
}
@@ -2460,21 +2456,33 @@ unlock:
irq_exit();
}
static void irq_complete_move(struct irq_desc **descp)
static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
{
struct irq_desc *desc = *descp;
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
unsigned me;
if (likely(!cfg->move_in_progress))
return;
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
}
static void irq_complete_move(struct irq_desc **descp)
{
__irq_complete_move(descp, ~get_irq_regs()->orig_ax);
}
void irq_force_complete_move(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg = desc->chip_data;
__irq_complete_move(&desc, cfg->vector);
}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
@@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq)
atomic_t irq_mis_count;
/*
* IO-APIC versions below 0x20 don't support EOI register.
* For the record, here is the information about various versions:
* 0Xh 82489DX
* 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
* 2Xh I/O(x)APIC which is PCI 2.2 Compliant
* 30h-FFh Reserved
*
* Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
* version as 0x2. This is an error with documentation and these ICH chips
* use io-apic's of version 0x20.
*
* For IO-APIC's with EOI register, we use that to do an explicit EOI.
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
for_each_irq_pin(entry, cfg->irq_2_pin) {
if (mp_ioapics[entry->apic].apicver >= 0x20) {
/*
* Intr-remapping uses pin number as the virtual vector
* in the RTE. Actual vector is programmed in
* intr-remapping table entry. Hence for the io-apic
* EOI we use the pin number.
*/
if (irq_remapped(irq))
io_apic_eoi(entry->apic, entry->pin);
else
io_apic_eoi(entry->apic, cfg->vector);
} else {
__mask_and_edge_IO_APIC_irq(entry);
__unmask_and_level_IO_APIC_irq(entry);
}
}
}
static void eoi_ioapic_irq(struct irq_desc *desc)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int irq;
irq = desc->irq;
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
__eoi_ioapic_irq(irq, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq)
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*
* Also in the case when cpu goes offline, fixup_irqs() will forward
* any unhandled interrupt on the offlined cpu to the new cpu
* destination that is handling the corresponding interrupt. This
* interrupt forwarding is done via IPI's. Hence, in this case also
* level-triggered io-apic interrupt will be seen as an edge
* interrupt in the IRR. And we can't rely on the cpu's EOI
* to be broadcasted to the IO-APIC's which will clear the remoteIRR
* corresponding to the level-triggered interrupt. Hence on IO-APIC's
* supporting EOI register, we do an explicit EOI to clear the
* remote IRR and on IO-APIC's which don't have an EOI register,
* we use the above logic (mask+edge followed by unmask+level) from
* Manfred Spraul to clear the remote IRR.
*/
cfg = desc->chip_data;
i = cfg->vector;
@@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
/*
* Tail end of clearing remote IRR bit (either by delivering the EOI
* message via io-apic EOI register write or simulating it using
* mask+edge followed by unnask+level logic) manually when the
* level triggered interrupt is seen as the edge triggered interrupt
* at the cpu.
*/
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
eoi_ioapic_irq(desc);
}
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq)
move_masked_irq(irq);
unmask_IO_APIC_irq_desc(desc);
}
/* Tail end of version 0x11 I/O APIC bug workaround */
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
__mask_and_edge_IO_APIC_irq(cfg);
__unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
}
#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
for_each_irq_pin(entry, cfg->irq_2_pin)
io_apic_eoi(entry->apic, entry->pin);
}
static void
eoi_ioapic_irq(struct irq_desc *desc)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int irq;
irq = desc->irq;
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
__eoi_ioapic_irq(irq, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void ir_ack_apic_edge(unsigned int irq)
{
ack_APIC_irq();
@@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
continue;
desc_new = move_irq_desc(desc_new, node);
cfg_new = desc_new->chip_data;
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
@@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
#ifdef CONFIG_X86_UV
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
unsigned long flags;
int err;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
spin_lock_irqsave(&vector_lock, flags);
set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
spin_unlock_irqrestore(&vector_lock, flags);
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return irq;
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
#endif /* CONFIG_X86_64 */
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
*/
if (physids_empty(apic_id_map))
apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
@@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
* Every APIC in a system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
if (apic->check_apicid_used(apic_id_map, apic_id)) {
if (apic->check_apicid_used(&apic_id_map, apic_id)) {
for (i = 0; i < get_physical_broadcast(); i++) {
if (!apic->check_apicid_used(apic_id_map, i))
if (!apic->check_apicid_used(&apic_id_map, i))
break;
}
@@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
apic_id = i;
}
tmp = apic->apicid_to_cpu_present(apic_id);
apic->apicid_to_cpu_present(apic_id, &tmp);
physids_or(apic_id_map, apic_id_map, tmp);
if (reg_00.bits.ID != apic_id) {
@@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
sprintf(mem, "IOAPIC %u", i);
snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
@@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void)
#ifdef CONFIG_X86_32
fake_ioapic_page:
#endif
ioapic_phys = (unsigned long)
alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
apic_printk(APIC_VERBOSE,
"mapped IOAPIC to %08lx (%08lx)\n",
__fix_to_virt(idx), ioapic_phys);
apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
ioapic_phys);
idx++;
ioapic_res->start = ioapic_phys;
ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
}

View File

@@ -39,7 +39,8 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
static cpumask_t backtrace_mask __read_mostly;
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
if (cpumask_test_cpu(cpu, &backtrace_mask)) {
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
show_regs(regs);
dump_stack();
spin_unlock(&lock);
cpumask_clear_cpu(cpu, &backtrace_mask);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
rc = 1;
}
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void)
{
int i;
cpumask_copy(&backtrace_mask, cpu_online_mask);
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpumask_empty(&backtrace_mask))
if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
}

View File

@@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void)
return cpu_all_mask;
}
static inline unsigned long
numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
{
return physid_isset(apicid, bitmap);
return physid_isset(apicid, *map);
}
static inline unsigned long numaq_check_apicid_present(int bit)
@@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq)
return apic != 0 && irq == 0;
}
static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* We don't have a good way to do this yet - hack */
return physids_promote(0xFUL);
return physids_promote(0xFUL, retmap);
}
static inline int numaq_cpu_to_logical_apicid(int cpu)
@@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
int cpu = __ffs(logical_apicid & 0xf);
return physid_mask_of_physid(cpu + 4*node);
physid_set_mask_of_physid(cpu + 4*node, retmap);
}
/* Where the IO area was mapped on multiquad, always 0 otherwise */

View File

@@ -108,7 +108,7 @@ struct apic apic_default = {
.apicid_to_node = default_apicid_to_node,
.cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = default_apicid_to_cpu_present,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,

View File

@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void)
return cpumask_of(0);
}
static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
return physids_promote(0x0F);
physids_promote(0x0FL, retmap);
}
static physid_mask_t summit_apicid_to_cpu_present(int apicid)
static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
{
return physid_mask_of_physid(0);
physid_set_mask_of_physid(0, retmap);
}
static int summit_check_phys_apicid_present(int physical_apicid)

View File

@@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode)
map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
}
static __init void map_low_mmrs(void)
{
init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
}
static __init void uv_rtc_init(void)
{
long status;
@@ -550,6 +556,8 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;

View File

@@ -204,7 +204,6 @@
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/smp_lock.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/timer.h>
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
static DEFINE_MUTEX(apm_mutex);
/*
* Set up a segment that references the real mode segment 0x40
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
return -EPERM;
switch (cmd) {
case APM_IOC_STANDBY:
lock_kernel();
mutex_lock(&apm_mutex);
if (as->standbys_read > 0) {
as->standbys_read--;
as->standbys_pending--;
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_STANDBY, as);
if (standbys_pending <= 0)
standby();
unlock_kernel();
mutex_unlock(&apm_mutex);
break;
case APM_IOC_SUSPEND:
lock_kernel();
mutex_lock(&apm_mutex);
if (as->suspends_read > 0) {
as->suspends_read--;
as->suspends_pending--;
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_SUSPEND, as);
if (suspends_pending <= 0) {
ret = suspend(1);
mutex_unlock(&apm_mutex);
} else {
as->suspend_wait = 1;
mutex_unlock(&apm_mutex);
wait_event_interruptible(apm_suspend_waitqueue,
as->suspend_wait == 0);
ret = as->suspend_result;
}
unlock_kernel();
return ret;
default:
return -ENOTTY;
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp)
{
struct apm_user *as;
lock_kernel();
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
sizeof(*as));
unlock_kernel();
return -ENOMEM;
}
as->magic = APM_BIOS_MAGIC;
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp)
user_list = as;
spin_unlock(&user_list_lock);
filp->private_data = as;
unlock_kernel();
return 0;
}

View File

@@ -5,6 +5,7 @@
# Don't trace early stages of a secondary CPU boot
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
endif
# Make sure load_percpu_segment has no stackprotector

View File

@@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
display_cacheinfo(c);
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {

View File

@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
display_cacheinfo(c);
cpu_detect_cache_sizes(c);
}
enum {

View File

@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
display_cacheinfo(c);
cpu_detect_cache_sizes(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
}
}
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
c->x86_cache_size = (ecx>>24) + (edx>>24);
#ifdef CONFIG_X86_64
/* On K8 L1 TLB is inclusive, so don't count it */
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
#endif
c->x86_cache_size = l2size;
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
l2size, ecx & 0xFF);
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -659,24 +654,31 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
#ifdef PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
#endif
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
const struct cpu_dev *cpudev = *cdev;
unsigned int j;
if (count >= X86_VENDOR_NUM)
break;
cpu_devs[count] = cpudev;
count++;
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
#ifdef PROCESSOR_SELECT
{
unsigned int j;
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
#endif
}
early_identify_cpu(&boot_cpu_data);
}
@@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
#endif
mcheck_cpu_init(c);
select_idle_routine(c);

View File

@@ -32,6 +32,6 @@ struct cpu_dev {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void display_cacheinfo(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
#endif

View File

@@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
/* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
/* Intel Xeon Processor 7100 Series Specification Update
* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
* AL30: A Machine Check Exception (MCE) Occurring during an
* Enhanced Intel SpeedStep Technology Ratio Change May Cause
* Both Processor Cores to Lock Up when HT is enabled*/
* Both Processor Cores to Lock Up. */
if (c->x86_vendor == X86_VENDOR_INTEL) {
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
(c->x86_mask == 8) && smt_capable())
(c->x86_mask == 8)) {
printk(KERN_INFO "acpi-cpufreq: Intel(R) "
"Xeon(R) 7100 Errata AL30, processors may "
"lock up on frequency changes: disabling "
"acpi-cpufreq.\n");
return -ENODEV;
}
}
return 0;
}
@@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int result = 0;
struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
static int blacklisted;
#endif
dprintk("acpi_cpufreq_cpu_init\n");
#ifdef CONFIG_SMP
result = acpi_cpufreq_blacklist(c);
if (result)
return result;
if (blacklisted)
return blacklisted;
blacklisted = acpi_cpufreq_blacklist(c);
if (blacklisted)
return blacklisted;
#endif
data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);

View File

@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
break;
case 1 ... 15:
longhaul_version = TYPE_LONGHAUL_V1;
longhaul_version = TYPE_LONGHAUL_V2;
if (c->x86_mask < 8) {
cpu_model = CPU_SAMUEL2;
cpuname = "C3 'Samuel 2' [C5B]";

View File

@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data)
* set it to 1 to avoid problems in the future.
* For all others it's a BIOS bug.
*/
if (!boot_cpu_data.x86 == 0x11)
if (boot_cpu_data.x86 != 0x11)
printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
"latency\n");
max_latency = 1;

View File

@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void)
return 0;
}
struct get_freq_data {
unsigned int speed;
unsigned int processor;
};
static void get_freq_data(void *_data)
static void get_freq_data(void *_speed)
{
struct get_freq_data *data = _data;
unsigned int *speed = _speed;
data->speed = speedstep_get_frequency(data->processor);
*speed = speedstep_get_frequency(speedstep_processor);
}
static unsigned int speedstep_get(unsigned int cpu)
{
struct get_freq_data data = { .processor = cpu };
unsigned int speed;
/* You're supposed to ensure CPU is online. */
if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
BUG();
dprintk("detected %u kHz as current frequency\n", data.speed);
return data.speed;
dprintk("detected %u kHz as current frequency\n", speed);
return speed;
}
/**

View File

@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
/* Handle the GX (Formally known as the GX2) */
if (c->x86 == 5 && c->x86_model == 5)
display_cacheinfo(c);
cpu_detect_cache_sizes(c);
else
init_cyrix(c);
}

View File

@@ -491,22 +491,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#endif
}
if (trace)
printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
else if (l1i)
printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
if (l1d)
printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
else
printk(KERN_CONT "\n");
if (l2)
printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
if (l3)
printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;

View File

@@ -46,6 +46,9 @@
#include "mce-internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
int mce_disabled __read_mostly;
#define MISC_MCELOG_MINOR 227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
static void default_decode_mce(struct mce *m)
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
static int default_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
return NOTIFY_STOP;
}
/*
* CPU/chipset specific EDAC code can register a callback here to print
* MCE errors in a human-readable form:
*/
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
static struct notifier_block mce_dec_nb = {
.notifier_call = default_decode_mce,
.priority = -1,
};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;
/* Emit the trace record: */
trace_mce_record(mce);
mce->finished = 0;
wmb();
for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that):
* (if the CPU has an implementation for that)
*/
x86_mce_decode_callback(m);
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
static int mce_banks_init(void)
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
static int __cpuinit mce_cap_init(void)
static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!mce_banks) {
int err = mce_banks_init();
int err = __mcheck_cpu_mce_banks_init();
if (err)
return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
static void mce_init(void)
static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
static void mce_cpu_features(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
static void mce_init_timer(void)
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
mce_ancient_init(c);
__mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
mce_init();
mce_cpu_features(c);
mce_init_timer();
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
}
/*
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
int __init mcheck_init(void)
{
atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
mcheck_intel_therm_init();
return 0;
}
/*
* Sysfs support
*/
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
static int mce_disable(void)
static int mce_disable_error_reporting(void)
{
int i;
@@ -1663,12 +1687,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
return mce_disable();
return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
return mce_disable();
return mce_disable_error_reporting();
}
/*
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
mce_init();
mce_cpu_features(&current_cpu_data);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(&current_cpu_data);
return 0;
}
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(&current_cpu_data))
return;
mce_init();
mce_init_timer();
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
mce_init_timer();
__mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void *h)
static void __cpuinit mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(&current_cpu_data))
return;
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
}
}
static void mce_reenable_cpu(void *h)
static void __cpuinit mce_reenable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@@ -2027,7 +2052,7 @@ static __init void mce_init_banks(void)
}
}
static __init int mce_init_device(void)
static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@@ -2055,7 +2080,7 @@ static __init int mce_init_device(void)
return err;
}
device_initcall(mce_init_device);
device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@@ -2103,7 +2128,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
static int __init mce_debugfs_init(void)
static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@@ -2117,5 +2142,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
late_initcall(mce_debugfs_init);
late_initcall(mcheck_debugfs_init);
#endif

View File

@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
static u32 lvtthmr_init __read_mostly;
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
}
void __init mcheck_intel_therm_init(void)
{
/*
* This function is only called on boot CPU. Save the init thermal
* LVT value on BSP and use that value to restore APs' thermal LVT
* entry BIOS programmed later
*/
if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
lvtthmr_init = apic_read(APIC_LVTTHMR);
}
void intel_init_thermal(struct cpuinfo_x86 *c)
{
unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
* since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
* Always restore the value that BIOS has programmed on AP based on
* BSP's info we saved since BIOS is always setting the same value
* for all threads/cores
*/
apic_write(APIC_LVTTHMR, lvtthmr_init);
h = lvtthmr_init;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);

View File

@@ -77,6 +77,18 @@ struct cpu_hw_events {
struct debug_store *ds;
};
struct event_constraint {
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int code;
};
#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->idxmsk[0]; (e)++)
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -102,6 +114,8 @@ struct x86_pmu {
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
int (*get_event_idx)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
static const struct event_constraint *event_constraints;
/*
* Not sure about some of these
*/
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
return hw_event & P6_EVNTSEL_MASK;
}
static const struct event_constraint intel_p6_event_constraints[] =
{
EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
EVENT_CONSTRAINT_END
};
/*
* Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
static const struct event_constraint intel_core_event_constraints[] =
{
EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
EVENT_CONSTRAINT_END
};
static const struct event_constraint intel_nehalem_event_constraints[] =
{
EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
EVENT_CONSTRAINT_END
};
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
static const u64 nehalem_hw_cache_event_ids
static __initconst u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids
},
};
static const u64 core2_hw_cache_event_ids
static __initconst u64 core2_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids
},
};
static const u64 atom_hw_cache_event_ids
static __initconst u64 atom_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
(CORE_EVNTSEL_EVENT_MASK | \
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
return hw_event & CORE_EVNTSEL_MASK;
}
static const u64 amd_hw_cache_event_ids
static __initconst u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
hwc->config = ARCH_PERFMON_EVENTSEL_INT;
hwc->idx = -1;
/*
* Count user and OS events unless requested not to.
*/
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
x86_pmu_enable_event(hwc, idx);
}
static int
fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
static int fixed_mode_idx(struct hw_perf_event *hwc)
{
unsigned int hw_event;
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
if (!x86_pmu.num_events_fixed)
return -1;
/*
* fixed counters do not take all possible filters
*/
if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
return -1;
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
}
/*
* Find a PMC slot for the freshly enabled / scheduled in event:
* generic counter allocator: get next free counter
*/
static int x86_pmu_enable(struct perf_event *event)
static int
gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
idx = fixed_mode_idx(event, hwc);
idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
return idx == x86_pmu.num_events ? -1 : idx;
}
/*
* intel-specific counter allocator: check event constraints
*/
static int
intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
const struct event_constraint *event_constraint;
int i, code;
if (!event_constraints)
goto skip;
code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
for_each_event_constraint(event_constraint, event_constraints) {
if (code == event_constraint->code) {
for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
if (!test_and_set_bit(i, cpuc->used_mask))
return i;
}
return -1;
}
}
skip:
return gen_get_event_idx(cpuc, hwc);
}
static int
x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
int idx;
idx = fixed_mode_idx(hwc);
if (idx == X86_PMC_IDX_FIXED_BTS) {
/* BTS is already occupied. */
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
hwc->config_base = 0;
hwc->event_base = 0;
hwc->event_base = 0;
hwc->idx = idx;
} else if (idx >= 0) {
/*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
} else {
idx = hwc->idx;
/* Try to get the previous generic event again */
if (test_and_set_bit(idx, cpuc->used_mask)) {
if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
idx = find_first_zero_bit(cpuc->used_mask,
x86_pmu.num_events);
if (idx == x86_pmu.num_events)
idx = x86_pmu.get_event_idx(cpuc, hwc);
if (idx == -1)
return -EAGAIN;
set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
hwc->config_base = x86_pmu.eventsel;
hwc->event_base = x86_pmu.perfctr;
hwc->config_base = x86_pmu.eventsel;
hwc->event_base = x86_pmu.perfctr;
}
return idx;
}
/*
* Find a PMC slot for the freshly enabled / scheduled in event:
*/
static int x86_pmu_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
idx = x86_schedule_event(cpuc, hwc);
if (idx < 0)
return idx;
perf_events_lapic_init();
x86_pmu.disable(hwc, idx);
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.priority = 1
};
static struct x86_pmu p6_pmu = {
static __initconst struct x86_pmu p6_pmu = {
.name = "p6",
.handle_irq = p6_pmu_handle_irq,
.disable_all = p6_pmu_disable_all,
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = {
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
.get_event_idx = intel_get_event_idx,
};
static struct x86_pmu intel_pmu = {
static __initconst struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.disable_all = intel_pmu_disable_all,
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
.get_event_idx = intel_get_event_idx,
};
static struct x86_pmu amd_pmu = {
static __initconst struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
.get_event_idx = gen_get_event_idx,
};
static int p6_pmu_init(void)
static __init int p6_pmu_init(void)
{
switch (boot_cpu_data.x86_model) {
case 1:
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
event_constraints = intel_p6_event_constraints;
break;
case 9:
case 13:
/* Pentium M */
event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void)
return 0;
}
static int intel_pmu_init(void)
static __init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void)
sizeof(hw_cache_event_ids));
pr_cont("Core2 events, ");
event_constraints = intel_core_event_constraints;
break;
default:
case 26:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void)
return 0;
}
static int amd_pmu_init(void)
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = {
.unthrottle = x86_pmu_unthrottle,
};
static int
validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct hw_perf_event fake_event = event->hw;
if (event->pmu && event->pmu != &pmu)
return 0;
return x86_schedule_event(cpuc, &fake_event) >= 0;
}
static int validate_group(struct perf_event *event)
{
struct perf_event *sibling, *leader = event->group_leader;
struct cpu_hw_events fake_pmu;
memset(&fake_pmu, 0, sizeof(fake_pmu));
if (!validate_event(&fake_pmu, leader))
return -ENOSPC;
list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
if (!validate_event(&fake_pmu, sibling))
return -ENOSPC;
}
if (!validate_event(&fake_pmu, event))
return -ENOSPC;
return 0;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err;
err = __hw_perf_event_init(event);
if (!err) {
if (event->group_leader != event)
err = validate_group(event);
}
if (err) {
if (event->destroy)
event->destroy(event);

View File

@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
boot_cpu_data.x86 != 16)
boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
return;
wd_ops = &k7_wd_ops;
break;

View File

@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
early_init_transmeta(c);
display_cacheinfo(c);
cpu_detect_cache_sizes(c);
/* Print CMS and CPU revision */
max = cpuid_eax(0x80860000);

View File

@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file)
{
unsigned int cpu;
struct cpuinfo_x86 *c;
int ret = 0;
lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
ret = -ENXIO; /* No such CPU */
goto out;
}
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
c = &cpu_data(cpu);
if (c->cpuid_level < 0)
ret = -EIO; /* CPUID not supported */
out:
unlock_kernel();
return ret;
return -EIO; /* CPUID not supported */
return 0;
}
/*

View File

@@ -27,8 +27,7 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <asm/iommu.h>
#include <asm/x86_init.h>
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif
#ifdef CONFIG_X86_64
pci_iommu_shutdown();
x86_platform.iommu_shutdown();
#endif
crash_save_cpu(regs, safe_smp_processor_id());

View File

@@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
show_registers(regs);
#ifdef CONFIG_X86_32
sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
if (user_mode(regs)) {
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
} else {
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);

View File

@@ -10,9 +10,9 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
stack = &dummy;
if (task && task != current)
stack = (unsigned long *)task->thread.sp;
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
bp = print_context_stack(context, stack, bp, ops,
data, NULL, &graph);
bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}

View File

@@ -10,26 +10,28 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include "dumpstack.h"
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
static char x86_stack_ids[][8] = {
[DEBUG_STACK - 1] = "#DB",
[NMI_STACK - 1] = "NMI",
[DOUBLEFAULT_STACK - 1] = "#DF",
[STACKFAULT_STACK - 1] = "#SS",
[MCE_STACK - 1] = "#MC",
[ DEBUG_STACK-1 ] = "#DB",
[ NMI_STACK-1 ] = "NMI",
[ DOUBLEFAULT_STACK-1 ] = "#DF",
[ STACKFAULT_STACK-1 ] = "#SS",
[ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
[N_EXCEPTION_STACKS ...
N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
[ N_EXCEPTION_STACKS ...
N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
};
};
int x86_is_stack_id(int id, char *name)
{
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name)
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
unsigned *usedp, char **idp)
{
unsigned k;
@@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
unsigned long *stack;
int cpu;
int i;
const int cpu = smp_processor_id();
unsigned long *irq_stack_end =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu));
unsigned long *irq_stack =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
preempt_disable();
cpu = smp_processor_id();
irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
* debugging aid: "show_stack(NULL, NULL);" prints the
* back trace for this cpu.
* Debugging aid: "show_stack(NULL, NULL);" prints the
* back trace for this cpu:
*/
if (sp == NULL) {
if (task)
sp = (unsigned long *)task->thread.sp;
@@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
preempt_enable();
printk("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}

View File

@@ -333,6 +333,10 @@ ENTRY(ret_from_fork)
CFI_ENDPROC
END(ret_from_fork)
/*
* Interrupt exit functions should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
/*
* Return to user mode is not as complex as all this looks,
* but we want the default path for a system call return to
@@ -383,6 +387,10 @@ need_resched:
END(resume_kernel)
#endif
CFI_ENDPROC
/*
* End of kprobes section
*/
.popsection
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -513,6 +521,10 @@ sysexit_audit:
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
/*
* syscall stub including irq exit should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
jmp resume_userspace
END(syscall_badsys)
CFI_ENDPROC
/*
* End of kprobes section
*/
.popsection
/*
* System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
/*
* Irq entries should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
/*
* End of kprobes section
*/
.popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
@@ -1185,17 +1209,14 @@ END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
pushl $0
pushl %eax
pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, 0xc(%esp)
movl %eax, %ecx
popl %edx
popl %ecx
popl %eax
ret
jmp *%ecx
#endif
.section .rodata,"a"

View File

@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
movq %rax, 16(%rsp)
movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $16, %rsp
retq
addq $24, %rsp
jmp *%rdi
#endif
@@ -803,6 +803,10 @@ END(interrupt)
call \func
.endm
/*
* Interrupt entry/exit should be protected against kprobes
*/
.pushsection .kprobes.text, "ax"
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)
/*
* End of kprobes section
*/
.popsection
/*
* APIC interrupts.
@@ -1491,12 +1499,17 @@ error_kernelspace:
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%ecx /* zero extend */
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%eax /* zero extend */
cmpq %rax,RIP+8(%rsp)
je bstep_iret
cmpq $gs_change,RIP+8(%rsp)
je error_swapgs
jmp error_sti
bstep_iret:
/* Fix truncated RIP */
movq %rcx,RIP+8(%rsp)
jmp error_swapgs
END(error_entry)

View File

@@ -9,6 +9,8 @@
* the dangers of modifying code on the run.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data)
switch (faulted) {
case 0:
pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("ftrace: converting mcount calls to jmp . + 5\n");
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_FTRACE_SYSCALLS
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];
extern unsigned long *sys_call_table;
static struct syscall_metadata **syscalls_metadata;
static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
unsigned long __init arch_syscall_addr(int nr)
{
struct syscall_metadata *start;
struct syscall_metadata *stop;
char str[KSYM_SYMBOL_LEN];
start = (struct syscall_metadata *)__start_syscalls_metadata;
stop = (struct syscall_metadata *)__stop_syscalls_metadata;
kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
for ( ; start < stop; start++) {
if (start->name && !strcmp(start->name, str))
return start;
}
return NULL;
return (unsigned long)(&sys_call_table)[nr];
}
struct syscall_metadata *syscall_nr_to_meta(int nr)
{
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
return NULL;
return syscalls_metadata[nr];
}
int syscall_name_to_nr(char *name)
{
int i;
if (!syscalls_metadata)
return -1;
for (i = 0; i < NR_syscalls; i++) {
if (syscalls_metadata[i]) {
if (!strcmp(syscalls_metadata[i]->name, name))
return i;
}
}
return -1;
}
void set_syscall_enter_id(int num, int id)
{
syscalls_metadata[num]->enter_id = id;
}
void set_syscall_exit_id(int num, int id)
{
syscalls_metadata[num]->exit_id = id;
}
static int __init arch_init_ftrace_syscalls(void)
{
int i;
struct syscall_metadata *meta;
unsigned long **psys_syscall_table = &sys_call_table;
syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
NR_syscalls, GFP_KERNEL);
if (!syscalls_metadata) {
WARN_ON(1);
return -ENOMEM;
}
for (i = 0; i < NR_syscalls; i++) {
meta = find_syscall_meta(psys_syscall_table[i]);
syscalls_metadata[i] = meta;
}
return 0;
}
arch_initcall(arch_init_ftrace_syscalls);
#endif

View File

@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
/* set up data segments */
xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es

View File

@@ -0,0 +1,555 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) 2007 Alan Stern
* Copyright (C) 2009 IBM Corporation
* Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
*
* Authors: Alan Stern <stern@rowland.harvard.edu>
* K.Prasad <prasad@linux.vnet.ibm.com>
* Frederic Weisbecker <fweisbec@gmail.com>
*/
/*
* HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
* using the CPU's debug registers.
*/
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);
/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
/*
* Stores the breakpoints currently in use on each breakpoint address
* register for each cpus
*/
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
unsigned long bp_info;
bp_info = (len | type) & 0xf;
bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
return bp_info;
}
/*
* Encode the length, type, Exact, and Enable bits for a particular breakpoint
* as stored in debug register 7.
*/
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}
/*
* Decode the length and type bits for a particular breakpoint as
* stored in debug register 7. Return the "enabled" status.
*/
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
*len = (bp_info & 0xc) | 0x40;
*type = (bp_info & 0x3) | 0x80;
return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
/*
* Install a perf counter breakpoint.
*
* We seek a free debug address register and use it for this
* breakpoint. Eventually we enable it in the debug control register.
*
* Atomic: we hold the counter->ctx->lock and we only handle variables
* and registers local to this cpu.
*/
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned long *dr7;
int i;
for (i = 0; i < HBP_NUM; i++) {
struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
if (!*slot) {
*slot = bp;
break;
}
}
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
return -EBUSY;
set_debugreg(info->address, i);
__get_cpu_var(cpu_debugreg[i]) = info->address;
dr7 = &__get_cpu_var(cpu_dr7);
*dr7 |= encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
return 0;
}
/*
* Uninstall the breakpoint contained in the given counter.
*
* First we search the debug address register it uses and then we disable
* it.
*
* Atomic: we hold the counter->ctx->lock and we only handle variables
* and registers local to this cpu.
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned long *dr7;
int i;
for (i = 0; i < HBP_NUM; i++) {
struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
if (*slot == bp) {
*slot = NULL;
break;
}
}
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
return;
dr7 = &__get_cpu_var(cpu_dr7);
*dr7 &= ~__encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
}
static int get_hbp_len(u8 hbp_len)
{
unsigned int len_in_bytes = 0;
switch (hbp_len) {
case X86_BREAKPOINT_LEN_1:
len_in_bytes = 1;
break;
case X86_BREAKPOINT_LEN_2:
len_in_bytes = 2;
break;
case X86_BREAKPOINT_LEN_4:
len_in_bytes = 4;
break;
#ifdef CONFIG_X86_64
case X86_BREAKPOINT_LEN_8:
len_in_bytes = 8;
break;
#endif
}
return len_in_bytes;
}
/*
* Check for virtual address in user space.
*/
int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
{
unsigned int len;
len = get_hbp_len(hbp_len);
return (va <= TASK_SIZE - len);
}
/*
* Check for virtual address in kernel space.
*/
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
{
unsigned int len;
len = get_hbp_len(hbp_len);
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}
/*
* Store a breakpoint's encoded address, length, and type.
*/
static int arch_store_info(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
/*
* For kernel-addresses, either the address or symbol name can be
* specified.
*/
if (info->name)
info->address = (unsigned long)
kallsyms_lookup_name(info->name);
if (info->address)
return 0;
return -EINVAL;
}
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
/* Len */
switch (x86_len) {
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
case X86_BREAKPOINT_LEN_2:
*gen_len = HW_BREAKPOINT_LEN_2;
break;
case X86_BREAKPOINT_LEN_4:
*gen_len = HW_BREAKPOINT_LEN_4;
break;
#ifdef CONFIG_X86_64
case X86_BREAKPOINT_LEN_8:
*gen_len = HW_BREAKPOINT_LEN_8;
break;
#endif
default:
return -EINVAL;
}
/* Type */
switch (x86_type) {
case X86_BREAKPOINT_EXECUTE:
*gen_type = HW_BREAKPOINT_X;
break;
case X86_BREAKPOINT_WRITE:
*gen_type = HW_BREAKPOINT_W;
break;
case X86_BREAKPOINT_RW:
*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
break;
default:
return -EINVAL;
}
return 0;
}
static int arch_build_bp_info(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
info->address = bp->attr.bp_addr;
/* Len */
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
info->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
info->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
info->len = X86_BREAKPOINT_LEN_4;
break;
#ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
info->len = X86_BREAKPOINT_LEN_8;
break;
#endif
default:
return -EINVAL;
}
/* Type */
switch (bp->attr.bp_type) {
case HW_BREAKPOINT_W:
info->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
info->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
info->type = X86_BREAKPOINT_EXECUTE;
break;
default:
return -EINVAL;
}
return 0;
}
/*
* Validate the arch-specific HW Breakpoint register settings
*/
int arch_validate_hwbkpt_settings(struct perf_event *bp,
struct task_struct *tsk)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
ret = arch_build_bp_info(bp);
if (ret)
return ret;
ret = -EINVAL;
if (info->type == X86_BREAKPOINT_EXECUTE)
/*
* Ptrace-refactoring code
* For now, we'll allow instruction breakpoint only for user-space
* addresses
*/
if ((!arch_check_va_in_userspace(info->address, info->len)) &&
info->len != X86_BREAKPOINT_EXECUTE)
return ret;
switch (info->len) {
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
case X86_BREAKPOINT_LEN_2:
align = 1;
break;
case X86_BREAKPOINT_LEN_4:
align = 3;
break;
#ifdef CONFIG_X86_64
case X86_BREAKPOINT_LEN_8:
align = 7;
break;
#endif
default:
return ret;
}
if (bp->callback)
ret = arch_store_info(bp);
if (ret < 0)
return ret;
/*
* Check that the low-order bits of the address are appropriate
* for the alignment implied by len.
*/
if (info->address & align)
return -EINVAL;
/* Check that the virtual address is in the proper range */
if (tsk) {
if (!arch_check_va_in_userspace(info->address, info->len))
return -EFAULT;
} else {
if (!arch_check_va_in_kernelspace(info->address, info->len))
return -EFAULT;
}
return 0;
}
/*
* Dump the debug register contents to the user.
* We can't dump our per cpu values because it
* may contain cpu wide breakpoint, something that
* doesn't belong to the current task.
*
* TODO: include non-ptrace user breakpoints (perf)
*/
void aout_dump_debugregs(struct user *dump)
{
int i;
int dr7 = 0;
struct perf_event *bp;
struct arch_hw_breakpoint *info;
struct thread_struct *thread = &current->thread;
for (i = 0; i < HBP_NUM; i++) {
bp = thread->ptrace_bps[i];
if (bp && !bp->attr.disabled) {
dump->u_debugreg[i] = bp->attr.bp_addr;
info = counter_arch_bp(bp);
dr7 |= encode_dr7(i, info->len, info->type);
} else {
dump->u_debugreg[i] = 0;
}
}
dump->u_debugreg[4] = 0;
dump->u_debugreg[5] = 0;
dump->u_debugreg[6] = current->thread.debugreg6;
dump->u_debugreg[7] = dr7;
}
EXPORT_SYMBOL_GPL(aout_dump_debugregs);
/*
* Release the user breakpoints used by ptrace
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
int i;
struct thread_struct *t = &tsk->thread;
for (i = 0; i < HBP_NUM; i++) {
unregister_hw_breakpoint(t->ptrace_bps[i]);
t->ptrace_bps[i] = NULL;
}
}
void hw_breakpoint_restore(void)
{
set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
set_debugreg(current->thread.debugreg6, 6);
set_debugreg(__get_cpu_var(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
/*
* Handle debug exception notifications.
*
* Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
*
* NOTIFY_DONE returned if one of the following conditions is true.
* i) When the causative address is from user-space and the exception
* is a valid one, i.e. not triggered as a result of lazy debug register
* switching
* ii) When there are more bits than trap<n> set in DR6 register (such
* as BD, BS or BT) indicating that more than one debug condition is
* met and requires some more action in do_debug().
*
* NOTIFY_STOP returned for all other cases
*
*/
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
int i, cpu, rc = NOTIFY_STOP;
struct perf_event *bp;
unsigned long dr7, dr6;
unsigned long *dr6_p;
/* The DR6 value is pointed by args->err */
dr6_p = (unsigned long *)ERR_PTR(args->err);
dr6 = *dr6_p;
/* Do an early return if no trap bits are set in DR6 */
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
get_debugreg(dr7, 7);
/* Disable breakpoints during exception handling */
set_debugreg(0UL, 7);
/*
* Assert that local interrupts are disabled
* Reset the DRn bits in the virtualized register value.
* The ptrace trigger routine will add in whatever is needed.
*/
current->thread.debugreg6 &= ~DR_TRAP_BITS;
cpu = get_cpu();
/* Handle all the breakpoints that were triggered */
for (i = 0; i < HBP_NUM; ++i) {
if (likely(!(dr6 & (DR_TRAP0 << i))))
continue;
/*
* The counter may be concurrently released but that can only
* occur from a call_rcu() path. We can then safely fetch
* the breakpoint, use its callback, touch its counter
* while we are in an rcu_read_lock() path.
*/
rcu_read_lock();
bp = per_cpu(bp_per_reg[i], cpu);
if (bp)
rc = NOTIFY_DONE;
/*
* Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling
*/
(*dr6_p) &= ~(DR_TRAP0 << i);
/*
* bp can be NULL due to lazy debug register switching
* or due to concurrent perf counter removing.
*/
if (!bp) {
rcu_read_unlock();
break;
}
(bp->callback)(bp, args->regs);
rcu_read_unlock();
}
if (dr6 & (~DR_TRAP_BITS))
rc = NOTIFY_DONE;
set_debugreg(dr7, 7);
put_cpu();
return rc;
}
/*
* Handle debug exception notifications.
*/
int __kprobes hw_breakpoint_exceptions_notify(
struct notifier_block *unused, unsigned long val, void *data)
{
if (val != DIE_DEBUG)
return NOTIFY_DONE;
return hw_breakpoint_handler(data);
}
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
{
/* TODO */
}

View File

@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_call_count;
sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs)
}
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
unsigned int irq, vector;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
int break_affinity = 0;
int set_affinity = 1;
const struct cpumask *affinity;
if (!desc)
continue;
if (irq == 2)
continue;
/* interrupt's are disabled at this point */
spin_lock(&desc->lock);
affinity = desc->affinity;
if (!irq_has_action(irq) ||
cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);
continue;
}
/*
* Complete the irq move. This cpu is going down and for
* non intr-remapping case, we can't wait till this interrupt
* arrives at this cpu before completing the irq move.
*/
irq_force_complete_move(irq);
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
affinity = cpu_all_mask;
}
if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
desc->chip->mask(irq);
if (desc->chip->set_affinity)
desc->chip->set_affinity(irq, affinity);
else if (!(warned++))
set_affinity = 0;
if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
desc->chip->unmask(irq);
spin_unlock(&desc->lock);
if (break_affinity && set_affinity)
printk("Broke affinity for irq %i\n", irq);
else if (!set_affinity)
printk("Cannot set affinity for irq %i\n", irq);
}
/*
* We can remove mdelay() and then send spuriuous interrupts to
* new cpu targets for all the irqs that were handled previously by
* this cpu. While it works, I have seen spurious interrupt messages
* (nothing wrong but still...).
*
* So for now, retain mdelay(1) and check the IRR and then send those
* interrupts to new targets as this cpu is already offlined...
*/
mdelay(1);
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irr;
if (__get_cpu_var(vector_irq)[vector] < 0)
continue;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1 << (vector % 32))) {
irq = __get_cpu_var(vector_irq)[vector];
desc = irq_to_desc(irq);
spin_lock(&desc->lock);
if (desc->chip->retrigger)
desc->chip->retrigger(irq);
spin_unlock(&desc->lock);
}
}
}
#endif

View File

@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
unsigned int irq;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
const struct cpumask *affinity;
if (!desc)
continue;
if (irq == 2)
continue;
affinity = desc->affinity;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
printk("Breaking affinity for irq %i\n", irq);
affinity = cpu_all_mask;
}
if (desc->chip->set_affinity)
desc->chip->set_affinity(irq, affinity);
else if (desc->action)
printk_once("Cannot set affinity for irq %i\n", irq);
}
#if 0
barrier();
/* Ingo Molnar says: "after the IO-APIC masks have been redirected
[note the nop - the interrupt-enable boundary on x86 is two
instructions from sti] - to flush out pending hardirqs and
IPIs. After this point nothing is supposed to reach this CPU." */
__asm__ __volatile__("sti; nop; cli");
barrier();
#else
/* That doesn't seem sufficient. Give it 1ms. */
local_irq_enable();
mdelay(1);
local_irq_disable();
#endif
}
#endif

View File

@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
int break_affinity = 0;
int set_affinity = 1;
const struct cpumask *affinity;
if (!desc)
continue;
if (irq == 2)
continue;
/* interrupt's are disabled at this point */
spin_lock(&desc->lock);
affinity = desc->affinity;
if (!irq_has_action(irq) ||
cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);
continue;
}
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
affinity = cpu_all_mask;
}
if (desc->chip->mask)
desc->chip->mask(irq);
if (desc->chip->set_affinity)
desc->chip->set_affinity(irq, affinity);
else if (!(warned++))
set_affinity = 0;
if (desc->chip->unmask)
desc->chip->unmask(irq);
spin_unlock(&desc->lock);
if (break_affinity && set_affinity)
printk("Broke affinity for irq %i\n", irq);
else if (!set_affinity)
printk("Cannot set affinity for irq %i\n", irq);
}
/* That doesn't seem sufficient. Give it 1ms. */
local_irq_enable();
mdelay(1);
local_irq_disable();
}
#endif
extern void call_softirq(void);

View File

@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>
@@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
gdb_regs[GDB_SP] = (int)&regs->sp;
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
gdb_regs[GDB_SP] = regs->sp;
#endif
gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
}
/**
@@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}

View File

@@ -48,31 +48,22 @@
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
#ifdef CONFIG_X86_64
#define stack_addr(regs) ((unsigned long *)regs->sp)
#else
/*
* "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
* don't save the ss and esp registers if the CPU is already in kernel
* mode when it traps. So for kprobes, regs->sp and regs->ss are not
* the [nonexistent] saved stack pointer and ss register, but rather
* the top 8 bytes of the pre-int3 stack. So &regs->sp happens to
* point to the top of the pre-int3 stack.
*/
#define stack_addr(regs) ((unsigned long *)&regs->sp)
#endif
#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
static const u32 onebyte_has_modrm[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ----------------------------------------------- */
W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
static const u32 twobyte_has_modrm[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ----------------------------------------------- */
W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
#undef W
struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +191,75 @@ retry:
}
}
/* Recover the probed instruction at addr for further analysis. */
static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
kp = get_kprobe((void *)addr);
if (!kp)
return -EINVAL;
/*
* Basically, kp->ainsn.insn has an original instruction.
* However, RIP-relative instruction can not do single-stepping
* at different place, fix_riprel() tweaks the displacement of
* that instruction. In that case, we can't recover the instruction
* from the kp->ainsn.insn.
*
* On the other hand, kp->opcode has a copy of the first byte of
* the probed instruction, which is overwritten by int3. And
* the instruction at kp->addr is not modified by kprobes except
* for the first byte, we can recover the original instruction
* from it and kp->opcode.
*/
memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
buf[0] = kp->opcode;
return 0;
}
/* Dummy buffers for kallsyms_lookup */
static char __dummy_buf[KSYM_NAME_LEN];
/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
int ret;
unsigned long addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
return 0;
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
kernel_insn_init(&insn, (void *)addr);
insn_get_opcode(&insn);
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
* original instruction in our buffer.
*/
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
ret = recover_probed_instruction(buf, addr);
if (ret)
/*
* Another debugging subsystem might insert
* this breakpoint. In that case, we can't
* recover it.
*/
return 0;
kernel_insn_init(&insn, buf);
}
insn_get_length(&insn);
addr += insn.length;
}
return (addr == paddr);
}
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
static void __kprobes fix_riprel(struct kprobe *p)
{
#ifdef CONFIG_X86_64
u8 *insn = p->ainsn.insn;
s64 disp;
int need_modrm;
struct insn insn;
kernel_insn_init(&insn, p->ainsn.insn);
/* Skip legacy instruction prefixes. */
while (1) {
switch (*insn) {
case 0x66:
case 0x67:
case 0x2e:
case 0x3e:
case 0x26:
case 0x64:
case 0x65:
case 0x36:
case 0xf0:
case 0xf3:
case 0xf2:
++insn;
continue;
}
break;
}
/* Skip REX instruction prefix. */
if (is_REX_prefix(insn))
++insn;
if (*insn == 0x0f) {
/* Two-byte opcode. */
++insn;
need_modrm = test_bit(*insn,
(unsigned long *)twobyte_has_modrm);
} else
/* One-byte opcode. */
need_modrm = test_bit(*insn,
(unsigned long *)onebyte_has_modrm);
if (need_modrm) {
u8 modrm = *++insn;
if ((modrm & 0xc7) == 0x05) {
/* %rip+disp32 addressing mode */
/* Displacement follows ModRM byte. */
++insn;
/*
* The copied instruction uses the %rip-relative
* addressing mode. Adjust the displacement for the
* difference between the original location of this
* instruction and the location of the copy that will
* actually be run. The tricky bit here is making sure
* that the sign extension happens correctly in this
* calculation, since we need a signed 32-bit result to
* be sign-extended to 64 bits when it's added to the
* %rip value and yield the same 64-bit result that the
* sign-extension of the original signed 32-bit
* displacement would have given.
*/
disp = (u8 *) p->addr + *((s32 *) insn) -
(u8 *) p->ainsn.insn;
BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
*(s32 *)insn = (s32) disp;
}
if (insn_rip_relative(&insn)) {
s64 newdisp;
u8 *disp;
insn_get_displacement(&insn);
/*
* The copied instruction uses the %rip-relative addressing
* mode. Adjust the displacement for the difference between
* the original location of this instruction and the location
* of the copy that will actually be run. The tricky bit here
* is making sure that the sign extension happens correctly in
* this calculation, since we need a signed 32-bit result to
* be sign-extended to 64 bits when it's added to the %rip
* value and yield the same 64-bit result that the sign-
* extension of the original signed 32-bit displacement would
* have given.
*/
newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
(u8 *) p->ainsn.insn;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
}
#endif
}
@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
if (!can_probe((unsigned long)p->addr))
return -EILSEQ;
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
#ifdef CONFIG_X86_64
/* TODO: Provide re-entrancy from post_kprobes_handler() and
* avoid exception stack corruption while single-stepping on
* the instruction of the new probe.
*/
arch_disarm_kprobe(p);
regs->ip = (unsigned long)p->addr;
reset_current_kprobe();
preempt_enable_no_resched();
break;
#endif
case KPROBE_HIT_ACTIVE:
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_status = KPROBE_REENTER;
break;
case KPROBE_HIT_SS:
if (p == kprobe_running()) {
regs->flags &= ~X86_EFLAGS_TF;
regs->flags |= kcb->kprobe_saved_flags;
return 0;
} else {
/* A probe has been hit in the codepath leading up
* to, or just after, single-stepping of a probed
* instruction. This entire codepath should strictly
* reside in .kprobes.text section. Raise a warning
* to highlight this peculiar case.
*/
}
/* A probe has been hit in the codepath leading up to, or just
* after, single-stepping of a probed instruction. This entire
* codepath should strictly reside in .kprobes.text section.
* Raise a BUG or we'll continue in an endless reentering loop
* and eventually a stack overflow.
*/
printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
p->addr);
dump_kprobe(p);
BUG();
default:
/* impossible cases */
WARN_ON(1);
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs))
if (post_kprobe_handler(args->regs)) {
/*
* Reset the BS bit in dr6 (pointed by args->err) to
* denote completion of processing
*/
(*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
}
break;
case DIE_GPF:
/*

View File

@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC

View File

@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC

View File

@@ -73,7 +73,6 @@
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/smp_lock.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size)
static int microcode_open(struct inode *unused1, struct file *unused2)
{
cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}

View File

@@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
struct cpuinfo_x86 *c = &cpu_data(cpu);
int ret = 0;
lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
ret = -ENXIO; /* No such CPU */
goto out;
}
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
c = &cpu_data(cpu);
if (!cpu_has(c, X86_FEATURE_MSR))
ret = -EIO; /* MSR not supported */
out:
unlock_kernel();
return ret;
return -EIO; /* MSR not supported */
return 0;
}
/*

View File

@@ -46,6 +46,7 @@
#include <asm/dma.h>
#include <asm/rio.h>
#include <asm/bios_ebda.h>
#include <asm/x86_init.h>
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
else
return bad_dma_address;
return DMA_ERROR_CODE;
}
}
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
void *vaddr, unsigned int npages, int direction)
{
unsigned long entry;
dma_addr_t ret = bad_dma_address;
dma_addr_t ret;
entry = iommu_range_alloc(dev, tbl, npages);
if (unlikely(entry == bad_dma_address))
goto error;
if (unlikely(entry == DMA_ERROR_CODE)) {
printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
"iommu %p\n", npages, tbl);
return DMA_ERROR_CODE;
}
/* set the return dma address */
ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
/* put the TCEs in the HW table */
tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
direction);
return ret;
error:
printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
"iommu %p\n", npages, tbl);
return bad_dma_address;
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned long flags;
/* were we called with bad_dma_address? */
badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
return;
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
pdev = to_pci_dev(dev);
/* search up the device tree for an iommu */
pbus = pdev->bus;
/* is the device behind a bridge? Look for the root bus */
while (pbus->parent)
do {
tbl = pci_iommu(pbus);
if (tbl && tbl->it_busno == pbus->number)
break;
tbl = NULL;
pbus = pbus->parent;
tbl = pci_iommu(pbus);
} while (pbus);
BUG_ON(tbl && (tbl->it_busno != pbus->number));
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
entry = iommu_range_alloc(dev, tbl, npages);
if (entry == bad_dma_address) {
if (entry == DMA_ERROR_CODE) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
goto error;
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
error:
calgary_unmap_sg(dev, sg, nelems, dir, NULL);
for_each_sg(sg, s, nelems, i) {
sg->dma_address = bad_dma_address;
sg->dma_address = DMA_ERROR_CODE;
sg->dma_length = 0;
}
return 0;
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
/* set up tces to cover the allocated range */
mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
if (mapping == bad_dma_address)
if (mapping == DMA_ERROR_CODE)
goto free;
*dma_handle = mapping;
return ret;
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
struct iommu_table *tbl = pci_iommu(dev->bus);
/* reserve EMERGENCY_PAGES from bad_dma_address and up */
iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);
/* avoid the BIOS/VGA first 640KB-1MB region */
/* for CalIOC2 - avoid the entire first MB */
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void)
return;
}
static int __init calgary_iommu_init(void)
{
int ret;
/* ok, we're trying to use Calgary - let's roll */
printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
ret = calgary_init();
if (ret) {
printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
"falling back to no_iommu\n", ret);
return ret;
}
return 0;
}
void __init detect_calgary(void)
{
int bus;
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void)
* if the user specified iommu=off or iommu=soft or we found
* another HW IOMMU already, bail out.
*/
if (swiotlb || no_iommu || iommu_detected)
if (no_iommu || iommu_detected)
return;
if (!use_calgary)
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void)
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
specified_table_size);
/* swiotlb for devices that aren't behind the Calgary. */
if (max_pfn > MAX_DMA32_PFN)
swiotlb = 1;
x86_init.iommu.iommu_init = calgary_iommu_init;
}
return;
@@ -1457,35 +1472,6 @@ cleanup:
}
}
int __init calgary_iommu_init(void)
{
int ret;
if (no_iommu || (swiotlb && !calgary_detected))
return -ENODEV;
if (!calgary_detected)
return -ENODEV;
/* ok, we're trying to use Calgary - let's roll */
printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
ret = calgary_init();
if (ret) {
printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
"falling back to no_iommu\n", ret);
return ret;
}
force_iommu = 1;
bad_dma_address = 0x0;
/* dma_ops is set to swiotlb or nommu */
if (!dma_ops)
dma_ops = &nommu_dma_ops;
return 0;
}
static int __init calgary_parse_options(char *p)
{
unsigned int bridge;

View File

@@ -11,10 +11,11 @@
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/amd_iommu.h>
#include <asm/x86_init.h>
static int forbid_dac __read_mostly;
struct dma_map_ops *dma_ops;
struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -42,15 +43,10 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
to older i386. */
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
.coherent_dma_mask = DMA_BIT_MASK(32),
.coherent_dma_mask = ISA_DMA_BIT_MASK,
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -128,20 +124,17 @@ void __init pci_iommu_alloc(void)
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
#endif
if (pci_swiotlb_init())
return;
/*
* The order of these functions is important for
* fall-back/fail-over reasons
*/
gart_iommu_hole_init();
detect_calgary();
detect_intel_iommu();
/* needs to be called after gart_iommu_hole_init */
amd_iommu_detect();
pci_swiotlb_init();
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -216,7 +209,7 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "allowdac", 8))
forbid_dac = 0;
if (!strncmp(p, "nodac", 5))
forbid_dac = -1;
forbid_dac = 1;
if (!strncmp(p, "usedac", 6)) {
forbid_dac = -1;
return 1;
@@ -291,25 +284,17 @@ static int __init pci_iommu_init(void)
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
#endif
x86_init.iommu.iommu_init();
calgary_iommu_init();
if (swiotlb) {
printk(KERN_INFO "PCI-DMA: "
"Using software bounce buffering for IO (SWIOTLB)\n");
swiotlb_print_info();
} else
swiotlb_free();
intel_iommu_init();
amd_iommu_init();
gart_iommu_init();
no_iommu_init();
return 0;
}
void pci_iommu_shutdown(void)
{
gart_iommu_shutdown();
amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
rootfs_initcall(pci_iommu_init);

View File

@@ -39,6 +39,7 @@
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/k8.h>
#include <asm/x86_init.h>
static unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */
static u32 *iommu_gatt_base; /* Remapping table */
static dma_addr_t bad_dma_addr;
/*
* If this is disabled the IOMMU will use an optimized flushing strategy
* of only flushing when an mapping is reused. With it true the GART is
@@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
PAGE_SIZE) >> PAGE_SHIFT;
boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
if (panic_on_overflow)
panic("dma_map_area overflow %lu bytes\n", size);
iommu_full(dev, size, dir);
return bad_dma_address;
return bad_dma_addr;
}
for (i = 0; i < npages; i++) {
@@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
int i;
#ifdef CONFIG_IOMMU_DEBUG
printk(KERN_DEBUG "dma_map_sg overflow\n");
pr_debug("dma_map_sg overflow\n");
#endif
for_each_sg(sg, s, nents, i) {
@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
if (nonforced_iommu(dev, addr, s->length)) {
addr = dma_map_area(dev, addr, s->length, dir, 0);
if (addr == bad_dma_address) {
if (addr == bad_dma_addr) {
if (i > 0)
gart_unmap_sg(dev, sg, i, dir, NULL);
nents = 0;
@@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
if (!dev)
dev = &x86_dma_fallback_dev;
out = 0;
start = 0;
start_sg = sgmap = sg;
seg_size = 0;
max_seg_size = dma_get_max_seg_size(dev);
ps = NULL; /* shut up gcc */
out = 0;
start = 0;
start_sg = sg;
sgmap = sg;
seg_size = 0;
max_seg_size = dma_get_max_seg_size(dev);
ps = NULL; /* shut up gcc */
for_each_sg(sg, s, nents, i) {
dma_addr_t addr = sg_phys(s);
@@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
sgmap, pages, need) < 0)
goto error;
out++;
seg_size = 0;
sgmap = sg_next(sgmap);
pages = 0;
start = i;
start_sg = s;
seg_size = 0;
sgmap = sg_next(sgmap);
pages = 0;
start = i;
start_sg = s;
}
}
@@ -455,7 +461,7 @@ error:
iommu_full(dev, pages << PAGE_SHIFT, dir);
for_each_sg(sg, s, nents, i)
s->dma_address = bad_dma_address;
s->dma_address = bad_dma_addr;
return 0;
}
@@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
DMA_BIDIRECTIONAL, align_mask);
flush_gart();
if (paddr != bad_dma_address) {
if (paddr != bad_dma_addr) {
*dma_addr = paddr;
return page_address(page);
}
@@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return (dma_addr == bad_dma_addr);
}
static int no_agp;
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
pr_warning(
"PCI-DMA: Warning: Small IOMMU %luMB."
" Consider increasing the AGP aperture in BIOS\n",
iommu_size >> 20);
@@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
aperture_alloc = aper_alloc;
}
static void gart_fixup_northbridges(struct sys_device *dev)
{
int i;
if (!fix_up_north_bridges)
return;
pr_info("PCI-DMA: Restoring GART aperture settings\n");
for (i = 0; i < num_k8_northbridges; i++) {
struct pci_dev *dev = k8_northbridges[i];
/*
* Don't enable translations just yet. That is the next
* step. Restore the pre-suspend aperture settings.
*/
pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1);
pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
}
}
static int gart_resume(struct sys_device *dev)
{
printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
pr_info("PCI-DMA: Resuming GART IOMMU\n");
if (fix_up_north_bridges) {
int i;
printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");
for (i = 0; i < num_k8_northbridges; i++) {
struct pci_dev *dev = k8_northbridges[i];
/*
* Don't enable translations just yet. That is the next
* step. Restore the pre-suspend aperture settings.
*/
pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
aperture_order << 1);
pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
aperture_alloc >> 25);
}
}
gart_fixup_northbridges(dev);
enable_gart_translations();
@@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state)
}
static struct sysdev_class gart_sysdev_class = {
.name = "gart",
.suspend = gart_suspend,
.resume = gart_resume,
.name = "gart",
.suspend = gart_suspend,
.resume = gart_resume,
};
static struct sys_device device_gart = {
.id = 0,
.cls = &gart_sysdev_class,
.cls = &gart_sysdev_class,
};
/*
@@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
void *gatt;
int i, error;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
pr_info("PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
dev = NULL;
for (i = 0; i < num_k8_northbridges; i++) {
@@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
}
if (!aper_base)
goto nommu;
info->aper_base = aper_base;
info->aper_size = aper_size >> 20;
@@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
flush_gart();
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
aper_base, aper_size>>10);
return 0;
nommu:
/* Should not happen anymore */
printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
"falling back to iommu=soft.\n");
return -1;
}
@@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = {
.unmap_page = gart_unmap_page,
.alloc_coherent = gart_alloc_coherent,
.free_coherent = gart_free_coherent,
.mapping_error = gart_mapping_error,
};
void gart_iommu_shutdown(void)
static void gart_iommu_shutdown(void)
{
struct pci_dev *dev;
int i;
if (no_agp && (dma_ops != &gart_dma_ops))
if (no_agp)
return;
for (i = 0; i < num_k8_northbridges; i++) {
@@ -708,7 +725,7 @@ void gart_iommu_shutdown(void)
}
}
void __init gart_iommu_init(void)
int __init gart_iommu_init(void)
{
struct agp_kern_info info;
unsigned long iommu_start;
@@ -718,7 +735,7 @@ void __init gart_iommu_init(void)
long i;
if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
return;
return 0;
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
@@ -730,35 +747,28 @@ void __init gart_iommu_init(void)
(agp_copy_info(agp_bridge, &info) < 0);
#endif
if (swiotlb)
return;
/* Did we detect a different HW IOMMU? */
if (iommu_detected && !gart_iommu_aperture)
return;
if (no_iommu ||
(!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
!gart_iommu_aperture ||
(no_agp && init_k8_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
printk(KERN_WARNING "More than 4GB of memory "
"but GART IOMMU not available.\n");
printk(KERN_WARNING "falling back to iommu=soft.\n");
pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
pr_warning("falling back to iommu=soft.\n");
}
return;
return 0;
}
/* need to map that range */
aper_size = info.aper_size << 20;
aper_base = info.aper_base;
end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
aper_size = info.aper_size << 20;
aper_base = info.aper_base;
end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
if (end_pfn > max_low_pfn_mapped) {
start_pfn = (aper_base>>PAGE_SHIFT);
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
}
printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
pr_info("PCI-DMA: using GART IOMMU.\n");
iommu_size = check_iommu_size(info.aper_base, aper_size);
iommu_pages = iommu_size >> PAGE_SHIFT;
@@ -773,8 +783,7 @@ void __init gart_iommu_init(void)
ret = dma_debug_resize_entries(iommu_pages);
if (ret)
printk(KERN_DEBUG
"PCI-DMA: Cannot trace all the entries\n");
pr_debug("PCI-DMA: Cannot trace all the entries\n");
}
#endif
@@ -784,15 +793,14 @@ void __init gart_iommu_init(void)
*/
iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
agp_memory_reserved = iommu_size;
printk(KERN_INFO
"PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
iommu_size >> 20);
iommu_start = aper_size - iommu_size;
iommu_bus_base = info.aper_base + iommu_start;
bad_dma_address = iommu_bus_base;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
agp_memory_reserved = iommu_size;
iommu_start = aper_size - iommu_size;
iommu_bus_base = info.aper_base + iommu_start;
bad_dma_addr = iommu_bus_base;
iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
/*
* Unmap the IOMMU part of the GART. The alias of the page is
@@ -814,7 +822,7 @@ void __init gart_iommu_init(void)
* the pages as Not-Present:
*/
wbinvd();
/*
* Now all caches are flushed and we can safely enable
* GART hardware. Doing it early leaves the possibility
@@ -838,6 +846,10 @@ void __init gart_iommu_init(void)
flush_gart();
dma_ops = &gart_dma_ops;
x86_platform.iommu_shutdown = gart_iommu_shutdown;
swiotlb = 0;
return 0;
}
void __init gart_parse_options(char *p)
@@ -856,7 +868,7 @@ void __init gart_parse_options(char *p)
#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
if (!strncmp(p, "fullflush", 8))
if (!strncmp(p, "fullflush", 9))
iommu_fullflush = 1;
if (!strncmp(p, "nofullflush", 11))
iommu_fullflush = 0;

View File

@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return bad_dma_address;
return DMA_ERROR_CODE;
flush_write_buffers();
return bus;
}
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
.sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
};
void __init no_iommu_init(void)
{
if (dma_ops)
return;
force_iommu = 0; /* no HW IOMMU */
dma_ops = &nommu_dma_ops;
}

View File

@@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = {
.dma_supported = NULL,
};
void __init pci_swiotlb_init(void)
/*
* pci_swiotlb_init - initialize swiotlb if necessary
*
* This returns non-zero if we are forced to use swiotlb (by the boot
* option).
*/
int __init pci_swiotlb_init(void)
{
int use_swiotlb = swiotlb | swiotlb_force;
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
if (!no_iommu && max_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
if (swiotlb_force)
swiotlb = 1;
if (swiotlb) {
printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
swiotlb_init();
swiotlb_init(0);
dma_ops = &swiotlb_dma_ops;
}
return use_swiotlb;
}

View File

@@ -10,6 +10,7 @@
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -17,6 +18,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
}
#endif
clear_tsk_thread_flag(tsk, TIF_DEBUG);
tsk->thread.debugreg0 = 0;
tsk->thread.debugreg1 = 0;
tsk->thread.debugreg2 = 0;
tsk->thread.debugreg3 = 0;
tsk->thread.debugreg6 = 0;
tsk->thread.debugreg7 = 0;
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
set_debugreg(next->debugreg0, 0);
set_debugreg(next->debugreg1, 1);
set_debugreg(next->debugreg2, 2);
set_debugreg(next->debugreg3, 3);
/* no 4 and 5 */
set_debugreg(next->debugreg6, 6);
set_debugreg(next->debugreg7, 7);
}
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */

View File

@@ -58,6 +58,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all)
ss = regs->ss & 0xffff;
gs = get_user_gs(regs);
} else {
sp = (unsigned long) (&regs->sp);
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
savesegment(gs, gs);
}
@@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
__show_regs(regs, 1);
show_registers(regs);
show_trace(NULL, regs, &regs->sp, regs->bp);
}
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
task_user_gs(p) = get_user_gs(regs);
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);

View File

@@ -52,6 +52,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
@@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
printk(KERN_INFO "CPU %d:", smp_processor_id());
__show_regs(regs, 1);
show_registers(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +345,7 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
return err;
}
@@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (preload_fpu)
__math_state_restore();
return prev_p;
}

View File

@@ -22,6 +22,8 @@
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
#include <asm/hw_breakpoint.h>
#include "tls.h"
@@ -49,6 +52,118 @@ enum x86_regset {
REGSET_IOPERM32,
};
struct pt_regs_offset {
const char *name;
int offset;
};
#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
#define REG_OFFSET_END {.name = NULL, .offset = 0}
static const struct pt_regs_offset regoffset_table[] = {
#ifdef CONFIG_X86_64
REG_OFFSET_NAME(r15),
REG_OFFSET_NAME(r14),
REG_OFFSET_NAME(r13),
REG_OFFSET_NAME(r12),
REG_OFFSET_NAME(r11),
REG_OFFSET_NAME(r10),
REG_OFFSET_NAME(r9),
REG_OFFSET_NAME(r8),
#endif
REG_OFFSET_NAME(bx),
REG_OFFSET_NAME(cx),
REG_OFFSET_NAME(dx),
REG_OFFSET_NAME(si),
REG_OFFSET_NAME(di),
REG_OFFSET_NAME(bp),
REG_OFFSET_NAME(ax),
#ifdef CONFIG_X86_32
REG_OFFSET_NAME(ds),
REG_OFFSET_NAME(es),
REG_OFFSET_NAME(fs),
REG_OFFSET_NAME(gs),
#endif
REG_OFFSET_NAME(orig_ax),
REG_OFFSET_NAME(ip),
REG_OFFSET_NAME(cs),
REG_OFFSET_NAME(flags),
REG_OFFSET_NAME(sp),
REG_OFFSET_NAME(ss),
REG_OFFSET_END,
};
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register
*
* regs_query_register_offset() returns the offset of a register in struct
* pt_regs from its name. If the name is invalid, this returns -EINVAL;
*/
int regs_query_register_offset(const char *name)
{
const struct pt_regs_offset *roff;
for (roff = regoffset_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return roff->offset;
return -EINVAL;
}
/**
* regs_query_register_name() - query register name from its offset
* @offset: the offset of a register in struct pt_regs.
*
* regs_query_register_name() returns the name of a register from its
* offset in struct pt_regs. If the @offset is invalid, this returns NULL;
*/
const char *regs_query_register_name(unsigned int offset)
{
const struct pt_regs_offset *roff;
for (roff = regoffset_table; roff->name != NULL; roff++)
if (roff->offset == offset)
return roff->name;
return NULL;
}
static const int arg_offs_table[] = {
#ifdef CONFIG_X86_32
[0] = offsetof(struct pt_regs, ax),
[1] = offsetof(struct pt_regs, dx),
[2] = offsetof(struct pt_regs, cx)
#else /* CONFIG_X86_64 */
[0] = offsetof(struct pt_regs, di),
[1] = offsetof(struct pt_regs, si),
[2] = offsetof(struct pt_regs, dx),
[3] = offsetof(struct pt_regs, cx),
[4] = offsetof(struct pt_regs, r8),
[5] = offsetof(struct pt_regs, r9)
#endif
};
/**
* regs_get_argument_nth() - get Nth argument at function call
* @regs: pt_regs which contains registers at function entry.
* @n: argument number.
*
* regs_get_argument_nth() returns @n th argument of a function call.
* Since usually the kernel stack will be changed right after function entry,
* you must use this at function entry. If the @n th entry is NOT in the
* kernel stack or pt_regs, this returns 0.
*/
unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
{
if (n < ARRAY_SIZE(arg_offs_table))
return *(unsigned long *)((char *)regs + arg_offs_table[n]);
else {
/*
* The typical case: arg n is on the stack.
* (Note: stack[0] = return address, so skip it)
*/
n -= ARRAY_SIZE(arg_offs_table);
return regs_get_kernel_stack_nth(regs, 1 + n);
}
}
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
static unsigned long debugreg_addr_limit(struct task_struct *task)
{
return TASK_SIZE - 3;
}
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
static unsigned long debugreg_addr_limit(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
@@ -454,96 +555,236 @@ static int genregs_set(struct task_struct *target,
return ret;
}
/*
* This function is trivial and will be inlined by the compiler.
* Having it separates the implementation details of debug
* registers from the interface details of ptrace.
*/
static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
static void ptrace_triggered(struct perf_event *bp, void *data)
{
switch (n) {
case 0: return child->thread.debugreg0;
case 1: return child->thread.debugreg1;
case 2: return child->thread.debugreg2;
case 3: return child->thread.debugreg3;
case 6: return child->thread.debugreg6;
case 7: return child->thread.debugreg7;
int i;
struct thread_struct *thread = &(current->thread);
/*
* Store in the virtual DR6 register the fact that the breakpoint
* was hit so the thread's debugger will see it.
*/
for (i = 0; i < HBP_NUM; i++) {
if (thread->ptrace_bps[i] == bp)
break;
}
thread->debugreg6 |= (DR_TRAP0 << i);
}
/*
* Walk through every ptrace breakpoints for this thread and
* build the dr7 value on top of their attributes.
*
*/
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
int i;
int dr7 = 0;
struct arch_hw_breakpoint *info;
for (i = 0; i < HBP_NUM; i++) {
if (bp[i] && !bp[i]->attr.disabled) {
info = counter_arch_bp(bp[i]);
dr7 |= encode_dr7(i, info->len, info->type);
}
}
return dr7;
}
static struct perf_event *
ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
struct task_struct *tsk, int disabled)
{
int err;
int gen_len, gen_type;
DEFINE_BREAKPOINT_ATTR(attr);
/*
* We shoud have at least an inactive breakpoint at this
* slot. It means the user is writing dr7 without having
* written the address register first
*/
if (!bp)
return ERR_PTR(-EINVAL);
err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
if (err)
return ERR_PTR(err);
attr = bp->attr;
attr.bp_len = gen_len;
attr.bp_type = gen_type;
attr.disabled = disabled;
return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
}
/*
* Handle ptrace writes to debug register 7.
*/
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
struct thread_struct *thread = &(tsk->thread);
unsigned long old_dr7;
int i, orig_ret = 0, rc = 0;
int enabled, second_pass = 0;
unsigned len, type;
struct perf_event *bp;
data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
restore:
/*
* Loop through all the hardware breakpoints, making the
* appropriate changes to each.
*/
for (i = 0; i < HBP_NUM; i++) {
enabled = decode_dr7(data, i, &len, &type);
bp = thread->ptrace_bps[i];
if (!enabled) {
if (bp) {
/*
* Don't unregister the breakpoints right-away,
* unless all register_user_hw_breakpoint()
* requests have succeeded. This prevents
* any window of opportunity for debug
* register grabbing by other users.
*/
if (!second_pass)
continue;
thread->ptrace_bps[i] = NULL;
bp = ptrace_modify_breakpoint(bp, len, type,
tsk, 1);
if (IS_ERR(bp)) {
rc = PTR_ERR(bp);
thread->ptrace_bps[i] = NULL;
break;
}
thread->ptrace_bps[i] = bp;
}
continue;
}
bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
/* Incorrect bp, or we have a bug in bp API */
if (IS_ERR(bp)) {
rc = PTR_ERR(bp);
thread->ptrace_bps[i] = NULL;
break;
}
thread->ptrace_bps[i] = bp;
}
/*
* Make a second pass to free the remaining unused breakpoints
* or to restore the original breakpoints if an error occurred.
*/
if (!second_pass) {
second_pass = 1;
if (rc < 0) {
orig_ret = rc;
data = old_dr7;
}
goto restore;
}
return ((orig_ret < 0) ? orig_ret : rc);
}
/*
* Handle PTRACE_PEEKUSR calls for the debug register area.
*/
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
struct thread_struct *thread = &(tsk->thread);
unsigned long val = 0;
if (n < HBP_NUM) {
struct perf_event *bp;
bp = thread->ptrace_bps[n];
if (!bp)
return 0;
val = bp->hw.info.address;
} else if (n == 6) {
val = thread->debugreg6;
} else if (n == 7) {
val = ptrace_get_dr7(thread->ptrace_bps);
}
return val;
}
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
DEFINE_BREAKPOINT_ATTR(attr);
if (!t->ptrace_bps[nr]) {
/*
* Put stub len and type to register (reserve) an inactive but
* correct bp
*/
attr.bp_addr = addr;
attr.bp_len = HW_BREAKPOINT_LEN_1;
attr.bp_type = HW_BREAKPOINT_W;
attr.disabled = 1;
bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
} else {
bp = t->ptrace_bps[nr];
t->ptrace_bps[nr] = NULL;
attr = bp->attr;
attr.bp_addr = addr;
bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
}
/*
* CHECKME: the previous code returned -EIO if the addr wasn't a
* valid task virtual addr. The new one will return -EINVAL in this
* case.
* -EINVAL may be what we want for in-kernel breakpoints users, but
* -EIO looks better for ptrace, since we refuse a register writing
* for the user. And anyway this is the previous behaviour.
*/
if (IS_ERR(bp))
return PTR_ERR(bp);
t->ptrace_bps[nr] = bp;
return 0;
}
static int ptrace_set_debugreg(struct task_struct *child,
int n, unsigned long data)
/*
* Handle PTRACE_POKEUSR calls for the debug register area.
*/
int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
{
int i;
struct thread_struct *thread = &(tsk->thread);
int rc = 0;
if (unlikely(n == 4 || n == 5))
/* There are no DR4 or DR5 registers */
if (n == 4 || n == 5)
return -EIO;
if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
return -EIO;
switch (n) {
case 0: child->thread.debugreg0 = data; break;
case 1: child->thread.debugreg1 = data; break;
case 2: child->thread.debugreg2 = data; break;
case 3: child->thread.debugreg3 = data; break;
case 6:
if ((data & ~0xffffffffUL) != 0)
return -EIO;
child->thread.debugreg6 = data;
break;
case 7:
/*
* Sanity-check data. Take one half-byte at once with
* check = (val >> (16 + 4*i)) & 0xf. It contains the
* R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
* 2 and 3 are LENi. Given a list of invalid values,
* we do mask |= 1 << invalid_value, so that
* (mask >> check) & 1 is a correct test for invalid
* values.
*
* R/Wi contains the type of the breakpoint /
* watchpoint, LENi contains the length of the watched
* data in the watchpoint case.
*
* The invalid values are:
* - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
* - R/Wi == 0x10 (break on I/O reads or writes), so
* mask |= 0x4444.
* - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
* 0x1110.
*
* Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
*
* See the Intel Manual "System Programming Guide",
* 15.2.4
*
* Note that LENi == 0x10 is defined on x86_64 in long
* mode (i.e. even for 32-bit userspace software, but
* 64-bit kernel), so the x86_64 mask value is 0x5454.
* See the AMD manual no. 24593 (AMD64 System Programming)
*/
#ifdef CONFIG_X86_32
#define DR7_MASK 0x5f54
#else
#define DR7_MASK 0x5554
#endif
data &= ~DR_CONTROL_RESERVED;
for (i = 0; i < 4; i++)
if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
return -EIO;
child->thread.debugreg7 = data;
if (data)
set_tsk_thread_flag(child, TIF_DEBUG);
else
clear_tsk_thread_flag(child, TIF_DEBUG);
break;
if (n == 6) {
thread->debugreg6 = val;
goto ret_path;
}
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
if (rc)
return rc;
}
/* All that's left is DR7 */
if (n == 7)
rc = ptrace_write_dr7(tsk, val);
return 0;
ret_path:
return rc;
}
/*

View File

@@ -23,7 +23,7 @@
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
#else
# include <asm/iommu.h>
# include <asm/x86_init.h>
#endif
/*
@@ -630,7 +630,7 @@ void native_machine_shutdown(void)
#endif
#ifdef CONFIG_X86_64
pci_iommu_shutdown();
x86_platform.iommu_shutdown();
#endif
}

View File

@@ -109,6 +109,7 @@
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd);
* from boot_params into a safe place.
*
*/
static inline void copy_edd(void)
static inline void __init copy_edd(void)
{
memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
sizeof(edd.mbr_signature));
@@ -256,7 +257,7 @@ static inline void copy_edd(void)
edd.edd_info_nr = boot_params.eddbuf_entries;
}
#else
static inline void copy_edd(void)
static inline void __init copy_edd(void)
{
}
#endif
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
x86_init.oem.banner();
mcheck_init();
}
#ifdef CONFIG_X86_32

View File

@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/*
* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*

View File

@@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu)
void cpu_disable_common(void)
{
int cpu = smp_processor_id();
/*
* HACK:
* Allow any queued timer interrupts to get serviced
* This is only a temporary solution until we cleanup
* fixup_irqs as we do for IA64.
*/
local_irq_enable();
mdelay(1);
local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */

View File

@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
unsigned long condition;
unsigned long dr6;
int si_code;
get_debugreg(condition, 6);
get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
if (condition & DR_STEP && kmemcheck_trap(regs))
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
/* DR6 may or may not be cleared by the CPU */
set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg7)
goto clear_dr7;
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
return;
}
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK)
goto debug_vm86;
#endif
/* Save debug status register where ptrace can see it */
tsk->thread.debugreg6 = condition;
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
* Single-stepping through system calls: ignore any exceptions in
* kernel space, but re-enable TF when returning to user mode.
*
* We already checked v86 mode above, so we can check for kernel mode
* by just checking the CPL of CS.
*/
if (condition & DR_STEP) {
if (!user_mode(regs))
goto clear_TF_reenable;
if ((dr6 & DR_STEP) && !user_mode(regs)) {
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(condition);
/* Ok, finally something we can handle */
send_sigtrap(tsk, regs, error_code, si_code);
/*
* Disable additional traps. They'll be re-enabled when
* the signal is delivered.
*/
clear_dr7:
set_debugreg(0, 7);
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
return;
#ifdef CONFIG_X86_32
debug_vm86:
/* reenable preemption: handle_vm86_trap() might sleep */
dec_preempt_count();
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
conditional_cli(regs);
return;
#endif
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
preempt_conditional_cli(regs);
return;
}

View File

@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
return;
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
pr_info(
"Skipped synchronization checks as TSC is reliable.\n");
return;
}
pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
smp_processor_id(), cpu);
/*
* Reset it - in case this is a second bootup:
*/
@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu)
cpu_relax();
if (nr_warps) {
printk("\n");
pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
smp_processor_id(), cpu);
pr_warning("Measured %Ld cycles TSC warp between CPUs, "
"turning off TSC clock.\n", max_warp);
mark_tsc_unstable("check_tsc_sync_source failed");
} else {
printk(" passed.\n");
pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
smp_processor_id(), cpu);
}
/*

View File

@@ -9,10 +9,25 @@
*/
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/irq.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>
/* MMR offset and pnode of hub sourcing interrupts for a given irq */
struct uv_irq_2_mmr_pnode{
struct rb_node list;
unsigned long offset;
int pnode;
int irq;
};
static spinlock_t uv_irq_lock;
static struct rb_root uv_irq_root;
static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
static void uv_noop(unsigned int irq)
{
@@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = {
.unmask = uv_noop,
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
};
/*
* Add offset and pnode information of the hub sourcing interrupts to the
* rb tree for a specific irq.
*/
static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
{
struct rb_node **link = &uv_irq_root.rb_node;
struct rb_node *parent = NULL;
struct uv_irq_2_mmr_pnode *n;
struct uv_irq_2_mmr_pnode *e;
unsigned long irqflags;
n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
uv_blade_to_memory_nid(blade));
if (!n)
return -ENOMEM;
n->irq = irq;
n->offset = offset;
n->pnode = uv_blade_to_pnode(blade);
spin_lock_irqsave(&uv_irq_lock, irqflags);
/* Find the right place in the rbtree: */
while (*link) {
parent = *link;
e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
if (unlikely(irq == e->irq)) {
/* irq entry exists */
e->pnode = uv_blade_to_pnode(blade);
e->offset = offset;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
kfree(n);
return 0;
}
if (irq < e->irq)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
/* Insert the node into the rbtree. */
rb_link_node(&n->list, parent, link);
rb_insert_color(&n->list, &uv_irq_root);
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
}
/* Retrieve offset and pnode information from the rb tree for a specific irq */
int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
{
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
*offset = e->offset;
*pnode = e->pnode;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return -1;
}
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
static int
arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset, int restrict)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int err;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
if (restrict == UV_AFFINITY_CPU)
desc->status |= IRQ_NO_BALANCING;
else
desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return irq;
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg = desc->chip_data;
unsigned int dest;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
unsigned long mmr_offset;
unsigned mmr_pnode;
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
return -1;
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = dest;
/* Get previously stored MMR and pnode of hub sourcing interrupts */
if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
return -1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return 0;
}
/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
*/
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset)
unsigned long mmr_offset, int restrict)
{
int irq;
int ret;
int irq, ret;
irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
irq = create_irq();
if (irq <= 0)
return -EBUSY;
ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
if (ret != irq)
ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
restrict);
if (ret == irq)
uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
else
destroy_irq(irq);
return ret;
@@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*
* Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
*/
void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
void uv_teardown_irq(unsigned int irq)
{
arch_disable_uv_irq(mmr_blade, mmr_offset);
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
arch_disable_uv_irq(e->pnode, e->offset);
rb_erase(n, &uv_irq_root);
kfree(e);
break;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);

View File

@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
return;
}
apic_cpus = apic->apicid_to_cpu_present(m->apicid);
apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
/*
* Validate version
@@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq)
}
static struct irq_chip cobalt_irq_type = {
.typename = "Cobalt-APIC",
.name = "Cobalt-APIC",
.startup = startup_cobalt_irq,
.shutdown = disable_cobalt_irq,
.enable = enable_cobalt_irq,
@@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq)
}
static struct irq_chip piix4_master_irq_type = {
.typename = "PIIX4-master",
.name = "PIIX4-master",
.startup = startup_piix4_master_irq,
.ack = ack_cobalt_irq,
.end = end_piix4_master_irq,
@@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = {
static struct irq_chip piix4_virtual_irq_type = {
.typename = "PIIX4-virtual",
.name = "PIIX4-virtual",
.shutdown = disable_8259A_irq,
.enable = enable_8259A_irq,
.disable = disable_8259A_irq,

View File

@@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(copy_user_generic);
EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);

View File

@@ -14,10 +14,13 @@
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
void __cpuinit x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
void __init x86_init_pgd_noop(pgd_t *unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }
/*
* The platform setup functions are preset with the default functions
@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = {
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
},
.iommu = {
.iommu_init = iommu_init_noop,
},
};
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = {
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop,
};