Merge branch 'linus' into x86/urgent
Merge reason: we want to queue up a dependent fix.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
 obj-y += bootflag.o e820.o
 obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y += tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
@@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
     struct cpuinfo_x86 *c = &cpu_data(pr->id);
 
     pr->pdc = NULL;
-    if (c->x86_vendor == X86_VENDOR_INTEL)
+    if (c->x86_vendor == X86_VENDOR_INTEL ||
+        c->x86_vendor == X86_VENDOR_CENTAUR)
         init_intel_pdc(pr, c);
 
     return;
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
  * Author: Joerg Roedel <joerg.roedel@amd.com>
  *         Leo Duran <leo.duran@amd.com>
  *
@@ -25,10 +25,12 @@
 #include <linux/interrupt.h>
 #include <linux/msi.h>
 #include <asm/pci-direct.h>
+#include <asm/amd_iommu_proto.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
+#include <asm/x86_init.h>
 
 /*
  * definitions for the ACPI scanning code
@@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
                                            to handle */
 LIST_HEAD(amd_iommu_unity_map);     /* a list of required unity mappings
                                            we find in ACPI */
 #ifdef CONFIG_IOMMU_STRESS
 bool amd_iommu_isolate = false;
 #else
 bool amd_iommu_isolate = true;      /* if true, device isolation is
                                            enabled */
 #endif
 
 bool amd_iommu_unmap_flush;         /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);          /* list of all AMD IOMMUs in the
                                            system */
 
+/* Array to assign indices to IOMMUs*/
+struct amd_iommu *amd_iommus[MAX_IOMMUS];
+int amd_iommus_present;
+
+/* IOMMUs have a non-present cache? */
+bool amd_iommu_np_cache __read_mostly;
+
 /*
  * List of protection domains - used during resume
  */
 LIST_HEAD(amd_iommu_pd_list);
 spinlock_t amd_iommu_pd_lock;
 
 /*
  * Pointer to the device table which is shared by all AMD IOMMUs
  * it is indexed by the PCI device id or the HT unit id and contains
@@ -156,12 +164,6 @@ u16 *amd_iommu_alias_table;
  */
 struct amd_iommu **amd_iommu_rlookup_table;
 
-/*
- * The pd table (protection domain table) is used to find the protection domain
- * data structure a device belongs to. Indexed with the PCI device id too.
- */
-struct protection_domain **amd_iommu_pd_table;
-
 /*
  * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap
  * to know which ones are already in use.
@@ -838,7 +840,18 @@ static void __init free_iommu_all(void)
 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 {
     spin_lock_init(&iommu->lock);
 
+    /* Add IOMMU to internal data structures */
     list_add_tail(&iommu->list, &amd_iommu_list);
+    iommu->index = amd_iommus_present++;
+
+    if (unlikely(iommu->index >= MAX_IOMMUS)) {
+        WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
+        return -ENOSYS;
+    }
+
+    /* Index is fine - add IOMMU to the array */
+    amd_iommus[iommu->index] = iommu;
+
     /*
      * Copy data from ACPI table entry to the iommu struct
@@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
     init_iommu_from_acpi(iommu, h);
     init_iommu_devices(iommu);
 
+    if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+        amd_iommu_np_cache = true;
+
     return pci_enable_device(iommu->dev);
 }
 
@@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
  *
  ****************************************************************************/
 
-static int __init iommu_setup_msi(struct amd_iommu *iommu)
+static int iommu_setup_msi(struct amd_iommu *iommu)
 {
     int r;
 
@@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = {
  * functions. Finally it prints some information about AMD IOMMUs and
  * the driver state and enables the hardware.
  */
-int __init amd_iommu_init(void)
+static int __init amd_iommu_init(void)
 {
     int i, ret = 0;
 
-    if (no_iommu) {
-        printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
-        return 0;
-    }
-
-    if (!amd_iommu_detected)
-        return -ENODEV;
-
     /*
      * First parse ACPI tables to find the largest Bus/Dev/Func
      * we need to handle. Upon this information the shared data
@@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void)
     if (amd_iommu_rlookup_table == NULL)
         goto free;
 
-    /*
-     * Protection Domain table - maps devices to protection domains
-     * This table has the same size as the rlookup_table
-     */
-    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                get_order(rlookup_table_size));
-    if (amd_iommu_pd_table == NULL)
-        goto free;
-
     amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
                     GFP_KERNEL | __GFP_ZERO,
                     get_order(MAX_DOMAIN_ID/8));
@@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void)
      */
     amd_iommu_pd_alloc_bitmap[0] = 1;
 
+    spin_lock_init(&amd_iommu_pd_lock);
+
     /*
      * now the data structures are allocated and basically initialized
      * start the real acpi table scan
@@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void)
     if (iommu_pass_through)
         goto out;
 
-    printk(KERN_INFO "AMD-Vi: device isolation ");
-    if (amd_iommu_isolate)
-        printk("enabled\n");
-    else
-        printk("disabled\n");
-
     if (amd_iommu_unmap_flush)
         printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
     else
         printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
 
+    x86_platform.iommu_shutdown = disable_iommus;
 out:
     return ret;
 
@@ -1304,9 +1299,6 @@ free:
     free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
            get_order(MAX_DOMAIN_ID/8));
 
-    free_pages((unsigned long)amd_iommu_pd_table,
-           get_order(rlookup_table_size));
-
     free_pages((unsigned long)amd_iommu_rlookup_table,
            get_order(rlookup_table_size));
 
@@ -1323,11 +1315,6 @@ free:
     goto out;
 }
 
-void amd_iommu_shutdown(void)
-{
-    disable_iommus();
-}
-
 /****************************************************************************
  *
  * Early detect code. This code runs at IOMMU detection time in the DMA
@@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
 
 void __init amd_iommu_detect(void)
 {
-    if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
+    if (no_iommu || (iommu_detected && !gart_iommu_aperture))
         return;
 
     if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
         iommu_detected = 1;
         amd_iommu_detected = 1;
-#ifdef CONFIG_GART_IOMMU
-        gart_iommu_aperture_disabled = 1;
-        gart_iommu_aperture = 0;
-#endif
+        x86_init.iommu.iommu_init = amd_iommu_init;
     }
 }
 
@@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str)
 static int __init parse_amd_iommu_options(char *str)
 {
     for (; *str; ++str) {
-        if (strncmp(str, "isolate", 7) == 0)
-            amd_iommu_isolate = true;
-        if (strncmp(str, "share", 5) == 0)
-            amd_iommu_isolate = false;
         if (strncmp(str, "fullflush", 9) == 0)
             amd_iommu_unmap_flush = true;
     }
@@ -28,6 +28,7 @@
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
 #include <asm/k8.h>
+#include <asm/x86_init.h>
 
 int gart_iommu_aperture;
 int gart_iommu_aperture_disabled __initdata;
@@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void)
 
         iommu_detected = 1;
         gart_iommu_aperture = 1;
+        x86_init.iommu.iommu_init = gart_iommu_init;
 
         aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
         aper_size = (32 * 1024 * 1024) << aper_order;
@@ -456,7 +458,7 @@ out:
 
     if (aper_alloc) {
         /* Got the aperture from the AGP bridge */
-    } else if (swiotlb && !valid_agp) {
+    } else if (!valid_agp) {
         /* Do nothing */
     } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
            force_iommu ||
@@ -2,7 +2,7 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
 obj-$(CONFIG_X86_IO_APIC) += io_apic.o
 obj-$(CONFIG_SMP) += ipi.o
 
@@ -241,28 +241,13 @@ static int modern_apic(void)
 }
 
-/*
- * bare function to substitute write operation
- * and it's _that_ fast :)
- */
-static void native_apic_write_dummy(u32 reg, u32 v)
-{
-    WARN_ON_ONCE((cpu_has_apic || !disable_apic));
-}
-
-static u32 native_apic_read_dummy(u32 reg)
-{
-    WARN_ON_ONCE((cpu_has_apic && !disable_apic));
-    return 0;
-}
-
 /*
- * right after this call apic->write/read doesn't do anything
- * note that there is no restore operation it works one way
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
  */
 void apic_disable(void)
 {
-    apic->read = native_apic_read_dummy;
-    apic->write = native_apic_write_dummy;
+    pr_info("APIC: switched to apic NOOP\n");
+    apic = &apic_noop;
 }
 
 void native_apic_wait_icr_idle(void)
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
         v = apic_read(APIC_LVTT);
         v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
         apic_write(APIC_LVTT, v);
-        apic_write(APIC_TMICT, 0xffffffff);
+        apic_write(APIC_TMICT, 0);
         break;
     case CLOCK_EVT_MODE_RESUME:
         /* Nothing to do here */
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void)
     unsigned long flags;
     struct IO_APIC_route_entry **ioapic_entries = NULL;
     int ret, x2apic_enabled = 0;
-    int dmar_table_init_ret = 0;
+    int dmar_table_init_ret;
 
-#ifdef CONFIG_INTR_REMAP
     dmar_table_init_ret = dmar_table_init();
-    if (dmar_table_init_ret)
-        pr_debug("dmar_table_init() failed with %d:\n",
-                dmar_table_init_ret);
-#endif
+    if (dmar_table_init_ret && !x2apic_supported())
+        return;
 
     ioapic_entries = alloc_ioapic_entries();
     if (!ioapic_entries) {
arch/x86/kernel/apic/apic_noop.c (new file, 200 lines)
@@ -0,0 +1,200 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc...
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+    return -1;
+}
+
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+    return 0;
+}
+
+static u64 noop_apic_icr_read(void)
+{
+    return 0;
+}
+
+static int noop_cpu_to_logical_apicid(int cpu)
+{
+    return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+    return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+    return 0;
+}
+
+static int noop_probe(void)
+{
+    /*
+     * NOOP apic should not ever be
+     * enabled via probe routine
+     */
+    return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+    /*
+     * if we would be really "pedantic"
+     * we should pass read_apic_id() here
+     * but since NOOP suppose APIC ID = 0
+     * lets save a few cycles
+     */
+    return physid_isset(0, phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+    /* only BSP here */
+    return cpumask_of(0);
+}
+
+static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
+{
+    return physid_isset(apicid, *map);
+}
+
+static unsigned long noop_check_apicid_present(int bit)
+{
+    return physid_isset(bit, phys_cpu_present_map);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+    if (cpu != 0)
+        pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+    cpumask_clear(retmask);
+    cpumask_set_cpu(cpu, retmask);
+}
+
+int noop_apicid_to_node(int logical_apicid)
+{
+    /* we're always on node 0 */
+    return 0;
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+    WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+    return 0;
+}
+
+static void noop_apic_write(u32 reg, u32 v)
+{
+    WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+struct apic apic_noop = {
+    .name = "noop",
+    .probe = noop_probe,
+    .acpi_madt_oem_check = NULL,
+
+    .apic_id_registered = noop_apic_id_registered,
+
+    .irq_delivery_mode = dest_LowestPrio,
+    /* logical delivery broadcast to all CPUs: */
+    .irq_dest_mode = 1,
+
+    .target_cpus = noop_target_cpus,
+    .disable_esr = 0,
+    .dest_logical = APIC_DEST_LOGICAL,
+    .check_apicid_used = noop_check_apicid_used,
+    .check_apicid_present = noop_check_apicid_present,
+
+    .vector_allocation_domain = noop_vector_allocation_domain,
+    .init_apic_ldr = noop_init_apic_ldr,
+
+    .ioapic_phys_id_map = default_ioapic_phys_id_map,
+    .setup_apic_routing = NULL,
+    .multi_timer_check = NULL,
+    .apicid_to_node = noop_apicid_to_node,
+
+    .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
+    .cpu_present_to_apicid = default_cpu_present_to_apicid,
+    .apicid_to_cpu_present = physid_set_mask_of_physid,
+
+    .setup_portio_remap = NULL,
+    .check_phys_apicid_present = default_check_phys_apicid_present,
+    .enable_apic_mode = NULL,
+
+    .phys_pkg_id = noop_phys_pkg_id,
+
+    .mps_oem_check = NULL,
+
+    .get_apic_id = noop_get_apic_id,
+    .set_apic_id = NULL,
+    .apic_id_mask = 0x0F << 24,
+
+    .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+    .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+    .send_IPI_mask = noop_send_IPI_mask,
+    .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
+    .send_IPI_allbutself = noop_send_IPI_allbutself,
+    .send_IPI_all = noop_send_IPI_all,
+    .send_IPI_self = noop_send_IPI_self,
+
+    .wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
+
+    /* should be safe */
+    .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+    .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+    .wait_for_init_deassert = NULL,
+
+    .smp_callin_clear_local_apic = NULL,
+    .inquire_remote_apic = NULL,
+
+    .read = noop_apic_read,
+    .write = noop_apic_write,
+    .icr_read = noop_apic_icr_read,
+    .icr_write = noop_apic_icr_write,
+    .wait_icr_idle = noop_apic_wait_icr_idle,
+    .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+};
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void)
 #endif
 }
 
-static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
     return 0;
 }
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
     return BAD_APICID;
 }
 
-static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
-{
-    return physid_mask_of_physid(phys_apicid);
-}
-
 /* Mapping from cpu number to logical apicid */
 static inline int bigsmp_cpu_to_logical_apicid(int cpu)
 {
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
     return cpu_physical_id(cpu);
 }
 
-static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
     /* For clustered we don't have a good way to do this yet - hack */
-    return physids_promote(0xFFL);
+    physids_promote(0xFFL, retmap);
 }
 
 static int bigsmp_check_phys_apicid_present(int phys_apicid)
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = {
     .apicid_to_node = bigsmp_apicid_to_node,
     .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
     .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
-    .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
+    .apicid_to_cpu_present = physid_set_mask_of_physid,
     .setup_portio_remap = NULL,
     .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
     .enable_apic_mode = NULL,
@@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void)
     return cpumask_of(smp_processor_id());
 }
 
-static unsigned long
-es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
 {
     return 0;
 }
 
 static unsigned long es7000_check_apicid_present(int bit)
 {
     return physid_isset(bit, phys_cpu_present_map);
@@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu)
 
 static int cpu_id;
 
-static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
+static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
 {
-    physid_mask_t mask;
-
-    mask = physid_mask_of_physid(cpu_id);
+    physid_set_mask_of_physid(cpu_id, retmap);
     ++cpu_id;
-
-    return mask;
 }
 
 /* Mapping from cpu number to logical apicid */
@@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu)
 #endif
 }
 
-static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
+static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
     /* For clustered we don't have a good way to do this yet - hack */
-    return physids_promote(0xff);
+    physids_promote(0xFFL, retmap);
 }
 
 static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
@@ -60,8 +60,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
 #include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
 
 #include <asm/apic.h>
 
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
     return pin;
 }
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
-    struct irq_pin_list *irq_2_pin;
-    cpumask_var_t domain;
-    cpumask_var_t old_domain;
-    unsigned move_cleanup_count;
-    u8 vector;
-    u8 move_in_progress : 1;
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
     struct irq_cfg *cfg = NULL;
     struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 /* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
     return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
     add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+                 int mask_and, int mask_or,
+                 void (*final)(struct irq_pin_list *entry))
+{
+    unsigned int reg, pin;
+
+    pin = entry->pin;
+    reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+    reg &= mask_and;
+    reg |= mask_or;
+    io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+    if (final)
+        final(entry);
+}
+
 static void io_apic_modify_irq(struct irq_cfg *cfg,
                int mask_and, int mask_or,
                void (*final)(struct irq_pin_list *entry))
 {
-    int pin;
     struct irq_pin_list *entry;
 
-    for_each_irq_pin(entry, cfg->irq_2_pin) {
-        unsigned int reg;
-        pin = entry->pin;
-        reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-        reg &= mask_and;
-        reg |= mask_or;
-        io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-        if (final)
-            final(entry);
-    }
+    for_each_irq_pin(entry, cfg->irq_2_pin)
+        __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+    __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                 IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+    __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+                 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
 static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
     io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-    io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-               IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-    io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-               IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
     struct irq_cfg *cfg = desc->chip_data;
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
     int cpu, err;
     cpumask_var_t tmp_mask;
 
-    if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+    if (cfg->move_in_progress)
         return -EBUSY;
 
     if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1237,8 +1227,7 @@ next:
     return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
     int err;
     unsigned long flags;
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void)
     struct irq_desc *desc;
     unsigned int irq;
 
-    if (apic_verbosity == APIC_QUIET)
-        return;
-
     printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
     for (i = 0; i < nr_ioapics; i++)
         printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base)
 {
     int i;
 
-    if (apic_verbosity == APIC_QUIET)
-        return;
-
     printk(KERN_DEBUG);
 
     for (i = 0; i < 8; i++)
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
     unsigned int i, v, ver, maxlvt;
     u64 icr;
 
-    if (apic_verbosity == APIC_QUIET)
-        return;
-
     printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
         smp_processor_id(), hard_smp_processor_id());
     v = apic_read(APIC_ID);
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
     printk("\n");
 }
 
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
 {
     int cpu;
 
+    if (!maxcpu)
+        return;
+
     preempt_disable();
-    for_each_online_cpu(cpu)
+    for_each_online_cpu(cpu) {
+        if (cpu >= maxcpu)
+            break;
         smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+    }
     preempt_enable();
 }
 
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void)
     unsigned int v;
     unsigned long flags;
 
-    if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+    if (!nr_legacy_irqs)
         return;
 
     printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void)
     printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
+{
+    int num = -1;
+
+    if (strcmp(arg, "all") == 0) {
+        show_lapic = CONFIG_NR_CPUS;
+    } else {
+        get_option(&arg, &num);
+        if (num >= 0)
+            show_lapic = num;
+    }
+
+    return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
 {
+    if (apic_verbosity == APIC_QUIET)
+        return 0;
+
     print_PIC();
 
     /* don't print out if apic is not there */
     if (!cpu_has_apic && !apic_from_smp_config())
         return 0;
 
-    print_all_local_APICs();
+    print_local_APICs(show_lapic);
     print_IO_APIC();
 
     return 0;
 }
 
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
 
 
 /* Where if anywhere is the i8259 connect in external int mode */
|
||||
* This is broken; anything with a real cpu count has to
|
||||
* circumvent this idiocy regardless.
|
||||
*/
|
||||
phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
|
||||
apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
|
||||
|
||||
/*
|
||||
* Set the IOAPIC ID to the value stored in the MPC table.
|
||||
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void)
|
||||
* system must have a unique ID or we get lots of nice
|
||||
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
||||
*/
|
||||
if (apic->check_apicid_used(phys_id_present_map,
|
||||
if (apic->check_apicid_used(&phys_id_present_map,
|
||||
mp_ioapics[apic_id].apicid)) {
|
||||
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
||||
apic_id, mp_ioapics[apic_id].apicid);
|
||||
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void)
|
||||
mp_ioapics[apic_id].apicid = i;
|
||||
} else {
|
||||
physid_mask_t tmp;
|
||||
tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
|
||||
apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
|
||||
apic_printk(APIC_VERBOSE, "Setting %d in the "
|
||||
"phys_id_present_map\n",
|
||||
mp_ioapics[apic_id].apicid);
|
||||
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
     cpumask_var_t cleanup_mask;
 
     if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
         unsigned int i;
-        cfg->move_cleanup_count = 0;
-        for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-            cfg->move_cleanup_count++;
         for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
             apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
     } else {
         cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-        cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
         apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
         free_cpumask_var(cleanup_mask);
     }
@@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
     }
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
 /*
  * Either sets desc->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
  * leaves desc->affinity untouched.
  */
-static unsigned int
+unsigned int
 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
     struct irq_cfg *cfg;
@@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 
         cfg = irq_cfg(irq);
         spin_lock(&desc->lock);
-        if (!cfg->move_cleanup_count)
-            goto unlock;
 
         if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
             goto unlock;
@@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
             goto unlock;
         }
         __get_cpu_var(vector_irq)[vector] = -1;
-        cfg->move_cleanup_count--;
 unlock:
         spin_unlock(&desc->lock);
     }
@@ -2460,21 +2456,33 @@ unlock:
     irq_exit();
 }
 
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
 {
     struct irq_desc *desc = *descp;
     struct irq_cfg *cfg = desc->chip_data;
-    unsigned vector, me;
+    unsigned me;
 
     if (likely(!cfg->move_in_progress))
         return;
 
-    vector = ~get_irq_regs()->orig_ax;
     me = smp_processor_id();
 
     if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
         send_cleanup_vector(cfg);
 }
 
+static void irq_complete_move(struct irq_desc **descp)
+{
+    __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+    struct irq_cfg *cfg = desc->chip_data;
+
+    __irq_complete_move(&desc, cfg->vector);
+}
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
@@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq)
 
 atomic_t irq_mis_count;
 
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+    struct irq_pin_list *entry;
+
+    for_each_irq_pin(entry, cfg->irq_2_pin) {
+        if (mp_ioapics[entry->apic].apicver >= 0x20) {
+            /*
+             * Intr-remapping uses pin number as the virtual vector
+             * in the RTE. Actual vector is programmed in
+             * intr-remapping table entry. Hence for the io-apic
+             * EOI we use the pin number.
+             */
+            if (irq_remapped(irq))
+                io_apic_eoi(entry->apic, entry->pin);
+            else
+                io_apic_eoi(entry->apic, cfg->vector);
+        } else {
+            __mask_and_edge_IO_APIC_irq(entry);
+            __unmask_and_level_IO_APIC_irq(entry);
+        }
+    }
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+    struct irq_cfg *cfg;
+    unsigned long flags;
+    unsigned int irq;
+
+    irq = desc->irq;
+    cfg = desc->chip_data;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    __eoi_ioapic_irq(irq, cfg);
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ack_apic_level(unsigned int irq)
 {
     struct irq_desc *desc = irq_to_desc(irq);
@@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq)
      * level-triggered interrupt. We mask the source for the time of the
      * operation to prevent an edge-triggered interrupt escaping meanwhile.
      * The idea is from Manfred Spraul. --macro
+     *
+     * Also in the case when cpu goes offline, fixup_irqs() will forward
+     * any unhandled interrupt on the offlined cpu to the new cpu
+     * destination that is handling the corresponding interrupt. This
+     * interrupt forwarding is done via IPI's. Hence, in this case also
+     * level-triggered io-apic interrupt will be seen as an edge
+     * interrupt in the IRR. And we can't rely on the cpu's EOI
+     * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+     * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+     * supporting EOI register, we do an explicit EOI to clear the
+     * remote IRR and on IO-APIC's which don't have an EOI register,
+     * we use the above logic (mask+edge followed by unmask+level) from
+     * Manfred Spraul to clear the remote IRR.
      */
     cfg = desc->chip_data;
     i = cfg->vector;
@@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq)
      */
     ack_APIC_irq();
 
+    /*
+     * Tail end of clearing remote IRR bit (either by delivering the EOI
+     * message via io-apic EOI register write or simulating it using
+     * mask+edge followed by unmask+level logic) manually when the
+     * level triggered interrupt is seen as the edge triggered interrupt
+     * at the cpu.
+     */
+    if (!(v & (1 << (i & 0x1f)))) {
+        atomic_inc(&irq_mis_count);
+
+        eoi_ioapic_irq(desc);
+    }
+
     /* Now we can move and renable the irq */
     if (unlikely(do_unmask_irq)) {
         /* Only migrate the irq if the ack has been received.
@@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq)
             move_masked_irq(irq);
         unmask_IO_APIC_irq_desc(desc);
     }
-
-    /* Tail end of version 0x11 I/O APIC bug workaround */
-    if (!(v & (1 << (i & 0x1f)))) {
-        atomic_inc(&irq_mis_count);
-        spin_lock(&ioapic_lock);
-        __mask_and_edge_IO_APIC_irq(cfg);
-        __unmask_and_level_IO_APIC_irq(cfg);
-        spin_unlock(&ioapic_lock);
-    }
 }
 
 #ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-    struct irq_pin_list *entry;
-
-    for_each_irq_pin(entry, cfg->irq_2_pin)
-        io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-    struct irq_cfg *cfg;
-    unsigned long flags;
-    unsigned int irq;
-
-    irq = desc->irq;
-    cfg = desc->chip_data;
-
-    spin_lock_irqsave(&ioapic_lock, flags);
-    __eoi_ioapic_irq(irq, cfg);
-    spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ir_ack_apic_edge(unsigned int irq)
 {
     ack_APIC_irq();
@@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
             continue;
 
         desc_new = move_irq_desc(desc_new, node);
+        cfg_new = desc_new->chip_data;
 
         if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
             irq = new;
@@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-               unsigned long mmr_offset)
-{
-    const struct cpumask *eligible_cpu = cpumask_of(cpu);
-    struct irq_cfg *cfg;
-    int mmr_pnode;
-    unsigned long mmr_value;
-    struct uv_IO_APIC_route_entry *entry;
-    unsigned long flags;
-    int err;
-
-    BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-    cfg = irq_cfg(irq);
-
-    err = assign_irq_vector(irq, cfg, eligible_cpu);
-    if (err != 0)
-        return err;
-
-    spin_lock_irqsave(&vector_lock, flags);
-    set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-                      irq_name);
-    spin_unlock_irqrestore(&vector_lock, flags);
-
-    mmr_value = 0;
-    entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-    entry->vector = cfg->vector;
-    entry->delivery_mode = apic->irq_delivery_mode;
-    entry->dest_mode = apic->irq_dest_mode;
-    entry->polarity = 0;
-    entry->trigger = 0;
-    entry->mask = 0;
-    entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
-    mmr_pnode = uv_blade_to_pnode(mmr_blade);
-    uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-    if (cfg->move_in_progress)
-        send_cleanup_vector(cfg);
-
-    return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * no longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
-    unsigned long mmr_value;
-    struct uv_IO_APIC_route_entry *entry;
-    int mmr_pnode;
-
-    BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-    mmr_value = 0;
-    entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-    entry->mask = 1;
-
-    mmr_pnode = uv_blade_to_pnode(mmr_blade);
-    uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
     union IO_APIC_reg_01 reg_01;
@@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
      */
 
     if (physids_empty(apic_id_map))
-        apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+        apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
 
     spin_lock_irqsave(&ioapic_lock, flags);
     reg_00.raw = io_apic_read(ioapic, 0);
@@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
      * Every APIC in a system must have a unique ID or we get lots of nice
      * 'stuck on smp_invalidate_needed IPI wait' messages.
      */
-    if (apic->check_apicid_used(apic_id_map, apic_id)) {
+    if (apic->check_apicid_used(&apic_id_map, apic_id)) {
 
         for (i = 0; i < get_physical_broadcast(); i++) {
-            if (!apic->check_apicid_used(apic_id_map, i))
+            if (!apic->check_apicid_used(&apic_id_map, i))
                 break;
         }
 
@@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
         apic_id = i;
     }
 
-    tmp = apic->apicid_to_cpu_present(apic_id);
+    apic->apicid_to_cpu_present(apic_id, &tmp);
     physids_or(apic_id_map, apic_id_map, tmp);
 
     if (reg_00.bits.ID != apic_id) {
@@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
     for (i = 0; i < nr_ioapics; i++) {
         res[i].name = mem;
         res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-        sprintf(mem, "IOAPIC %u", i);
+        snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
         mem += IOAPIC_RESOURCE_NAME_SIZE;
     }
 
@@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void)
 #ifdef CONFIG_X86_32
 fake_ioapic_page:
 #endif
-            ioapic_phys = (unsigned long)
-                alloc_bootmem_pages(PAGE_SIZE);
+            ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
             ioapic_phys = __pa(ioapic_phys);
         }
         set_fixmap_nocache(idx, ioapic_phys);
-        apic_printk(APIC_VERBOSE,
-                "mapped IOAPIC to %08lx (%08lx)\n",
-                __fix_to_virt(idx), ioapic_phys);
+        apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+            __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+            ioapic_phys);
         idx++;
 
         ioapic_res->start = ioapic_phys;
-        ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+        ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
         ioapic_res++;
     }
 }
@@ -39,7 +39,8 @@
 int unknown_nmi_panic;
 int nmi_watchdog_enabled;
 
-static cpumask_t backtrace_mask __read_mostly;
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
 /* nmi_active:
  * >0: the lapic NMI watchdog is active, but can be disabled
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
     }
 
     /* We can be called before check_nmi_watchdog, hence NULL check. */
-    if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+    if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
         static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
 
         spin_lock(&lock);
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
         show_regs(regs);
         dump_stack();
         spin_unlock(&lock);
-        cpumask_clear_cpu(cpu, &backtrace_mask);
+        cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 
         rc = 1;
     }
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void)
 {
     int i;
 
-    cpumask_copy(&backtrace_mask, cpu_online_mask);
+    cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 
     printk(KERN_INFO "sending NMI to all CPUs:\n");
     apic->send_IPI_all(NMI_VECTOR);
 
     /* Wait for up to 10 seconds for all CPUs to do the backtrace */
    for (i = 0; i < 10 * 1000; i++) {
-        if (cpumask_empty(&backtrace_mask))
+        if (cpumask_empty(to_cpumask(backtrace_mask)))
             break;
         mdelay(1);
     }
@@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void)
     return cpu_all_mask;
 }
 
-static inline unsigned long
-numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
 {
-    return physid_isset(apicid, bitmap);
+    return physid_isset(apicid, *map);
 }
 
 static inline unsigned long numaq_check_apicid_present(int bit)
@@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq)
     return apic != 0 && irq == 0;
 }
 
-static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
     /* We don't have a good way to do this yet - hack */
-    return physids_promote(0xFUL);
+    return physids_promote(0xFUL, retmap);
 }
 
 static inline int numaq_cpu_to_logical_apicid(int cpu)
@@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid)
     return logical_apicid >> 4;
 }
 
-static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
+static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
 {
     int node = numaq_apicid_to_node(logical_apicid);
     int cpu = __ffs(logical_apicid & 0xf);
 
-    return physid_mask_of_physid(cpu + 4*node);
+    physid_set_mask_of_physid(cpu + 4*node, retmap);
 }
 
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
@@ -108,7 +108,7 @@ struct apic apic_default = {
     .apicid_to_node = default_apicid_to_node,
     .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
     .cpu_present_to_apicid = default_cpu_present_to_apicid,
-    .apicid_to_cpu_present = default_apicid_to_cpu_present,
+    .apicid_to_cpu_present = physid_set_mask_of_physid,
     .setup_portio_remap = NULL,
     .check_phys_apicid_present = default_check_phys_apicid_present,
     .enable_apic_mode = NULL,
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void)
     return cpumask_of(0);
 }
 
-static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
 {
     return 0;
 }
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu)
     return BAD_APICID;
 }
 
-static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
+static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
 {
     /* For clustered we don't have a good way to do this yet - hack */
-    return physids_promote(0x0F);
+    physids_promote(0x0FL, retmap);
 }
 
-static physid_mask_t summit_apicid_to_cpu_present(int apicid)
+static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
 {
-    return physid_mask_of_physid(0);
+    physid_set_mask_of_physid(0, retmap);
 }
 
 static int summit_check_phys_apicid_present(int physical_apicid)
@@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode)
         map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
 }
 
+static __init void map_low_mmrs(void)
+{
+    init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+    init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
 static __init void uv_rtc_init(void)
 {
     long status;
@@ -550,6 +556,8 @@ void __init uv_system_init(void)
     unsigned long mmr_base, present, paddr;
     unsigned short pnode_mask;
 
+    map_low_mmrs();
+
     m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
     m_val = m_n_config.s.m_skt;
     n_val = m_n_config.s.n_skt;
@@ -204,7 +204,6 @@
 #include <linux/module.h>
 
 #include <linux/poll.h>
-#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/timer.h>
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
 static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
 static struct apm_user *user_list;
 static DEFINE_SPINLOCK(user_list_lock);
+static DEFINE_MUTEX(apm_mutex);
 
 /*
  * Set up a segment that references the real mode segment 0x40
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
         return -EPERM;
     switch (cmd) {
     case APM_IOC_STANDBY:
-        lock_kernel();
+        mutex_lock(&apm_mutex);
         if (as->standbys_read > 0) {
             as->standbys_read--;
             as->standbys_pending--;
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
             queue_event(APM_USER_STANDBY, as);
         if (standbys_pending <= 0)
             standby();
-        unlock_kernel();
+        mutex_unlock(&apm_mutex);
         break;
     case APM_IOC_SUSPEND:
-        lock_kernel();
+        mutex_lock(&apm_mutex);
         if (as->suspends_read > 0) {
             as->suspends_read--;
             as->suspends_pending--;
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
             queue_event(APM_USER_SUSPEND, as);
         if (suspends_pending <= 0) {
             ret = suspend(1);
+            mutex_unlock(&apm_mutex);
         } else {
             as->suspend_wait = 1;
+            mutex_unlock(&apm_mutex);
             wait_event_interruptible(apm_suspend_waitqueue,
                     as->suspend_wait == 0);
             ret = as->suspend_result;
         }
-        unlock_kernel();
         return ret;
     default:
         return -ENOTTY;
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp)
 {
     struct apm_user *as;
 
-    lock_kernel();
     as = kmalloc(sizeof(*as), GFP_KERNEL);
     if (as == NULL) {
         printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
               sizeof(*as));
-        unlock_kernel();
         return -ENOMEM;
     }
     as->magic = APM_BIOS_MAGIC;
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp)
     user_list = as;
     spin_unlock(&user_list_lock);
     filp->private_data = as;
-    unlock_kernel();
     return 0;
 }
@@ -5,6 +5,7 @@
 # Don't trace early stages of a secondary CPU boot
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 # Make sure load_percpu_segment has no stackprotector
@@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
         }
     }
 
-    display_cacheinfo(c);
+    cpu_detect_cache_sizes(c);
 
     /* Multi core CPU? */
     if (c->extended_cpuid_level >= 0x80000008) {
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
         set_cpu_cap(c, X86_FEATURE_REP_GOOD);
     }
 
-    display_cacheinfo(c);
+    cpu_detect_cache_sizes(c);
 }
 
 enum {
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
 static void __cpuinit default_init(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_64
-    display_cacheinfo(c);
+    cpu_detect_cache_sizes(c);
 #else
     /* Not much we can do here... */
     /* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
     }
 }
 
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 {
     unsigned int n, dummy, ebx, ecx, edx, l2size;
 
@@ -391,8 +391,6 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 
     if (n >= 0x80000005) {
         cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
-        printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
-                edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
         c->x86_cache_size = (ecx>>24) + (edx>>24);
 #ifdef CONFIG_X86_64
         /* On K8 L1 TLB is inclusive, so don't count it */
@@ -422,9 +420,6 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 #endif
 
     c->x86_cache_size = l2size;
-
-    printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
-            l2size, ecx & 0xFF);
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -659,24 +654,31 @@ void __init early_cpu_init(void)
     const struct cpu_dev *const *cdev;
     int count = 0;
 
+#ifdef PROCESSOR_SELECT
     printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
 
     for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
         const struct cpu_dev *cpudev = *cdev;
-        unsigned int j;
 
         if (count >= X86_VENDOR_NUM)
             break;
         cpu_devs[count] = cpudev;
         count++;
 
-        for (j = 0; j < 2; j++) {
-            if (!cpudev->c_ident[j])
-                continue;
-            printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
-                cpudev->c_ident[j]);
-        }
-    }
+#ifdef PROCESSOR_SELECT
+        {
+            unsigned int j;
+
+            for (j = 0; j < 2; j++) {
+                if (!cpudev->c_ident[j])
+                    continue;
+                printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+                    cpudev->c_ident[j]);
+            }
+        }
+#endif
+    }
     early_identify_cpu(&boot_cpu_data);
 }
@@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
         boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
     }
 
-#ifdef CONFIG_X86_MCE
     /* Init Machine Check Exception if available. */
-    mcheck_init(c);
-#endif
+    mcheck_cpu_init(c);
 
     select_idle_routine(c);
 
@@ -32,6 +32,6 @@ struct cpu_dev {
 extern const struct cpu_dev *const __x86_cpu_dev_start[],
                 *const __x86_cpu_dev_end[];
 
-extern void display_cacheinfo(struct cpuinfo_x86 *c);
+extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
 
 #endif
@@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
    /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
    /* Intel Xeon Processor 7100 Series Specification Update
     * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
     * AL30: A Machine Check Exception (MCE) Occurring during an
     * Enhanced Intel SpeedStep Technology Ratio Change May Cause
     * Both Processor Cores to Lock Up when HT is enabled*/
     * Both Processor Cores to Lock Up. */
    if (c->x86_vendor == X86_VENDOR_INTEL) {
        if ((c->x86 == 15) &&
            (c->x86_model == 6) &&
            (c->x86_mask == 8) && smt_capable())
            (c->x86_mask == 8)) {
            printk(KERN_INFO "acpi-cpufreq: Intel(R) "
                "Xeon(R) 7100 Errata AL30, processors may "
                "lock up on frequency changes: disabling "
                "acpi-cpufreq.\n");
            return -ENODEV;
        }
    }
    return 0;
}
@@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
    unsigned int result = 0;
    struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
    struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
    static int blacklisted;
#endif

    dprintk("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
    result = acpi_cpufreq_blacklist(c);
    if (result)
        return result;
    if (blacklisted)
        return blacklisted;
    blacklisted = acpi_cpufreq_blacklist(c);
    if (blacklisted)
        return blacklisted;
#endif

    data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);

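The hunk above memoizes the blacklist verdict in a function-local static, so the vendor/errata check runs once per boot instead of once per CPU init. A minimal user-space sketch of the same pattern follows; check_once() and expensive_check() are hypothetical stand-ins, not kernel symbols. Note the same subtlety as the patch: only a non-zero result is cached, so a "not blacklisted" (0) verdict would be recomputed on each call, which is harmless here because the check is cheap and stable.

#include <stdio.h>

static int expensive_check(void)
{
    puts("running the real check");
    return -19;    /* pretend the CPU is blacklisted (-ENODEV) */
}

static int check_once(void)
{
    static int cached;    /* 0 until the first call stores a verdict */

    if (cached)
        return cached;
    cached = expensive_check();
    return cached;
}

int main(void)
{
    printf("%d\n", check_once());    /* prints the message, then -19 */
    printf("%d\n", check_once());    /* cached: -19 only */
    return 0;
}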
@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
        memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
        break;
    case 1 ... 15:
        longhaul_version = TYPE_LONGHAUL_V1;
        longhaul_version = TYPE_LONGHAUL_V2;
        if (c->x86_mask < 8) {
            cpu_model = CPU_SAMUEL2;
            cpuname = "C3 'Samuel 2' [C5B]";

@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data)
         * set it to 1 to avoid problems in the future.
         * For all others it's a BIOS bug.
         */
        if (!boot_cpu_data.x86 == 0x11)
        if (boot_cpu_data.x86 != 0x11)
            printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
                "latency\n");
        max_latency = 1;

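The one-liner above fixes a classic operator-precedence bug: `!` binds tighter than `==`, so the old test parsed as `(!x86) == 0x11`, which compares 0 or 1 against 0x11 and is false for every CPU family — the warning could never fire. A tiny standalone demo:

#include <stdio.h>

int main(void)
{
    int family = 0x11;

    /* parses as (!family) == 0x11, i.e. 0 == 0x11: always false */
    printf("%d\n", !family == 0x11);    /* 0, whatever family holds */

    /* the corrected form warns exactly when family is not 0x11 */
    printf("%d\n", family != 0x11);     /* 0 here, 1 for other families */
    return 0;
}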
@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void)
    return 0;
}

struct get_freq_data {
    unsigned int speed;
    unsigned int processor;
};

static void get_freq_data(void *_data)
static void get_freq_data(void *_speed)
{
    struct get_freq_data *data = _data;
    unsigned int *speed = _speed;

    data->speed = speedstep_get_frequency(data->processor);
    *speed = speedstep_get_frequency(speedstep_processor);
}

static unsigned int speedstep_get(unsigned int cpu)
{
    struct get_freq_data data = { .processor = cpu };
    unsigned int speed;

    /* You're supposed to ensure CPU is online. */
    if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
    if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
        BUG();

    dprintk("detected %u kHz as current frequency\n", data.speed);
    return data.speed;
    dprintk("detected %u kHz as current frequency\n", speed);
    return speed;
}

/**

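The rewrite above drops the two-field struct because the processor id is already available via the driver's global, leaving a single output value. The underlying pattern is worth spelling out: smp_call_function_single() with wait=1 runs the callback on the target CPU and blocks until it finishes, so a pointer to a caller's stack variable is safe to hand across CPUs. A hedged sketch of that pattern (fetch_remote_value() and read_on_cpu() are illustrative names, not driver code):

#include <linux/smp.h>
#include <linux/bug.h>

static void fetch_remote_value(void *_val)
{
    unsigned int *val = _val;

    *val = 42;    /* e.g. read a register only valid on this CPU */
}

static unsigned int read_on_cpu(unsigned int cpu)
{
    unsigned int val;

    /* wait=1: we block until the callback has run remotely, so &val
     * stays alive for the whole cross-call. */
    if (smp_call_function_single(cpu, fetch_remote_value, &val, 1) != 0)
        BUG();

    return val;
}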
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
    /* Handle the GX (Formally known as the GX2) */

    if (c->x86 == 5 && c->x86_model == 5)
        display_cacheinfo(c);
        cpu_detect_cache_sizes(c);
    else
        init_cyrix(c);
}

@@ -491,22 +491,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#endif
    }

    if (trace)
        printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
    else if (l1i)
        printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);

    if (l1d)
        printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
    else
        printk(KERN_CONT "\n");

    if (l2)
        printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);

    if (l3)
        printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);

    c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

    return l2;

@@ -46,6 +46,9 @@

#include "mce-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>

int mce_disabled __read_mostly;

#define MISC_MCELOG_MINOR 227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;

static void default_decode_mce(struct mce *m)
/*
 * CPU/chipset specific EDAC code can register a notifier call here to print
 * MCE errors in a human-readable form.
 */
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);

static int default_decode_mce(struct notifier_block *nb, unsigned long val,
                void *data)
{
    pr_emerg("No human readable MCE decoding support on this CPU type.\n");
    pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");

    return NOTIFY_STOP;
}

/*
 * CPU/chipset specific EDAC code can register a callback here to print
 * MCE errors in a human-readable form:
 */
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
static struct notifier_block mce_dec_nb = {
    .notifier_call = default_decode_mce,
    .priority      = -1,
};

/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
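The hunks above replace the single x86_mce_decode_callback function pointer with an atomic notifier chain, so several decoders (e.g. EDAC drivers) can hook in by priority; the old default becomes just the lowest-priority (-1) fallback that fires only when nobody else handled the record. A hedged sketch of how a decoder module would register — the notifier API calls are the standard kernel ones, but edac_decode_mce() and its module are made-up examples:

#include <linux/notifier.h>
#include <asm/mce.h>

extern struct atomic_notifier_head x86_mce_decoder_chain;

static int edac_decode_mce(struct notifier_block *nb, unsigned long val,
                void *data)
{
    struct mce *m = data;

    /* ... print a human-readable report from *m ... */
    return NOTIFY_STOP;    /* handled: don't fall through to the default */
}

static struct notifier_block edac_dec_nb = {
    .notifier_call = edac_decode_mce,
    /* default priority (0) beats the fallback's -1, so this runs first */
};

static int __init edac_decoder_init(void)
{
    atomic_notifier_chain_register(&x86_mce_decoder_chain, &edac_dec_nb);
    return 0;
}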
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
{
    unsigned next, entry;

    /* Emit the trace record: */
    trace_mce_record(mce);

    mce->finished = 0;
    wmb();
    for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)

    /*
     * Print out human-readable details about the MCE error,
     * (if the CPU has an implementation for that):
     * (if the CPU has an implementation for that)
     */
    x86_mce_decode_callback(m);
    atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}

static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

static void mcheck_timer(unsigned long data)
static void mce_start_timer(unsigned long data)
{
    struct timer_list *t = &per_cpu(mce_timer, data);
    int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);

static int mce_banks_init(void)
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
    int i;

@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
/*
 * Initialize Machine Checks for a CPU.
 */
static int __cpuinit mce_cap_init(void)
static int __cpuinit __mcheck_cpu_cap_init(void)
{
    unsigned b;
    u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
    WARN_ON(banks != 0 && b != banks);
    banks = b;
    if (!mce_banks) {
        int err = mce_banks_init();
        int err = __mcheck_cpu_mce_banks_init();

        if (err)
            return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
    return 0;
}

static void mce_init(void)
static void __mcheck_cpu_init_generic(void)
{
    mce_banks_t all_banks;
    u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
}

/* Add per CPU specific workarounds here */
static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
    if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
        pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
    return 0;
}

static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
    if (c->x86 != 5)
        return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
    }
}

static void mce_cpu_features(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
    switch (c->x86_vendor) {
    case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
    }
}

static void mce_init_timer(void)
static void __mcheck_cpu_init_timer(void)
{
    struct timer_list *t = &__get_cpu_var(mce_timer);
    int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
    *n = check_interval * HZ;
    if (!*n)
        return;
    setup_timer(t, mcheck_timer, smp_processor_id());
    setup_timer(t, mce_start_timer, smp_processor_id());
    t->expires = round_jiffies(jiffies + *n);
    add_timer_on(t, smp_processor_id());
}
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off:
 */
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
    if (mce_disabled)
        return;

    mce_ancient_init(c);
    __mcheck_cpu_ancient_init(c);

    if (!mce_available(c))
        return;

    if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
    if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
        mce_disabled = 1;
        return;
    }

    machine_check_vector = do_machine_check;

    mce_init();
    mce_cpu_features(c);
    mce_init_timer();
    __mcheck_cpu_init_generic();
    __mcheck_cpu_init_vendor(c);
    __mcheck_cpu_init_timer();
    INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
}

/*
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);

int __init mcheck_init(void)
{
    atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);

    mcheck_intel_therm_init();

    return 0;
}

/*
 * Sysfs support
 */
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
 * Disable machine checks on suspend and shutdown. We can't really handle
 * them later.
 */
static int mce_disable(void)
static int mce_disable_error_reporting(void)
{
    int i;

@@ -1663,12 +1687,12 @@ static int mce_disable(void)

static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
    return mce_disable();
    return mce_disable_error_reporting();
}

static int mce_shutdown(struct sys_device *dev)
{
    return mce_disable();
    return mce_disable_error_reporting();
}

/*
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
 */
static int mce_resume(struct sys_device *dev)
{
    mce_init();
    mce_cpu_features(&current_cpu_data);
    __mcheck_cpu_init_generic();
    __mcheck_cpu_init_vendor(&current_cpu_data);

    return 0;
}
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
    del_timer_sync(&__get_cpu_var(mce_timer));
    if (!mce_available(&current_cpu_data))
        return;
    mce_init();
    mce_init_timer();
    __mcheck_cpu_init_generic();
    __mcheck_cpu_init_timer();
}

/* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
    cmci_reenable();
    cmci_recheck();
    if (all)
        mce_init_timer();
        __mcheck_cpu_init_timer();
}

static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
}

/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void *h)
static void __cpuinit mce_disable_cpu(void *h)
{
    unsigned long action = *(unsigned long *)h;
    int i;

    if (!mce_available(&current_cpu_data))
        return;

    if (!(action & CPU_TASKS_FROZEN))
        cmci_clear();
    for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
    }
}

static void mce_reenable_cpu(void *h)
static void __cpuinit mce_reenable_cpu(void *h)
{
    unsigned long action = *(unsigned long *)h;
    int i;
@@ -2027,7 +2052,7 @@ static __init void mce_init_banks(void)
    }
}

static __init int mce_init_device(void)
static __init int mcheck_init_device(void)
{
    int err;
    int i = 0;
@@ -2055,7 +2080,7 @@ static __init int mce_init_device(void)
    return err;
}

device_initcall(mce_init_device);
device_initcall(mcheck_init_device);

/*
 * Old style boot options parsing. Only for compatibility.
@@ -2103,7 +2128,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
            fake_panic_set, "%llu\n");

static int __init mce_debugfs_init(void)
static int __init mcheck_debugfs_init(void)
{
    struct dentry *dmce, *ffake_panic;

@@ -2117,5 +2142,5 @@ static int __init mce_debugfs_init(void)

    return 0;
}
late_initcall(mce_debugfs_init);
late_initcall(mcheck_debugfs_init);
#endif

@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
    static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
    ack_APIC_irq();
}

void __init mcheck_intel_therm_init(void)
{
    /*
     * This function is only called on boot CPU. Save the init thermal
     * LVT value on BSP and use that value to restore APs' thermal LVT
     * entry BIOS programmed later
     */
    if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
        cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
        lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
    unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
     * since it might be delivered via SMI already:
     */
    rdmsr(MSR_IA32_MISC_ENABLE, l, h);
    h = apic_read(APIC_LVTTHMR);

    /*
     * The initial value of thermal LVT entries on all APs always reads
     * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
     * sequence to them and LVT registers are reset to 0s except for
     * the mask bits which are set to 1s when APs receive INIT IPI.
     * Always restore the value that BIOS has programmed on AP based on
     * BSP's info we saved since BIOS is always setting the same value
     * for all threads/cores
     */
    apic_write(APIC_LVTTHMR, lvtthmr_init);

    h = lvtthmr_init;

    if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
        printk(KERN_DEBUG
               "CPU%d: Thermal monitoring handled by SMI\n", cpu);

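For readers decoding the raw numbers in the comment above: 0x10000 is the LVT mask bit, so a freshly INIT-ed AP has its thermal LVT entry masked, and the delivery-mode field tells you whether BIOS routed thermal events to SMI. A hedged sketch of inspecting that state with the kernel's own APIC masks (illustrative helper, not code from this patch):

#include <asm/apic.h>

static void show_lvtthmr_state(void)
{
    u32 v = apic_read(APIC_LVTTHMR);

    if (v & APIC_LVT_MASKED)    /* 0x10000: what APs read after INIT */
        pr_info("thermal LVT masked\n");
    if ((v & APIC_MODE_MASK) == APIC_DM_SMI)
        pr_info("thermal events routed to SMI (BIOS owns them)\n");
}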
@@ -77,6 +77,18 @@ struct cpu_hw_events {
    struct debug_store *ds;
};

struct event_constraint {
    unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    int code;
};

#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
#define EVENT_CONSTRAINT_END   { .code = 0, .idxmsk[0] = 0 }

#define for_each_event_constraint(e, c) \
    for ((e) = (c); (e)->idxmsk[0]; (e)++)


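Each constraint pairs an event code with a bitmask of the counter indices that may host it; EVENT_CONSTRAINT(0x12, 0x2), for instance, says event 0x12 (MUL) can only run on counter 1 (bit 1 of idxmsk). A standalone sketch of how the table entries decode:

#include <stdio.h>

struct constraint { unsigned long idxmsk; int code; };

int main(void)
{
    /* EVENT_CONSTRAINT(0x12, 0x2): MUL is restricted to counter 1 */
    struct constraint c = { .idxmsk = 0x2, .code = 0x12 };
    int i;

    for (i = 0; i < 4; i++)
        if (c.idxmsk & (1UL << i))
            printf("event %#x may use counter %d\n", c.code, i);
    return 0;    /* prints: event 0x12 may use counter 1 */
}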
/*
 * struct x86_pmu - generic x86 pmu
 */
@@ -102,6 +114,8 @@ struct x86_pmu {
    u64 intel_ctrl;
    void (*enable_bts)(u64 config);
    void (*disable_bts)(void);
    int (*get_event_idx)(struct cpu_hw_events *cpuc,
                 struct hw_perf_event *hwc);
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
    .enabled = 1,
};

static const struct event_constraint *event_constraints;

/*
 * Not sure about some of these
 */
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
    return hw_event & P6_EVNTSEL_MASK;
}

static const struct event_constraint intel_p6_event_constraints[] =
{
    EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
    EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
    EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
    EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
    EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
    EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
    EVENT_CONSTRAINT_END
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
    [PERF_COUNT_HW_BUS_CYCLES]  = 0x013c,
};

static const struct event_constraint intel_core_event_constraints[] =
{
    EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
    EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
    EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
    EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
    EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
    EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
    EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
    EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
    EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
    EVENT_CONSTRAINT_END
};

static const struct event_constraint intel_nehalem_event_constraints[] =
{
    EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
    EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
    EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
    EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
    EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
    EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
    EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
    EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
    EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
    EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
    EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
    return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX];

static const u64 nehalem_hw_cache_event_ids
static __initconst u64 nehalem_hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_MAX]
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids
 },
};

static const u64 core2_hw_cache_event_ids
static __initconst u64 core2_hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_MAX]
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids
 },
};

static const u64 atom_hw_cache_event_ids
static __initconst u64 atom_hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_MAX]
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
#define CORE_EVNTSEL_UNIT_MASK      0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK      0x00040000ULL
#define CORE_EVNTSEL_INV_MASK       0x00800000ULL
#define CORE_EVNTSEL_REG_MASK   0xFF000000ULL
#define CORE_EVNTSEL_REG_MASK       0xFF000000ULL

#define CORE_EVNTSEL_MASK       \
    (CORE_EVNTSEL_EVENT_MASK |  \
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
    return hw_event & CORE_EVNTSEL_MASK;
}

static const u64 amd_hw_cache_event_ids
static __initconst u64 amd_hw_cache_event_ids
                [PERF_COUNT_HW_CACHE_MAX]
                [PERF_COUNT_HW_CACHE_OP_MAX]
                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
     */
    hwc->config = ARCH_PERFMON_EVENTSEL_INT;

    hwc->idx = -1;

    /*
     * Count user and OS events unless requested not to.
     */
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
    x86_pmu_enable_event(hwc, idx);
}

static int
fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
static int fixed_mode_idx(struct hw_perf_event *hwc)
{
    unsigned int hw_event;

@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
    if (!x86_pmu.num_events_fixed)
        return -1;

    /*
     * fixed counters do not take all possible filters
     */
    if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
        return -1;

    if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
        return X86_PMC_IDX_FIXED_INSTRUCTIONS;
    if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in event:
 * generic counter allocator: get next free counter
 */
static int x86_pmu_enable(struct perf_event *event)
static int
gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct hw_perf_event *hwc = &event->hw;
    int idx;

    idx = fixed_mode_idx(event, hwc);
    idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
    return idx == x86_pmu.num_events ? -1 : idx;
}

/*
 * intel-specific counter allocator: check event constraints
 */
static int
intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
    const struct event_constraint *event_constraint;
    int i, code;

    if (!event_constraints)
        goto skip;

    code = hwc->config & CORE_EVNTSEL_EVENT_MASK;

    for_each_event_constraint(event_constraint, event_constraints) {
        if (code == event_constraint->code) {
            for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
                if (!test_and_set_bit(i, cpuc->used_mask))
                    return i;
            }
            return -1;
        }
    }
skip:
    return gen_get_event_idx(cpuc, hwc);
}

static int
x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
    int idx;

    idx = fixed_mode_idx(hwc);
    if (idx == X86_PMC_IDX_FIXED_BTS) {
        /* BTS is already occupied. */
        if (test_and_set_bit(idx, cpuc->used_mask))
            return -EAGAIN;

        hwc->config_base = 0;
        hwc->event_base  = 0;
        hwc->event_base  = 0;
        hwc->idx = idx;
    } else if (idx >= 0) {
        /*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
    } else {
        idx = hwc->idx;
        /* Try to get the previous generic event again */
        if (test_and_set_bit(idx, cpuc->used_mask)) {
        if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
            idx = find_first_zero_bit(cpuc->used_mask,
                          x86_pmu.num_events);
            if (idx == x86_pmu.num_events)
            idx = x86_pmu.get_event_idx(cpuc, hwc);
            if (idx == -1)
                return -EAGAIN;

            set_bit(idx, cpuc->used_mask);
            hwc->idx = idx;
        }
        hwc->config_base = x86_pmu.eventsel;
        hwc->event_base  = x86_pmu.perfctr;
        hwc->config_base = x86_pmu.eventsel;
        hwc->event_base  = x86_pmu.perfctr;
    }

    return idx;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in event:
 */
static int x86_pmu_enable(struct perf_event *event)
{
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct hw_perf_event *hwc = &event->hw;
    int idx;

    idx = x86_schedule_event(cpuc, hwc);
    if (idx < 0)
        return idx;

    perf_events_lapic_init();

    x86_pmu.disable(hwc, idx);
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
    .priority       = 1
};

static struct x86_pmu p6_pmu = {
static __initconst struct x86_pmu p6_pmu = {
    .name           = "p6",
    .handle_irq     = p6_pmu_handle_irq,
    .disable_all    = p6_pmu_disable_all,
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = {
     */
    .event_bits     = 32,
    .event_mask     = (1ULL << 32) - 1,
    .get_event_idx  = intel_get_event_idx,
};

static struct x86_pmu intel_pmu = {
static __initconst struct x86_pmu intel_pmu = {
    .name           = "Intel",
    .handle_irq     = intel_pmu_handle_irq,
    .disable_all    = intel_pmu_disable_all,
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = {
    .max_period     = (1ULL << 31) - 1,
    .enable_bts     = intel_pmu_enable_bts,
    .disable_bts    = intel_pmu_disable_bts,
    .get_event_idx  = intel_get_event_idx,
};

static struct x86_pmu amd_pmu = {
static __initconst struct x86_pmu amd_pmu = {
    .name           = "AMD",
    .handle_irq     = amd_pmu_handle_irq,
    .disable_all    = amd_pmu_disable_all,
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = {
    .apic           = 1,
    /* use highest bit to detect overflow */
    .max_period     = (1ULL << 47) - 1,
    .get_event_idx  = gen_get_event_idx,
};

static int p6_pmu_init(void)
static __init int p6_pmu_init(void)
{
    switch (boot_cpu_data.x86_model) {
    case 1:
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void)
    case 7:
    case 8:
    case 11: /* Pentium III */
        event_constraints = intel_p6_event_constraints;
        break;
    case 9:
    case 13:
        /* Pentium M */
        event_constraints = intel_p6_event_constraints;
        break;
    default:
        pr_cont("unsupported p6 CPU model %d ",
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void)
    return 0;
}

static int intel_pmu_init(void)
static __init int intel_pmu_init(void)
{
    union cpuid10_edx edx;
    union cpuid10_eax eax;
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void)
               sizeof(hw_cache_event_ids));

        pr_cont("Core2 events, ");
        event_constraints = intel_core_event_constraints;
        break;
    default:
    case 26:
        memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
               sizeof(hw_cache_event_ids));

        event_constraints = intel_nehalem_event_constraints;
        pr_cont("Nehalem/Corei7 events, ");
        break;
    case 28:
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void)
    return 0;
}

static int amd_pmu_init(void)
static __init int amd_pmu_init(void)
{
    /* Performance-monitoring supported from K7 and later: */
    if (boot_cpu_data.x86 < 6)
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = {
    .unthrottle = x86_pmu_unthrottle,
};

static int
validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
{
    struct hw_perf_event fake_event = event->hw;

    if (event->pmu && event->pmu != &pmu)
        return 0;

    return x86_schedule_event(cpuc, &fake_event) >= 0;
}

static int validate_group(struct perf_event *event)
{
    struct perf_event *sibling, *leader = event->group_leader;
    struct cpu_hw_events fake_pmu;

    memset(&fake_pmu, 0, sizeof(fake_pmu));

    if (!validate_event(&fake_pmu, leader))
        return -ENOSPC;

    list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
        if (!validate_event(&fake_pmu, sibling))
            return -ENOSPC;
    }

    if (!validate_event(&fake_pmu, event))
        return -ENOSPC;

    return 0;
}

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
    int err;

    err = __hw_perf_event_init(event);
    if (!err) {
        if (event->group_leader != event)
            err = validate_group(event);
    }
    if (err) {
        if (event->destroy)
            event->destroy(event);

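The new validate_group() above dry-runs the scheduler over a zeroed, fake cpu_hw_events: every group member is "scheduled" into the scratch used_mask, so a group that could never co-exist on real counters is rejected with -ENOSPC at creation time rather than failing silently later. A user-space sketch of the same idea, with a two-counter PMU and three events all constrained to counter 1:

#include <stdio.h>

#define NUM_COUNTERS 2

static int claim(unsigned long *used, unsigned long allowed)
{
    int i;

    for (i = 0; i < NUM_COUNTERS; i++)
        if ((allowed & (1UL << i)) && !(*used & (1UL << i))) {
            *used |= 1UL << i;
            return i;
        }
    return -1;
}

int main(void)
{
    unsigned long fake_used = 0;    /* the "fake_pmu" scratch state */
    unsigned long allowed[] = { 0x2, 0x2, 0x2 };
    int i;

    for (i = 0; i < 3; i++)
        if (claim(&fake_used, allowed[i]) < 0) {
            puts("group rejected (-ENOSPC)");    /* second event fails */
            return 1;
        }
    puts("group fits");
    return 0;
}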
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void)
    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_AMD:
        if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
            boot_cpu_data.x86 != 16)
            boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
            return;
        wd_ops = &k7_wd_ops;
        break;

@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)

    early_init_transmeta(c);

    display_cacheinfo(c);
    cpu_detect_cache_sizes(c);

    /* Print CMS and CPU revision */
    max = cpuid_eax(0x80860000);

@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file)
{
    unsigned int cpu;
    struct cpuinfo_x86 *c;
    int ret = 0;

    lock_kernel();

    cpu = iminor(file->f_path.dentry->d_inode);
    if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
        ret = -ENXIO;   /* No such CPU */
        goto out;
    }
    if (cpu >= nr_cpu_ids || !cpu_online(cpu))
        return -ENXIO;  /* No such CPU */

    c = &cpu_data(cpu);
    if (c->cpuid_level < 0)
        ret = -EIO;     /* CPUID not supported */
out:
    unlock_kernel();
    return ret;
        return -EIO;    /* CPUID not supported */

    return 0;
}

/*

@@ -27,8 +27,7 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <asm/iommu.h>

#include <asm/x86_init.h>

#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif

#ifdef CONFIG_X86_64
    pci_iommu_shutdown();
    x86_platform.iommu_shutdown();
#endif

    crash_save_cpu(regs, safe_smp_processor_id());

@@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)

    show_registers(regs);
#ifdef CONFIG_X86_32
    sp = (unsigned long) (&regs->sp);
    savesegment(ss, ss);
    if (user_mode(regs)) {
    if (user_mode_vm(regs)) {
        sp = regs->sp;
        ss = regs->ss & 0xffff;
    } else {
        sp = kernel_stack_pointer(regs);
        savesegment(ss, ss);
    }
    printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
    print_symbol("%s", regs->ip);

@@ -10,9 +10,9 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/stacktrace.h>

@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,

    if (!stack) {
        unsigned long dummy;

        stack = &dummy;
        if (task && task != current)
            stack = (unsigned long *)task->thread.sp;
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,

        context = (struct thread_info *)
            ((unsigned long)stack & (~(THREAD_SIZE - 1)));
        bp = print_context_stack(context, stack, bp, ops,
                     data, NULL, &graph);
        bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);

        stack = (unsigned long *)context->previous_esp;
        if (!stack)
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace);

void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
           unsigned long *sp, unsigned long bp, char *log_lvl)
           unsigned long *sp, unsigned long bp, char *log_lvl)
{
    unsigned long *stack;
    int i;
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip)

    return ud2 == 0x0b0f;
}

@@ -10,26 +10,28 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/stacktrace.h>

#include "dumpstack.h"

#define N_EXCEPTION_STACKS_END \
        (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)

static char x86_stack_ids[][8] = {
    [DEBUG_STACK - 1] = "#DB",
    [NMI_STACK - 1] = "NMI",
    [DOUBLEFAULT_STACK - 1] = "#DF",
    [STACKFAULT_STACK - 1] = "#SS",
    [MCE_STACK - 1] = "#MC",
        [ DEBUG_STACK-1         ] = "#DB",
        [ NMI_STACK-1           ] = "NMI",
        [ DOUBLEFAULT_STACK-1   ] = "#DF",
        [ STACKFAULT_STACK-1    ] = "#SS",
        [ MCE_STACK-1           ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
    [N_EXCEPTION_STACKS ...
        N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
        [ N_EXCEPTION_STACKS ...
          N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
};
};

int x86_is_stack_id(int id, char *name)
{
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name)
}

static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                     unsigned *usedp, char **idp)
                     unsigned *usedp, char **idp)
{
    unsigned k;

@@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace);

void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
           unsigned long *sp, unsigned long bp, char *log_lvl)
           unsigned long *sp, unsigned long bp, char *log_lvl)
{
    unsigned long *irq_stack_end;
    unsigned long *irq_stack;
    unsigned long *stack;
    int cpu;
    int i;
    const int cpu = smp_processor_id();
    unsigned long *irq_stack_end =
        (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
    unsigned long *irq_stack =
        (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);

    preempt_disable();
    cpu = smp_processor_id();

    irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
    irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);

    /*
     * debugging aid: "show_stack(NULL, NULL);" prints the
     * back trace for this cpu.
     * Debugging aid: "show_stack(NULL, NULL);" prints the
     * back trace for this cpu:
     */

    if (sp == NULL) {
        if (task)
            sp = (unsigned long *)task->thread.sp;
@@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        printk(" %016lx", *stack++);
        touch_nmi_watchdog();
    }
    preempt_enable();

    printk("\n");
    show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip)

    return ud2 == 0x0b0f;
}


@@ -333,6 +333,10 @@ ENTRY(ret_from_fork)
    CFI_ENDPROC
END(ret_from_fork)

/*
 * Interrupt exit functions should be protected against kprobes
 */
    .pushsection .kprobes.text, "ax"
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
@@ -383,6 +387,10 @@ need_resched:
END(resume_kernel)
#endif
    CFI_ENDPROC
/*
 * End of kprobes section
 */
    .popsection

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -513,6 +521,10 @@ sysexit_audit:
    PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)

/*
 * syscall stub including irq exit should be protected against kprobes
 */
    .pushsection .kprobes.text, "ax"
    # system call handler stub
ENTRY(system_call)
    RING0_INT_FRAME         # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
    jmp resume_userspace
END(syscall_badsys)
    CFI_ENDPROC
/*
 * End of kprobes section
 */
    .popsection

/*
 * System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
ENDPROC(common_interrupt)
    CFI_ENDPROC

/*
 * Irq entries should be protected against kprobes
 */
    .pushsection .kprobes.text, "ax"
#define BUILD_INTERRUPT3(name, nr, fn)  \
ENTRY(name)             \
    RING0_INT_FRAME;        \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
    jmp error_code
    CFI_ENDPROC
END(spurious_interrupt_bug)
/*
 * End of kprobes section
 */
    .popsection

ENTRY(kernel_thread_helper)
    pushl $0        # fake return address for unwinder
@@ -1185,17 +1209,14 @@ END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
    pushl $0
    pushl %eax
    pushl %ecx
    pushl %edx
    movl %ebp, %eax
    call ftrace_return_to_handler
    movl %eax, 0xc(%esp)
    movl %eax, %ecx
    popl %edx
    popl %ecx
    popl %eax
    ret
    jmp *%ecx
#endif

.section .rodata,"a"

@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)

    call ftrace_return_to_handler

    movq %rax, 16(%rsp)
    movq %rax, %rdi
    movq 8(%rsp), %rdx
    movq (%rsp), %rax
    addq $16, %rsp
    retq
    addq $24, %rsp
    jmp *%rdi
#endif


@@ -803,6 +803,10 @@ END(interrupt)
    call \func
    .endm

/*
 * Interrupt entry/exit should be protected against kprobes
 */
    .pushsection .kprobes.text, "ax"
/*
 * The interrupt stubs push (~vector+0x80) onto the stack and
 * then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)

    CFI_ENDPROC
END(common_interrupt)
/*
 * End of kprobes section
 */
    .popsection

/*
 * APIC interrupts.
@@ -1491,12 +1499,17 @@ error_kernelspace:
    leaq irq_return(%rip),%rcx
    cmpq %rcx,RIP+8(%rsp)
    je error_swapgs
    movl %ecx,%ecx  /* zero extend */
    cmpq %rcx,RIP+8(%rsp)
    je error_swapgs
    movl %ecx,%eax  /* zero extend */
    cmpq %rax,RIP+8(%rsp)
    je bstep_iret
    cmpq $gs_change,RIP+8(%rsp)
    je error_swapgs
    jmp error_sti

bstep_iret:
    /* Fix truncated RIP */
    movq %rcx,RIP+8(%rsp)
    jmp error_swapgs
END(error_entry)


@@ -9,6 +9,8 @@
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data)

    switch (faulted) {
    case 0:
        pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
        pr_info("converting mcount calls to 0f 1f 44 00 00\n");
        memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
        break;
    case 1:
        pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
        pr_info("converting mcount calls to 66 66 66 66 90\n");
        memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
        break;
    case 2:
        pr_info("ftrace: converting mcount calls to jmp . + 5\n");
        pr_info("converting mcount calls to jmp . + 5\n");
        memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
        break;
    }
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,

#ifdef CONFIG_FTRACE_SYSCALLS

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];
extern unsigned long *sys_call_table;

static struct syscall_metadata **syscalls_metadata;

static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
unsigned long __init arch_syscall_addr(int nr)
{
    struct syscall_metadata *start;
    struct syscall_metadata *stop;
    char str[KSYM_SYMBOL_LEN];


    start = (struct syscall_metadata *)__start_syscalls_metadata;
    stop = (struct syscall_metadata *)__stop_syscalls_metadata;
    kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);

    for ( ; start < stop; start++) {
        if (start->name && !strcmp(start->name, str))
            return start;
    }
    return NULL;
    return (unsigned long)(&sys_call_table)[nr];
}

struct syscall_metadata *syscall_nr_to_meta(int nr)
{
    if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
        return NULL;

    return syscalls_metadata[nr];
}

int syscall_name_to_nr(char *name)
{
    int i;

    if (!syscalls_metadata)
        return -1;

    for (i = 0; i < NR_syscalls; i++) {
        if (syscalls_metadata[i]) {
            if (!strcmp(syscalls_metadata[i]->name, name))
                return i;
        }
    }
    return -1;
}

void set_syscall_enter_id(int num, int id)
{
    syscalls_metadata[num]->enter_id = id;
}

void set_syscall_exit_id(int num, int id)
{
    syscalls_metadata[num]->exit_id = id;
}

static int __init arch_init_ftrace_syscalls(void)
{
    int i;
    struct syscall_metadata *meta;
    unsigned long **psys_syscall_table = &sys_call_table;

    syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
                    NR_syscalls, GFP_KERNEL);
    if (!syscalls_metadata) {
        WARN_ON(1);
        return -ENOMEM;
    }

    for (i = 0; i < NR_syscalls; i++) {
        meta = find_syscall_meta(psys_syscall_table[i]);
        syscalls_metadata[i] = meta;
    }
    return 0;
}
arch_initcall(arch_init_ftrace_syscalls);
#endif

@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
     */
    lgdt    early_gdt_descr(%rip)

    /* set up data segments. actually 0 would do too */
    movl $__KERNEL_DS,%eax
    /* set up data segments */
    xorl %eax,%eax
    movl %eax,%ds
    movl %eax,%ss
    movl %eax,%es

 555  arch/x86/kernel/hw_breakpoint.c  (new file)
@@ -0,0 +1,555 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each cpus
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
    unsigned long bp_info;

    bp_info = (len | type) & 0xf;
    bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
    bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

    return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
    return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
    int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

    *len = (bp_info & 0xc) | 0x40;
    *type = (bp_info & 0x3) | 0x80;

    return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

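A short worked example for the two helpers just defined (values illustrative, not from the patch): each breakpoint slot owns a 4-bit len/type nibble starting at bit DR_CONTROL_SHIFT (16), plus a 2-bit enable pair in the low bits of dr7, and decode_dr7() reverses the mapping.

unsigned long dr7_example(void)
{
    /* (len | type) & 0xf keeps only the raw R/W and LEN bits and drops
     * the 0x40/0x80 tags that decode_dr7() adds back later.  For slot 0
     * the nibble lands at bit 16 (DR_CONTROL_SHIFT + 0 * DR_CONTROL_SIZE)
     * and the global-enable bits sit at the bottom of the register. */
    unsigned long dr7 = encode_dr7(0, X86_BREAKPOINT_LEN_4,
                       X86_BREAKPOINT_WRITE);
    unsigned len, type;

    /* decode_dr7() round-trips the same fields and returns a non-zero
     * enable status for slot 0: */
    if (decode_dr7(dr7, 0, &len, &type))
        return dr7;    /* len == X86_BREAKPOINT_LEN_4 again */

    return 0;
}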
/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint. Eventually we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
    struct arch_hw_breakpoint *info = counter_arch_bp(bp);
    unsigned long *dr7;
    int i;

    for (i = 0; i < HBP_NUM; i++) {
        struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);

        if (!*slot) {
            *slot = bp;
            break;
        }
    }

    if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
        return -EBUSY;

    set_debugreg(info->address, i);
    __get_cpu_var(cpu_debugreg[i]) = info->address;

    dr7 = &__get_cpu_var(cpu_dr7);
    *dr7 |= encode_dr7(i, info->len, info->type);

    set_debugreg(*dr7, 7);

    return 0;
}

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
    struct arch_hw_breakpoint *info = counter_arch_bp(bp);
    unsigned long *dr7;
    int i;

    for (i = 0; i < HBP_NUM; i++) {
        struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);

        if (*slot == bp) {
            *slot = NULL;
            break;
        }
    }

    if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
        return;

    dr7 = &__get_cpu_var(cpu_dr7);
    *dr7 &= ~__encode_dr7(i, info->len, info->type);

    set_debugreg(*dr7, 7);
}

static int get_hbp_len(u8 hbp_len)
{
    unsigned int len_in_bytes = 0;

    switch (hbp_len) {
    case X86_BREAKPOINT_LEN_1:
        len_in_bytes = 1;
        break;
    case X86_BREAKPOINT_LEN_2:
        len_in_bytes = 2;
        break;
    case X86_BREAKPOINT_LEN_4:
        len_in_bytes = 4;
        break;
#ifdef CONFIG_X86_64
    case X86_BREAKPOINT_LEN_8:
        len_in_bytes = 8;
        break;
#endif
    }
    return len_in_bytes;
}

/*
 * Check for virtual address in user space.
 */
int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
{
    unsigned int len;

    len = get_hbp_len(hbp_len);

    return (va <= TASK_SIZE - len);
}

/*
 * Check for virtual address in kernel space.
 */
static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
{
    unsigned int len;

    len = get_hbp_len(hbp_len);

    return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}

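Worth noting in passing why the user-space predicate is phrased as `va <= TASK_SIZE - len` rather than the more obvious `va + len <= TASK_SIZE`: subtracting on the constant side cannot wrap, while adding to an attacker-supplied va near the top of the address space can. A standalone sketch with 32-bit arithmetic to make the wraparound visible:

#include <stdio.h>
#include <stdint.h>

#define TASK_SIZE_DEMO UINT32_C(0xc0000000)    /* illustrative 32-bit split */

int main(void)
{
    uint32_t va = UINT32_C(0xfffffffc), len = 8;

    /* naive form: va + len wraps to 4 and wrongly passes the check */
    printf("naive: %d\n", (uint32_t)(va + len) <= TASK_SIZE_DEMO);  /* 1 */
    /* subtract-side form: no wraparound, correctly rejects */
    printf("safe:  %d\n", va <= TASK_SIZE_DEMO - len);              /* 0 */
    return 0;
}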
||||
/*
|
||||
* Store a breakpoint's encoded address, length, and type.
|
||||
*/
|
||||
static int arch_store_info(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
/*
|
||||
* For kernel-addresses, either the address or symbol name can be
|
||||
* specified.
|
||||
*/
|
||||
if (info->name)
|
||||
info->address = (unsigned long)
|
||||
kallsyms_lookup_name(info->name);
|
||||
if (info->address)
|
||||
return 0;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int arch_bp_generic_fields(int x86_len, int x86_type,
|
||||
int *gen_len, int *gen_type)
|
||||
{
|
||||
/* Len */
|
||||
switch (x86_len) {
|
||||
case X86_BREAKPOINT_LEN_1:
|
||||
*gen_len = HW_BREAKPOINT_LEN_1;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_2:
|
||||
*gen_len = HW_BREAKPOINT_LEN_2;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_4:
|
||||
*gen_len = HW_BREAKPOINT_LEN_4;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case X86_BREAKPOINT_LEN_8:
|
||||
*gen_len = HW_BREAKPOINT_LEN_8;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Type */
|
||||
switch (x86_type) {
|
||||
case X86_BREAKPOINT_EXECUTE:
|
||||
*gen_type = HW_BREAKPOINT_X;
|
||||
break;
|
||||
case X86_BREAKPOINT_WRITE:
|
||||
*gen_type = HW_BREAKPOINT_W;
|
||||
break;
|
||||
case X86_BREAKPOINT_RW:
|
||||
*gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int arch_build_bp_info(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
|
||||
info->address = bp->attr.bp_addr;
|
||||
|
||||
/* Len */
|
||||
switch (bp->attr.bp_len) {
|
||||
case HW_BREAKPOINT_LEN_1:
|
||||
info->len = X86_BREAKPOINT_LEN_1;
|
||||
break;
|
||||
case HW_BREAKPOINT_LEN_2:
|
||||
info->len = X86_BREAKPOINT_LEN_2;
|
||||
break;
|
||||
case HW_BREAKPOINT_LEN_4:
|
||||
info->len = X86_BREAKPOINT_LEN_4;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case HW_BREAKPOINT_LEN_8:
|
||||
info->len = X86_BREAKPOINT_LEN_8;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Type */
|
||||
switch (bp->attr.bp_type) {
|
||||
case HW_BREAKPOINT_W:
|
||||
info->type = X86_BREAKPOINT_WRITE;
|
||||
break;
|
||||
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
|
||||
info->type = X86_BREAKPOINT_RW;
|
||||
break;
|
||||
case HW_BREAKPOINT_X:
|
||||
info->type = X86_BREAKPOINT_EXECUTE;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Validate the arch-specific HW Breakpoint register settings
|
||||
*/
|
||||
int arch_validate_hwbkpt_settings(struct perf_event *bp,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
unsigned int align;
|
||||
int ret;
|
||||
|
||||
|
||||
ret = arch_build_bp_info(bp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -EINVAL;
|
||||
|
||||
if (info->type == X86_BREAKPOINT_EXECUTE)
|
||||
/*
|
||||
* Ptrace-refactoring code
|
||||
* For now, we'll allow instruction breakpoint only for user-space
|
||||
* addresses
|
||||
*/
|
||||
if ((!arch_check_va_in_userspace(info->address, info->len)) &&
|
||||
info->len != X86_BREAKPOINT_EXECUTE)
|
||||
return ret;
|
||||
|
||||
switch (info->len) {
|
||||
case X86_BREAKPOINT_LEN_1:
|
||||
align = 0;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_2:
|
||||
align = 1;
|
||||
break;
|
||||
case X86_BREAKPOINT_LEN_4:
|
||||
align = 3;
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case X86_BREAKPOINT_LEN_8:
|
||||
align = 7;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (bp->callback)
|
||||
ret = arch_store_info(bp);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/*
|
||||
* Check that the low-order bits of the address are appropriate
|
||||
* for the alignment implied by len.
|
||||
*/
|
||||
if (info->address & align)
|
||||
return -EINVAL;
|
||||
|
||||
/* Check that the virtual address is in the proper range */
|
||||
if (tsk) {
|
||||
if (!arch_check_va_in_userspace(info->address, info->len))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
if (!arch_check_va_in_kernelspace(info->address, info->len))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump the debug register contents to the user.
|
||||
* We can't dump our per cpu values because it
|
||||
* may contain cpu wide breakpoint, something that
|
||||
* doesn't belong to the current task.
|
||||
*
|
||||
* TODO: include non-ptrace user breakpoints (perf)
|
||||
*/
|
||||
void aout_dump_debugregs(struct user *dump)
|
||||
{
|
||||
int i;
|
||||
int dr7 = 0;
|
||||
struct perf_event *bp;
|
||||
struct arch_hw_breakpoint *info;
|
||||
struct thread_struct *thread = ¤t->thread;
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
bp = thread->ptrace_bps[i];
|
||||
|
||||
if (bp && !bp->attr.disabled) {
|
||||
dump->u_debugreg[i] = bp->attr.bp_addr;
|
||||
info = counter_arch_bp(bp);
|
||||
dr7 |= encode_dr7(i, info->len, info->type);
|
||||
} else {
|
||||
dump->u_debugreg[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
dump->u_debugreg[4] = 0;
|
||||
dump->u_debugreg[5] = 0;
|
||||
dump->u_debugreg[6] = current->thread.debugreg6;
|
||||
|
||||
dump->u_debugreg[7] = dr7;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(aout_dump_debugregs);
/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
    int i;
    struct thread_struct *t = &tsk->thread;

    for (i = 0; i < HBP_NUM; i++) {
        unregister_hw_breakpoint(t->ptrace_bps[i]);
        t->ptrace_bps[i] = NULL;
    }
}

void hw_breakpoint_restore(void)
{
    set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
    set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
    set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
    set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
    set_debugreg(current->thread.debugreg6, 6);
    set_debugreg(__get_cpu_var(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);

/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) when the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching;
 * ii) when there are more bits than trap<n> set in the DR6 register (such
 * as BD, BS or BT), indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
    int i, cpu, rc = NOTIFY_STOP;
    struct perf_event *bp;
    unsigned long dr7, dr6;
    unsigned long *dr6_p;

    /* The DR6 value is pointed to by args->err */
    dr6_p = (unsigned long *)ERR_PTR(args->err);
    dr6 = *dr6_p;

    /* Do an early return if no trap bits are set in DR6 */
    if ((dr6 & DR_TRAP_BITS) == 0)
        return NOTIFY_DONE;

    get_debugreg(dr7, 7);
    /* Disable breakpoints during exception handling */
    set_debugreg(0UL, 7);
    /*
     * Assert that local interrupts are disabled.
     * Reset the DRn bits in the virtualized register value.
     * The ptrace trigger routine will add in whatever is needed.
     */
    current->thread.debugreg6 &= ~DR_TRAP_BITS;
    cpu = get_cpu();

    /* Handle all the breakpoints that were triggered */
    for (i = 0; i < HBP_NUM; ++i) {
        if (likely(!(dr6 & (DR_TRAP0 << i))))
            continue;

        /*
         * The counter may be concurrently released but that can only
         * occur from a call_rcu() path. We can then safely fetch
         * the breakpoint, use its callback, touch its counter
         * while we are in an rcu_read_lock() path.
         */
        rcu_read_lock();

        bp = per_cpu(bp_per_reg[i], cpu);
        if (bp)
            rc = NOTIFY_DONE;
        /*
         * Reset the 'i'th TRAP bit in dr6 to denote completion of
         * exception handling
         */
        (*dr6_p) &= ~(DR_TRAP0 << i);
        /*
         * bp can be NULL due to lazy debug register switching
         * or due to concurrent perf counter removing.
         */
        if (!bp) {
            rcu_read_unlock();
            break;
        }

        (bp->callback)(bp, args->regs);

        rcu_read_unlock();
    }
    if (dr6 & (~DR_TRAP_BITS))
        rc = NOTIFY_DONE;

    set_debugreg(dr7, 7);
    put_cpu();

    return rc;
}

/*
 * Handle debug exception notifications.
 */
int __kprobes hw_breakpoint_exceptions_notify(
        struct notifier_block *unused, unsigned long val, void *data)
{
    if (val != DIE_DEBUG)
        return NOTIFY_DONE;

    return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
    /* TODO */
}

void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
{
    /* TODO */
}
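/*
 * A minimal usage sketch for the perf-based breakpoint API this handler
 * serves. The helper names (hw_breakpoint_init, register_user_hw_breakpoint)
 * and the attr fields are assumptions based on this patch series, not
 * guaranteed signatures; unregister_hw_breakpoint() is the counterpart
 * used by flush_ptrace_hw_breakpoint() above:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *bp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = addr;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	bp = register_user_hw_breakpoint(&attr, triggered_cb, tsk);
 *	if (IS_ERR(bp))
 *		return PTR_ERR(bp);
 *	...
 *	unregister_hw_breakpoint(bp);
 */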
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
        seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
    seq_printf(p, "  TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
    seq_printf(p, "%*s: ", prec, "TRM");
    for_each_online_cpu(j)
        seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
    seq_printf(p, "  Thermal event interrupts\n");
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
    seq_printf(p, "%*s: ", prec, "THR");
    for_each_online_cpu(j)
        seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
    seq_printf(p, "  Threshold APIC interrupts\n");
# endif
#endif
#ifdef CONFIG_X86_MCE
    seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
    sum += irq_stats(cpu)->irq_call_count;
    sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
    sum += irq_stats(cpu)->irq_thermal_count;
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
    sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
#ifdef CONFIG_X86_MCE
    sum += per_cpu(mce_exception_count, cpu);
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs)
}

EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);

#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
    unsigned int irq, vector;
    static int warned;
    struct irq_desc *desc;

    for_each_irq_desc(irq, desc) {
        int break_affinity = 0;
        int set_affinity = 1;
        const struct cpumask *affinity;

        if (!desc)
            continue;
        if (irq == 2)
            continue;

        /* interrupts are disabled at this point */
        spin_lock(&desc->lock);

        affinity = desc->affinity;
        if (!irq_has_action(irq) ||
            cpumask_equal(affinity, cpu_online_mask)) {
            spin_unlock(&desc->lock);
            continue;
        }

        /*
         * Complete the irq move. This cpu is going down and for the
         * non intr-remapping case, we can't wait till this interrupt
         * arrives at this cpu before completing the irq move.
         */
        irq_force_complete_move(irq);

        if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
            break_affinity = 1;
            affinity = cpu_all_mask;
        }

        if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
            desc->chip->mask(irq);

        if (desc->chip->set_affinity)
            desc->chip->set_affinity(irq, affinity);
        else if (!(warned++))
            set_affinity = 0;

        if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
            desc->chip->unmask(irq);

        spin_unlock(&desc->lock);

        if (break_affinity && set_affinity)
            printk("Broke affinity for irq %i\n", irq);
        else if (!set_affinity)
            printk("Cannot set affinity for irq %i\n", irq);
    }

    /*
     * We can remove mdelay() and then send spurious interrupts to
     * new cpu targets for all the irqs that were handled previously by
     * this cpu. While it works, I have seen spurious interrupt messages
     * (nothing wrong but still...).
     *
     * So for now, retain mdelay(1) and check the IRR and then send those
     * interrupts to new targets as this cpu is already offlined...
     */
    mdelay(1);

    for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
        unsigned int irr;

        if (__get_cpu_var(vector_irq)[vector] < 0)
            continue;

        irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
        if (irr & (1 << (vector % 32))) {
            irq = __get_cpu_var(vector_irq)[vector];

            desc = irq_to_desc(irq);
            spin_lock(&desc->lock);
            if (desc->chip->retrigger)
                desc->chip->retrigger(irq);
            spin_unlock(&desc->lock);
        }
    }
}
#endif
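/*
 * Note on the IRR scan above: the local APIC's Interrupt Request Register
 * is a 256-bit bitmap (one bit per vector) exposed as eight 32-bit
 * registers at 0x10-byte strides. Hence APIC_IRR + (vector / 32) * 0x10
 * selects the word and (1 << (vector % 32)) the bit for a given vector.
 */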

@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)

    return true;
}

#ifdef CONFIG_HOTPLUG_CPU

/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
    unsigned int irq;
    struct irq_desc *desc;

    for_each_irq_desc(irq, desc) {
        const struct cpumask *affinity;

        if (!desc)
            continue;
        if (irq == 2)
            continue;

        affinity = desc->affinity;
        if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
            printk("Breaking affinity for irq %i\n", irq);
            affinity = cpu_all_mask;
        }
        if (desc->chip->set_affinity)
            desc->chip->set_affinity(irq, affinity);
        else if (desc->action)
            printk_once("Cannot set affinity for irq %i\n", irq);
    }

#if 0
    barrier();
    /* Ingo Molnar says: "after the IO-APIC masks have been redirected
       [note the nop - the interrupt-enable boundary on x86 is two
       instructions from sti] - to flush out pending hardirqs and
       IPIs. After this point nothing is supposed to reach this CPU." */
    __asm__ __volatile__("sti; nop; cli");
    barrier();
#else
    /* That doesn't seem sufficient. Give it 1ms. */
    local_irq_enable();
    mdelay(1);
    local_irq_disable();
#endif
}
#endif

@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
    return true;
}

#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
    unsigned int irq;
    static int warned;
    struct irq_desc *desc;

    for_each_irq_desc(irq, desc) {
        int break_affinity = 0;
        int set_affinity = 1;
        const struct cpumask *affinity;

        if (!desc)
            continue;
        if (irq == 2)
            continue;

        /* interrupts are disabled at this point */
        spin_lock(&desc->lock);

        affinity = desc->affinity;
        if (!irq_has_action(irq) ||
            cpumask_equal(affinity, cpu_online_mask)) {
            spin_unlock(&desc->lock);
            continue;
        }

        if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
            break_affinity = 1;
            affinity = cpu_all_mask;
        }

        if (desc->chip->mask)
            desc->chip->mask(irq);

        if (desc->chip->set_affinity)
            desc->chip->set_affinity(irq, affinity);
        else if (!(warned++))
            set_affinity = 0;

        if (desc->chip->unmask)
            desc->chip->unmask(irq);

        spin_unlock(&desc->lock);

        if (break_affinity && set_affinity)
            printk("Broke affinity for irq %i\n", irq);
        else if (!set_affinity)
            printk("Cannot set affinity for irq %i\n", irq);
    }

    /* That doesn't seem sufficient. Give it 1ms. */
    local_irq_enable();
    mdelay(1);
    local_irq_disable();
}
#endif

extern void call_softirq(void);

@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>

#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>

@@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
    gdb_regs[GDB_SS] = __KERNEL_DS;
    gdb_regs[GDB_FS] = 0xFFFF;
    gdb_regs[GDB_GS] = 0xFFFF;
    gdb_regs[GDB_SP] = (int)&regs->sp;
#else
    gdb_regs[GDB_R8] = regs->r8;
    gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
    gdb_regs32[GDB_PS] = regs->flags;
    gdb_regs32[GDB_CS] = regs->cs;
    gdb_regs32[GDB_SS] = regs->ss;
    gdb_regs[GDB_SP] = regs->sp;
#endif
    gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
}

/**
@@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
            "resuming...\n");
    kgdb_arch_handle_exception(args->trapnr, args->signr,
                               args->err, "c", "", regs);
    /*
     * Reset the BS bit in dr6 (pointed to by args->err) to
     * denote completion of processing
     */
    (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;

    return NOTIFY_STOP;
}
@@ -48,31 +48,22 @@
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>

#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>

void jprobe_return_end(void);

DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

#ifdef CONFIG_X86_64
#define stack_addr(regs) ((unsigned long *)regs->sp)
#else
/*
 * "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
 * don't save the ss and esp registers if the CPU is already in kernel
 * mode when it traps. So for kprobes, regs->sp and regs->ss are not
 * the [nonexistent] saved stack pointer and ss register, but rather
 * the top 8 bytes of the pre-int3 stack. So &regs->sp happens to
 * point to the top of the pre-int3 stack.
 */
#define stack_addr(regs) ((unsigned long *)&regs->sp)
#endif
#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))

#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
    (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
    /* ----------------------------------------------- */
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
};
static const u32 onebyte_has_modrm[256 / 32] = {
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* ----------------------------------------------- */
    W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
    W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
    W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
    W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
    W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
    W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
    W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
    W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
    W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
    W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
    W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
    W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
    W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
    W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
    W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
    W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1)   /* f0 */
    /* ----------------------------------------------- */
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
};
static const u32 twobyte_has_modrm[256 / 32] = {
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* ----------------------------------------------- */
    W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
    W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
    W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
    W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
    W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
    W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
    W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
    W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
    W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
    W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
    W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
    W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
    W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
    W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
    W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
    W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* ff */
    /* ----------------------------------------------- */
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
};
#undef W

struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +191,75 @@ retry:
    }
}

/* Recover the probed instruction at addr for further analysis. */
static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
    struct kprobe *kp;
    kp = get_kprobe((void *)addr);
    if (!kp)
        return -EINVAL;

    /*
     * Basically, kp->ainsn.insn has the original instruction.
     * However, a RIP-relative instruction can not be single-stepped
     * at a different place, so fix_riprel() tweaks the displacement of
     * that instruction. In that case, we can't recover the instruction
     * from kp->ainsn.insn.
     *
     * On the other hand, kp->opcode has a copy of the first byte of
     * the probed instruction, which is overwritten by int3. And since
     * the instruction at kp->addr is not modified by kprobes except
     * for the first byte, we can recover the original instruction
     * from it and kp->opcode.
     */
    memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
    buf[0] = kp->opcode;
    return 0;
}

/* Dummy buffers for kallsyms_lookup */
static char __dummy_buf[KSYM_NAME_LEN];

/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
    int ret;
    unsigned long addr, offset = 0;
    struct insn insn;
    kprobe_opcode_t buf[MAX_INSN_SIZE];

    if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
        return 0;

    /* Decode instructions */
    addr = paddr - offset;
    while (addr < paddr) {
        kernel_insn_init(&insn, (void *)addr);
        insn_get_opcode(&insn);

        /*
         * Check if the instruction has been modified by another
         * kprobe, in which case we replace the breakpoint by the
         * original instruction in our buffer.
         */
        if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
            ret = recover_probed_instruction(buf, addr);
            if (ret)
                /*
                 * Another debugging subsystem might insert
                 * this breakpoint. In that case, we can't
                 * recover it.
                 */
                return 0;
            kernel_insn_init(&insn, buf);
        }
        insn_get_length(&insn);
        addr += insn.length;
    }

    return (addr == paddr);
}
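/*
 * An illustrative case for the boundary walk above: with code like
 *
 *	e8 xx xx xx xx		call foo	<- 5 bytes
 *
 * a probe at paddr = call_site + 1 would land inside the immediate.
 * Decoding from the symbol start advances 5 bytes at once, so the loop
 * ends with addr != paddr and can_probe() rejects the address.
 */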

/*
 * Returns non-zero if opcode modifies the interrupt flag.
 */
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
static void __kprobes fix_riprel(struct kprobe *p)
{
#ifdef CONFIG_X86_64
    u8 *insn = p->ainsn.insn;
    s64 disp;
    int need_modrm;
    struct insn insn;
    kernel_insn_init(&insn, p->ainsn.insn);

    /* Skip legacy instruction prefixes. */
    while (1) {
        switch (*insn) {
        case 0x66:
        case 0x67:
        case 0x2e:
        case 0x3e:
        case 0x26:
        case 0x64:
        case 0x65:
        case 0x36:
        case 0xf0:
        case 0xf3:
        case 0xf2:
            ++insn;
            continue;
        }
        break;
    }

    /* Skip REX instruction prefix. */
    if (is_REX_prefix(insn))
        ++insn;

    if (*insn == 0x0f) {
        /* Two-byte opcode. */
        ++insn;
        need_modrm = test_bit(*insn,
                              (unsigned long *)twobyte_has_modrm);
    } else
        /* One-byte opcode. */
        need_modrm = test_bit(*insn,
                              (unsigned long *)onebyte_has_modrm);

    if (need_modrm) {
        u8 modrm = *++insn;
        if ((modrm & 0xc7) == 0x05) {
            /* %rip+disp32 addressing mode */
            /* Displacement follows ModRM byte. */
            ++insn;
            /*
             * The copied instruction uses the %rip-relative
             * addressing mode. Adjust the displacement for the
             * difference between the original location of this
             * instruction and the location of the copy that will
             * actually be run. The tricky bit here is making sure
             * that the sign extension happens correctly in this
             * calculation, since we need a signed 32-bit result to
             * be sign-extended to 64 bits when it's added to the
             * %rip value and yield the same 64-bit result that the
             * sign-extension of the original signed 32-bit
             * displacement would have given.
             */
            disp = (u8 *) p->addr + *((s32 *) insn) -
                   (u8 *) p->ainsn.insn;
            BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
            *(s32 *)insn = (s32) disp;
        }
    if (insn_rip_relative(&insn)) {
        s64 newdisp;
        u8 *disp;
        insn_get_displacement(&insn);
        /*
         * The copied instruction uses the %rip-relative addressing
         * mode. Adjust the displacement for the difference between
         * the original location of this instruction and the location
         * of the copy that will actually be run. The tricky bit here
         * is making sure that the sign extension happens correctly in
         * this calculation, since we need a signed 32-bit result to
         * be sign-extended to 64 bits when it's added to the %rip
         * value and yield the same 64-bit result that the sign-
         * extension of the original signed 32-bit displacement would
         * have given.
         */
        newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
                  (u8 *) p->ainsn.insn;
        BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
        disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
        *(s32 *) disp = (s32) newdisp;
    }
#endif
}
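/*
 * Worked example for the displacement fixup above (addresses are
 * illustrative): the CPU resolves a %rip-relative operand as
 * target = insn_end + disp. Since the copy has the same length as the
 * original, keeping the target fixed requires
 *
 *	newdisp = disp + (p->addr - p->ainsn.insn)
 *
 * which is exactly what the newdisp computation evaluates.
 */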

@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)

int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
    if (!can_probe((unsigned long)p->addr))
        return -EILSEQ;
    /* insn: must be on special executable page on x86. */
    p->ainsn.insn = get_insn_slot();
    if (!p->ainsn.insn)
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
{
    switch (kcb->kprobe_status) {
    case KPROBE_HIT_SSDONE:
#ifdef CONFIG_X86_64
        /* TODO: Provide re-entrancy from post_kprobes_handler() and
         * avoid exception stack corruption while single-stepping on
         * the instruction of the new probe.
         */
        arch_disarm_kprobe(p);
        regs->ip = (unsigned long)p->addr;
        reset_current_kprobe();
        preempt_enable_no_resched();
        break;
#endif
    case KPROBE_HIT_ACTIVE:
        save_previous_kprobe(kcb);
        set_current_kprobe(p, regs, kcb);
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
        kcb->kprobe_status = KPROBE_REENTER;
        break;
    case KPROBE_HIT_SS:
        if (p == kprobe_running()) {
            regs->flags &= ~X86_EFLAGS_TF;
            regs->flags |= kcb->kprobe_saved_flags;
            return 0;
        } else {
            /* A probe has been hit in the codepath leading up
             * to, or just after, single-stepping of a probed
             * instruction. This entire codepath should strictly
             * reside in .kprobes.text section. Raise a warning
             * to highlight this peculiar case.
             */
        }
        /* A probe has been hit in the codepath leading up to, or just
         * after, single-stepping of a probed instruction. This entire
         * codepath should strictly reside in .kprobes.text section.
         * Raise a BUG or we'll continue in an endless reentering loop
         * and eventually a stack overflow.
         */
        printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
               p->addr);
        dump_kprobe(p);
        BUG();
    default:
        /* impossible cases */
        WARN_ON(1);
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
        ret = NOTIFY_STOP;
        break;
    case DIE_DEBUG:
        if (post_kprobe_handler(args->regs))
        if (post_kprobe_handler(args->regs)) {
            /*
             * Reset the BS bit in dr6 (pointed to by args->err) to
             * denote completion of processing
             */
            (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
            ret = NOTIFY_STOP;
        }
        break;
    case DIE_GPF:
        /*

@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
#include <asm/debugreg.h>

static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)

    /* Interrupts aren't acceptable while we reboot */
    local_irq_disable();
    hw_breakpoint_disable();

    if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC

@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>

static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
                                unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)

    /* Interrupts aren't acceptable while we reboot */
    local_irq_disable();
    hw_breakpoint_disable();

    if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC

@@ -73,7 +73,6 @@
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/smp_lock.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size)

static int microcode_open(struct inode *unused1, struct file *unused2)
{
    cycle_kernel_lock();
    return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}

@@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file)
{
    unsigned int cpu = iminor(file->f_path.dentry->d_inode);
    struct cpuinfo_x86 *c = &cpu_data(cpu);
    int ret = 0;

    lock_kernel();
    cpu = iminor(file->f_path.dentry->d_inode);

    if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
        ret = -ENXIO; /* No such CPU */
        goto out;
    }
    if (cpu >= nr_cpu_ids || !cpu_online(cpu))
        return -ENXIO; /* No such CPU */

    c = &cpu_data(cpu);
    if (!cpu_has(c, X86_FEATURE_MSR))
        ret = -EIO; /* MSR not supported */
out:
    unlock_kernel();
    return ret;
        return -EIO; /* MSR not supported */

    return 0;
}

/*
@@ -46,6 +46,7 @@
#include <asm/dma.h>
#include <asm/rio.h>
#include <asm/bios_ebda.h>
#include <asm/x86_init.h>

#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
        if (panic_on_overflow)
            panic("Calgary: fix the allocator.\n");
        else
            return bad_dma_address;
            return DMA_ERROR_CODE;
    }
}

@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
                              void *vaddr, unsigned int npages, int direction)
{
    unsigned long entry;
    dma_addr_t ret = bad_dma_address;
    dma_addr_t ret;

    entry = iommu_range_alloc(dev, tbl, npages);

    if (unlikely(entry == bad_dma_address))
        goto error;
    if (unlikely(entry == DMA_ERROR_CODE)) {
        printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
               "iommu %p\n", npages, tbl);
        return DMA_ERROR_CODE;
    }

    /* set the return dma address */
    ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
    /* put the TCEs in the HW table */
    tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
              direction);

    return ret;

error:
    printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
           "iommu %p\n", npages, tbl);
    return bad_dma_address;
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
    unsigned long flags;

    /* were we called with bad_dma_address? */
    badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
    if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
    badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
    if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
        WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
             "address 0x%Lx\n", dma_addr);
        return;
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)

    pdev = to_pci_dev(dev);

    /* search up the device tree for an iommu */
    pbus = pdev->bus;

    /* is the device behind a bridge? Look for the root bus */
    while (pbus->parent)
    do {
        tbl = pci_iommu(pbus);
        if (tbl && tbl->it_busno == pbus->number)
            break;
        tbl = NULL;
        pbus = pbus->parent;

    tbl = pci_iommu(pbus);
    } while (pbus);

    BUG_ON(tbl && (tbl->it_busno != pbus->number));

@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
        npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);

        entry = iommu_range_alloc(dev, tbl, npages);
        if (entry == bad_dma_address) {
        if (entry == DMA_ERROR_CODE) {
            /* makes sure unmap knows to stop */
            s->dma_length = 0;
            goto error;
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
error:
    calgary_unmap_sg(dev, sg, nelems, dir, NULL);
    for_each_sg(sg, s, nelems, i) {
        sg->dma_address = bad_dma_address;
        sg->dma_address = DMA_ERROR_CODE;
        sg->dma_length = 0;
    }
    return 0;
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,

    /* set up tces to cover the allocated range */
    mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
    if (mapping == bad_dma_address)
    if (mapping == DMA_ERROR_CODE)
        goto free;
    *dma_handle = mapping;
    return ret;
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
    struct iommu_table *tbl = pci_iommu(dev->bus);

    /* reserve EMERGENCY_PAGES from bad_dma_address and up */
    iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
    iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);

    /* avoid the BIOS/VGA first 640KB-1MB region */
    /* for CalIOC2 - avoid the entire first MB */
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void)
    return;
}

static int __init calgary_iommu_init(void)
{
    int ret;

    /* ok, we're trying to use Calgary - let's roll */
    printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");

    ret = calgary_init();
    if (ret) {
        printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
               "falling back to no_iommu\n", ret);
        return ret;
    }

    return 0;
}

void __init detect_calgary(void)
{
    int bus;
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void)
     * if the user specified iommu=off or iommu=soft or we found
     * another HW IOMMU already, bail out.
     */
    if (swiotlb || no_iommu || iommu_detected)
    if (no_iommu || iommu_detected)
        return;

    if (!use_calgary)
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void)
        printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
               specified_table_size);

        /* swiotlb for devices that aren't behind the Calgary. */
        if (max_pfn > MAX_DMA32_PFN)
            swiotlb = 1;
        x86_init.iommu.iommu_init = calgary_iommu_init;
    }
    return;

@@ -1457,35 +1472,6 @@ cleanup:
    }
}

int __init calgary_iommu_init(void)
{
    int ret;

    if (no_iommu || (swiotlb && !calgary_detected))
        return -ENODEV;

    if (!calgary_detected)
        return -ENODEV;

    /* ok, we're trying to use Calgary - let's roll */
    printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");

    ret = calgary_init();
    if (ret) {
        printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
               "falling back to no_iommu\n", ret);
        return ret;
    }

    force_iommu = 1;
    bad_dma_address = 0x0;
    /* dma_ops is set to swiotlb or nommu */
    if (!dma_ops)
        dma_ops = &nommu_dma_ops;

    return 0;
}

static int __init calgary_parse_options(char *p)
{
    unsigned int bridge;
@@ -11,10 +11,11 @@
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/amd_iommu.h>
#include <asm/x86_init.h>

static int forbid_dac __read_mostly;

struct dma_map_ops *dma_ops;
struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);

static int iommu_sac_force __read_mostly;
@@ -42,15 +43,10 @@ int iommu_detected __read_mostly = 0;
 */
int iommu_pass_through __read_mostly;

dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);

/* Dummy device used for NULL arguments (normally ISA). Better would
   be probably a smaller DMA mask, but this is bug-to-bug compatible
   to older i386. */
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
    .init_name = "fallback device",
    .coherent_dma_mask = DMA_BIT_MASK(32),
    .coherent_dma_mask = ISA_DMA_BIT_MASK,
    .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -128,20 +124,17 @@ void __init pci_iommu_alloc(void)
    /* free the range so iommu could get some range less than 4G */
    dma32_free_bootmem();
#endif
    if (pci_swiotlb_init())
        return;

    /*
     * The order of these functions is important for
     * fall-back/fail-over reasons
     */
    gart_iommu_hole_init();

    detect_calgary();

    detect_intel_iommu();

    /* needs to be called after gart_iommu_hole_init */
    amd_iommu_detect();

    pci_swiotlb_init();
}

void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -216,7 +209,7 @@ static __init int iommu_setup(char *p)
        if (!strncmp(p, "allowdac", 8))
            forbid_dac = 0;
        if (!strncmp(p, "nodac", 5))
            forbid_dac = -1;
            forbid_dac = 1;
        if (!strncmp(p, "usedac", 6)) {
            forbid_dac = -1;
            return 1;
@@ -291,25 +284,17 @@ static int __init pci_iommu_init(void)
#ifdef CONFIG_PCI
    dma_debug_add_bus(&pci_bus_type);
#endif
    x86_init.iommu.iommu_init();

    calgary_iommu_init();
    if (swiotlb) {
        printk(KERN_INFO "PCI-DMA: "
               "Using software bounce buffering for IO (SWIOTLB)\n");
        swiotlb_print_info();
    } else
        swiotlb_free();

    intel_iommu_init();

    amd_iommu_init();

    gart_iommu_init();

    no_iommu_init();
    return 0;
}

void pci_iommu_shutdown(void)
{
    gart_iommu_shutdown();

    amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
rootfs_initcall(pci_iommu_init);
@@ -39,6 +39,7 @@
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/k8.h>
#include <asm/x86_init.h>

static unsigned long iommu_bus_base;    /* GART remapping area (physical) */
static unsigned long iommu_size;        /* size of remapping area bytes */
@@ -46,6 +47,8 @@ static unsigned long iommu_pages;       /* .. and in pages */

static u32 *iommu_gatt_base;            /* Remapping table */

static dma_addr_t bad_dma_addr;

/*
 * If this is disabled the IOMMU will use an optimized flushing strategy
 * of only flushing when a mapping is reused. With it true the GART is
@@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,

    base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
                       PAGE_SIZE) >> PAGE_SHIFT;
    boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
    boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
                          PAGE_SIZE) >> PAGE_SHIFT;

    spin_lock_irqsave(&iommu_bitmap_lock, flags);
@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
        if (panic_on_overflow)
            panic("dma_map_area overflow %lu bytes\n", size);
        iommu_full(dev, size, dir);
        return bad_dma_address;
        return bad_dma_addr;
    }

    for (i = 0; i < npages; i++) {
@@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
    int i;

#ifdef CONFIG_IOMMU_DEBUG
    printk(KERN_DEBUG "dma_map_sg overflow\n");
    pr_debug("dma_map_sg overflow\n");
#endif

    for_each_sg(sg, s, nents, i) {
@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,

        if (nonforced_iommu(dev, addr, s->length)) {
            addr = dma_map_area(dev, addr, s->length, dir, 0);
            if (addr == bad_dma_address) {
            if (addr == bad_dma_addr) {
                if (i > 0)
                    gart_unmap_sg(dev, sg, i, dir, NULL);
                nents = 0;
@@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
    if (!dev)
        dev = &x86_dma_fallback_dev;

    out = 0;
    start = 0;
    start_sg = sgmap = sg;
    seg_size = 0;
    max_seg_size = dma_get_max_seg_size(dev);
    ps = NULL; /* shut up gcc */
    out = 0;
    start = 0;
    start_sg = sg;
    sgmap = sg;
    seg_size = 0;
    max_seg_size = dma_get_max_seg_size(dev);
    ps = NULL; /* shut up gcc */

    for_each_sg(sg, s, nents, i) {
        dma_addr_t addr = sg_phys(s);

@@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                           sgmap, pages, need) < 0)
                goto error;
            out++;
            seg_size = 0;
            sgmap = sg_next(sgmap);
            pages = 0;
            start = i;
            start_sg = s;

            seg_size = 0;
            sgmap = sg_next(sgmap);
            pages = 0;
            start = i;
            start_sg = s;
        }
    }

@@ -455,7 +461,7 @@ error:

    iommu_full(dev, pages << PAGE_SHIFT, dir);
    for_each_sg(sg, s, nents, i)
        s->dma_address = bad_dma_address;
        s->dma_address = bad_dma_addr;
    return 0;
}

@@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
                            DMA_BIDIRECTIONAL, align_mask);

    flush_gart();
    if (paddr != bad_dma_address) {
    if (paddr != bad_dma_addr) {
        *dma_addr = paddr;
        return page_address(page);
    }
@@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
    free_pages((unsigned long)vaddr, get_order(size));
}

static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
    return (dma_addr == bad_dma_addr);
}

static int no_agp;

static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
    iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;

    if (iommu_size < 64*1024*1024) {
        printk(KERN_WARNING
        pr_warning(
            "PCI-DMA: Warning: Small IOMMU %luMB."
            " Consider increasing the AGP aperture in BIOS\n",
            iommu_size >> 20);
@@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
    aperture_alloc = aper_alloc;
}

static void gart_fixup_northbridges(struct sys_device *dev)
{
    int i;

    if (!fix_up_north_bridges)
        return;

    pr_info("PCI-DMA: Restoring GART aperture settings\n");

    for (i = 0; i < num_k8_northbridges; i++) {
        struct pci_dev *dev = k8_northbridges[i];

        /*
         * Don't enable translations just yet. That is the next
         * step. Restore the pre-suspend aperture settings.
         */
        pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1);
        pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
    }
}

static int gart_resume(struct sys_device *dev)
{
    printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
    pr_info("PCI-DMA: Resuming GART IOMMU\n");

    if (fix_up_north_bridges) {
        int i;

        printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");

        for (i = 0; i < num_k8_northbridges; i++) {
            struct pci_dev *dev = k8_northbridges[i];

            /*
             * Don't enable translations just yet. That is the next
             * step. Restore the pre-suspend aperture settings.
             */
            pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
                                   aperture_order << 1);
            pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
                                   aperture_alloc >> 25);
        }
    }
    gart_fixup_northbridges(dev);

    enable_gart_translations();

@@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state)
}

static struct sysdev_class gart_sysdev_class = {
    .name = "gart",
    .suspend = gart_suspend,
    .resume = gart_resume,
    .name = "gart",
    .suspend = gart_suspend,
    .resume = gart_resume,

};

static struct sys_device device_gart = {
    .id = 0,
    .cls = &gart_sysdev_class,
    .cls = &gart_sysdev_class,
};

/*
@@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
    void *gatt;
    int i, error;

    printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
    pr_info("PCI-DMA: Disabling AGP.\n");

    aper_size = aper_base = info->aper_size = 0;
    dev = NULL;
    for (i = 0; i < num_k8_northbridges; i++) {
@@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
    }
    if (!aper_base)
        goto nommu;

    info->aper_base = aper_base;
    info->aper_size = aper_size >> 20;

@@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)

    flush_gart();

    printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
    pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
            aper_base, aper_size>>10);

    return 0;

nommu:
    /* Should not happen anymore */
    printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
    pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
               "falling back to iommu=soft.\n");
    return -1;
}
@@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = {
    .unmap_page = gart_unmap_page,
    .alloc_coherent = gart_alloc_coherent,
    .free_coherent = gart_free_coherent,
    .mapping_error = gart_mapping_error,
};

void gart_iommu_shutdown(void)
static void gart_iommu_shutdown(void)
{
    struct pci_dev *dev;
    int i;

    if (no_agp && (dma_ops != &gart_dma_ops))
    if (no_agp)
        return;

    for (i = 0; i < num_k8_northbridges; i++) {
@@ -708,7 +725,7 @@ void gart_iommu_shutdown(void)
    }
}

void __init gart_iommu_init(void)
int __init gart_iommu_init(void)
{
    struct agp_kern_info info;
    unsigned long iommu_start;
@@ -718,7 +735,7 @@ void __init gart_iommu_init(void)
    long i;

    if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
        return;
        return 0;

#ifndef CONFIG_AGP_AMD64
    no_agp = 1;
@@ -730,35 +747,28 @@ void __init gart_iommu_init(void)
        (agp_copy_info(agp_bridge, &info) < 0);
#endif

    if (swiotlb)
        return;

    /* Did we detect a different HW IOMMU? */
    if (iommu_detected && !gart_iommu_aperture)
        return;

    if (no_iommu ||
        (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
        !gart_iommu_aperture ||
        (no_agp && init_k8_gatt(&info) < 0)) {
        if (max_pfn > MAX_DMA32_PFN) {
            printk(KERN_WARNING "More than 4GB of memory "
                   "but GART IOMMU not available.\n");
            printk(KERN_WARNING "falling back to iommu=soft.\n");
            pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
            pr_warning("falling back to iommu=soft.\n");
        }
        return;
        return 0;
    }

    /* need to map that range */
    aper_size = info.aper_size << 20;
    aper_base = info.aper_base;
    end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
    aper_size = info.aper_size << 20;
    aper_base = info.aper_base;
    end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);

    if (end_pfn > max_low_pfn_mapped) {
        start_pfn = (aper_base>>PAGE_SHIFT);
        init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
    }

    printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
    pr_info("PCI-DMA: using GART IOMMU.\n");
    iommu_size = check_iommu_size(info.aper_base, aper_size);
    iommu_pages = iommu_size >> PAGE_SHIFT;

@@ -773,8 +783,7 @@ void __init gart_iommu_init(void)

        ret = dma_debug_resize_entries(iommu_pages);
        if (ret)
            printk(KERN_DEBUG
                   "PCI-DMA: Cannot trace all the entries\n");
            pr_debug("PCI-DMA: Cannot trace all the entries\n");
    }
#endif

@@ -784,15 +793,14 @@ void __init gart_iommu_init(void)
     */
    iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

    agp_memory_reserved = iommu_size;
    printk(KERN_INFO
           "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
    pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
            iommu_size >> 20);

    iommu_start = aper_size - iommu_size;
    iommu_bus_base = info.aper_base + iommu_start;
    bad_dma_address = iommu_bus_base;
    iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
    agp_memory_reserved = iommu_size;
    iommu_start = aper_size - iommu_size;
    iommu_bus_base = info.aper_base + iommu_start;
    bad_dma_addr = iommu_bus_base;
    iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

    /*
     * Unmap the IOMMU part of the GART. The alias of the page is
@@ -814,7 +822,7 @@ void __init gart_iommu_init(void)
     * the pages as Not-Present:
     */
    wbinvd();

    /*
     * Now all caches are flushed and we can safely enable
     * GART hardware. Doing it early leaves the possibility
@@ -838,6 +846,10 @@ void __init gart_iommu_init(void)

    flush_gart();
    dma_ops = &gart_dma_ops;
    x86_platform.iommu_shutdown = gart_iommu_shutdown;
    swiotlb = 0;

    return 0;
}

void __init gart_parse_options(char *p)
@@ -856,7 +868,7 @@ void __init gart_parse_options(char *p)
#endif
    if (isdigit(*p) && get_option(&p, &arg))
        iommu_size = arg;
    if (!strncmp(p, "fullflush", 8))
    if (!strncmp(p, "fullflush", 9))
        iommu_fullflush = 1;
    if (!strncmp(p, "nofullflush", 11))
        iommu_fullflush = 0;
|
||||
dma_addr_t bus = page_to_phys(page) + offset;
|
||||
WARN_ON(size == 0);
|
||||
if (!check_addr("map_single", dev, bus, size))
|
||||
return bad_dma_address;
|
||||
return DMA_ERROR_CODE;
|
||||
flush_write_buffers();
|
||||
return bus;
|
||||
}
|
||||
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
|
||||
.sync_sg_for_device = nommu_sync_sg_for_device,
|
||||
.is_phys = 1,
|
||||
};
|
||||
|
||||
void __init no_iommu_init(void)
|
||||
{
|
||||
if (dma_ops)
|
||||
return;
|
||||
|
||||
force_iommu = 0; /* no HW IOMMU */
|
||||
dma_ops = &nommu_dma_ops;
|
||||
}
|
||||
|
@@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = {
    .dma_supported = NULL,
};

void __init pci_swiotlb_init(void)
/*
 * pci_swiotlb_init - initialize swiotlb if necessary
 *
 * This returns non-zero if we are forced to use swiotlb (by the boot
 * option).
 */
int __init pci_swiotlb_init(void)
{
    int use_swiotlb = swiotlb | swiotlb_force;

    /* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
    if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
    if (!no_iommu && max_pfn > MAX_DMA32_PFN)
        swiotlb = 1;
#endif
    if (swiotlb_force)
        swiotlb = 1;

    if (swiotlb) {
        printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
        swiotlb_init();
        swiotlb_init(0);
        dma_ops = &swiotlb_dma_ops;
    }

    return use_swiotlb;
}
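/*
 * Caller-side sketch of the new return value: pci_iommu_alloc() (see the
 * pci-dma.c hunk earlier in this diff) is expected to try swiotlb first
 * and skip the HW IOMMU detection path entirely when bounce buffering
 * was forced:
 *
 *	if (pci_swiotlb_init())
 *		return;
 */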

@@ -10,6 +10,7 @@
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -17,6 +18,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
#include <asm/debugreg.h>

unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
}
#endif

    clear_tsk_thread_flag(tsk, TIF_DEBUG);

    tsk->thread.debugreg0 = 0;
    tsk->thread.debugreg1 = 0;
    tsk->thread.debugreg2 = 0;
    tsk->thread.debugreg3 = 0;
    tsk->thread.debugreg6 = 0;
    tsk->thread.debugreg7 = 0;
    flush_ptrace_hw_breakpoint(tsk);
    memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
    /*
     * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
    else if (next->debugctlmsr != prev->debugctlmsr)
        update_debugctlmsr(next->debugctlmsr);

    if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
        set_debugreg(next->debugreg0, 0);
        set_debugreg(next->debugreg1, 1);
        set_debugreg(next->debugreg2, 2);
        set_debugreg(next->debugreg3, 3);
        /* no 4 and 5 */
        set_debugreg(next->debugreg6, 6);
        set_debugreg(next->debugreg7, 7);
    }

    if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
        test_tsk_thread_flag(next_p, TIF_NOTSC)) {
        /* prev and next are different */

@@ -58,6 +58,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

@@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all)
        ss = regs->ss & 0xffff;
        gs = get_user_gs(regs);
    } else {
        sp = (unsigned long) (&regs->sp);
        sp = kernel_stack_pointer(regs);
        savesegment(ss, ss);
        savesegment(gs, gs);
    }
@@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all)

void show_regs(struct pt_regs *regs)
{
    __show_regs(regs, 1);
    show_registers(regs);
    show_trace(NULL, regs, &regs->sp, regs->bp);
}

@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,

    task_user_gs(p) = get_user_gs(regs);

    p->thread.io_bitmap_ptr = NULL;
    tsk = current;
    err = -ENOMEM;

    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

    if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
        p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
                                          IO_BITMAP_BYTES, GFP_KERNEL);
@@ -52,6 +52,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>

asmlinkage extern void ret_from_fork(void);

@@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all)

void show_regs(struct pt_regs *regs)
{
    printk(KERN_INFO "CPU %d:", smp_processor_id());
    __show_regs(regs, 1);
    show_registers(regs);
    show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

@@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,

    p->thread.fs = me->thread.fs;
    p->thread.gs = me->thread.gs;
    p->thread.io_bitmap_ptr = NULL;

    savesegment(gs, p->thread.gsindex);
    savesegment(fs, p->thread.fsindex);
    savesegment(es, p->thread.es);
    savesegment(ds, p->thread.ds);

    err = -ENOMEM;
    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

    if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
        p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
        if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +345,7 @@ out:
        kfree(p->thread.io_bitmap_ptr);
        p->thread.io_bitmap_max = 0;
    }

    return err;
}

@@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
     */
    if (preload_fpu)
        __math_state_restore();

    return prev_p;
}
|
@@ -22,6 +22,8 @@
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
@@ -34,6 +36,7 @@
|
||||
#include <asm/prctl.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/ds.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
|
||||
#include "tls.h"
|
||||
|
||||
@@ -49,6 +52,118 @@ enum x86_regset {
|
||||
REGSET_IOPERM32,
|
||||
};
|
||||
|
||||
struct pt_regs_offset {
|
||||
const char *name;
|
||||
int offset;
|
||||
};
|
||||
|
||||
#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
|
||||
#define REG_OFFSET_END {.name = NULL, .offset = 0}
|
||||
|
||||
static const struct pt_regs_offset regoffset_table[] = {
|
||||
#ifdef CONFIG_X86_64
|
||||
REG_OFFSET_NAME(r15),
|
||||
REG_OFFSET_NAME(r14),
|
||||
REG_OFFSET_NAME(r13),
|
||||
REG_OFFSET_NAME(r12),
|
||||
REG_OFFSET_NAME(r11),
|
||||
REG_OFFSET_NAME(r10),
|
||||
REG_OFFSET_NAME(r9),
|
||||
REG_OFFSET_NAME(r8),
|
||||
#endif
|
||||
REG_OFFSET_NAME(bx),
|
||||
REG_OFFSET_NAME(cx),
|
||||
REG_OFFSET_NAME(dx),
|
||||
REG_OFFSET_NAME(si),
|
||||
REG_OFFSET_NAME(di),
|
||||
REG_OFFSET_NAME(bp),
|
||||
REG_OFFSET_NAME(ax),
|
||||
#ifdef CONFIG_X86_32
|
||||
REG_OFFSET_NAME(ds),
|
||||
REG_OFFSET_NAME(es),
|
||||
REG_OFFSET_NAME(fs),
|
||||
REG_OFFSET_NAME(gs),
|
||||
#endif
|
||||
REG_OFFSET_NAME(orig_ax),
|
||||
REG_OFFSET_NAME(ip),
|
||||
REG_OFFSET_NAME(cs),
|
||||
REG_OFFSET_NAME(flags),
|
||||
REG_OFFSET_NAME(sp),
|
||||
REG_OFFSET_NAME(ss),
|
||||
REG_OFFSET_END,
|
||||
};
|
||||
|
||||
/**
|
||||
* regs_query_register_offset() - query register offset from its name
|
||||
* @name: the name of a register
|
||||
*
|
||||
* regs_query_register_offset() returns the offset of a register in struct
|
||||
* pt_regs from its name. If the name is invalid, this returns -EINVAL;
|
||||
*/
|
||||
int regs_query_register_offset(const char *name)
|
||||
{
|
||||
const struct pt_regs_offset *roff;
|
||||
for (roff = regoffset_table; roff->name != NULL; roff++)
|
||||
if (!strcmp(roff->name, name))
|
||||
return roff->offset;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* regs_query_register_name() - query register name from its offset
|
||||
* @offset: the offset of a register in struct pt_regs.
|
||||
*
|
||||
* regs_query_register_name() returns the name of a register from its
|
||||
* offset in struct pt_regs. If the @offset is invalid, this returns NULL;
|
||||
*/
|
||||
const char *regs_query_register_name(unsigned int offset)
|
||||
{
|
||||
const struct pt_regs_offset *roff;
|
||||
for (roff = regoffset_table; roff->name != NULL; roff++)
|
||||
if (roff->offset == offset)
|
||||
return roff->name;
|
||||
return NULL;
|
||||
}
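
The two lookups above are inverses over regoffset_table; a minimal kernel-side sketch of the intended use (read_reg_by_name() is a hypothetical helper, not part of this patch — the offset arithmetic mirrors regs_get_argument_nth() below):

        /* Hypothetical: fetch a saved register from pt_regs by name. */
        static unsigned long read_reg_by_name(struct pt_regs *regs, const char *name)
        {
                int offs = regs_query_register_offset(name);    /* e.g. "ip" */

                if (offs < 0)
                        return 0;       /* unknown register name */
                return *(unsigned long *)((char *)regs + offs);
        }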

static const int arg_offs_table[] = {
#ifdef CONFIG_X86_32
        [0] = offsetof(struct pt_regs, ax),
        [1] = offsetof(struct pt_regs, dx),
        [2] = offsetof(struct pt_regs, cx)
#else /* CONFIG_X86_64 */
        [0] = offsetof(struct pt_regs, di),
        [1] = offsetof(struct pt_regs, si),
        [2] = offsetof(struct pt_regs, dx),
        [3] = offsetof(struct pt_regs, cx),
        [4] = offsetof(struct pt_regs, r8),
        [5] = offsetof(struct pt_regs, r9)
#endif
};

/**
 * regs_get_argument_nth() - get Nth argument at function call
 * @regs: pt_regs which contains registers at function entry.
 * @n: argument number.
 *
 * regs_get_argument_nth() returns the @n th argument of a function call.
 * Since the kernel stack usually changes right after function entry,
 * you must use this at function entry. If the @n th entry is NOT in the
 * kernel stack or pt_regs, this returns 0.
 */
unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
{
        if (n < ARRAY_SIZE(arg_offs_table))
                return *(unsigned long *)((char *)regs + arg_offs_table[n]);
        else {
                /*
                 * The typical case: arg n is on the stack.
                 * (Note: stack[0] = return address, so skip it)
                 */
                n -= ARRAY_SIZE(arg_offs_table);
                return regs_get_kernel_stack_nth(regs, 1 + n);
        }
}
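
As the comment warns, the stack-resident arguments are only valid at function entry, so the natural caller is a kprobe pre-handler; a hypothetical sketch (example_entry_handler() and its registration are assumptions, not part of this patch):

        /* Hypothetical kprobe pre-handler: log the probed function's first arg. */
        static int example_entry_handler(struct kprobe *p, struct pt_regs *regs)
        {
                /* arg 0 lives in di on x86_64, ax on i386 (regparm(3)) */
                unsigned long arg0 = regs_get_argument_nth(regs, 0);

                pr_info("%s: arg0=%#lx\n", p->symbol_name, arg0);
                return 0;       /* continue into the probed function */
        }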

/*
 * does not yet catch signals sent when the child dies.
 * in exit.c or in signal.c.

@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
        return 0;
}

static unsigned long debugreg_addr_limit(struct task_struct *task)
{
        return TASK_SIZE - 3;
}

#else /* CONFIG_X86_64 */

#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)

@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
        return 0;
}

static unsigned long debugreg_addr_limit(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(task, TIF_IA32))
                return IA32_PAGE_OFFSET - 3;
#endif
        return TASK_SIZE_MAX - 7;
}

#endif /* CONFIG_X86_32 */

static unsigned long get_flags(struct task_struct *task)

@@ -454,96 +555,236 @@ static int genregs_set(struct task_struct *target,
        return ret;
}

/*
 * This function is trivial and will be inlined by the compiler.
 * Having it separates the implementation details of debug
 * registers from the interface details of ptrace.
 */
static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
static void ptrace_triggered(struct perf_event *bp, void *data)
{
        switch (n) {
        case 0: return child->thread.debugreg0;
        case 1: return child->thread.debugreg1;
        case 2: return child->thread.debugreg2;
        case 3: return child->thread.debugreg3;
        case 6: return child->thread.debugreg6;
        case 7: return child->thread.debugreg7;
        int i;
        struct thread_struct *thread = &(current->thread);

        /*
         * Store in the virtual DR6 register the fact that the breakpoint
         * was hit so the thread's debugger will see it.
         */
        for (i = 0; i < HBP_NUM; i++) {
                if (thread->ptrace_bps[i] == bp)
                        break;
        }

        thread->debugreg6 |= (DR_TRAP0 << i);
}

/*
 * Walk through every ptrace breakpoint for this thread and
 * build the dr7 value on top of their attributes.
 */
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
        int i;
        int dr7 = 0;
        struct arch_hw_breakpoint *info;

        for (i = 0; i < HBP_NUM; i++) {
                if (bp[i] && !bp[i]->attr.disabled) {
                        info = counter_arch_bp(bp[i]);
                        dr7 |= encode_dr7(i, info->len, info->type);
                }
        }

        return dr7;
}
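
For reference, encode_dr7()/decode_dr7() work against the architectural DR7 layout: slot i owns its enable bits at the bottom of the register and a 4-bit R/W-plus-LEN nibble at bit 16 + 4*i. A worked illustration (the EXAMPLE_* macros are hypothetical, for exposition only):

        #define EXAMPLE_DR7_ENABLE(i)   (0x3UL << ((i) * 2))    /* Li | Gi */
        #define EXAMPLE_DR7_RWLEN(i, v) (((v) & 0xfUL) << (16 + (i) * 4))

        /*
         * Slot 1, 4-byte write watchpoint: R/W = 01b (write) and
         * LEN = 11b (4 bytes), so the nibble is 1101b = 0xd:
         */
        unsigned long dr7 = EXAMPLE_DR7_ENABLE(1) | EXAMPLE_DR7_RWLEN(1, 0xd);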

static struct perf_event *
ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
                         struct task_struct *tsk, int disabled)
{
        int err;
        int gen_len, gen_type;
        DEFINE_BREAKPOINT_ATTR(attr);

        /*
         * We should have at least an inactive breakpoint at this
         * slot. It means the user is writing dr7 without having
         * written the address register first.
         */
        if (!bp)
                return ERR_PTR(-EINVAL);

        err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
        if (err)
                return ERR_PTR(err);

        attr = bp->attr;
        attr.bp_len = gen_len;
        attr.bp_type = gen_type;
        attr.disabled = disabled;

        return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
}

/*
 * Handle ptrace writes to debug register 7.
 */
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
        struct thread_struct *thread = &(tsk->thread);
        unsigned long old_dr7;
        int i, orig_ret = 0, rc = 0;
        int enabled, second_pass = 0;
        unsigned len, type;
        struct perf_event *bp;

        data &= ~DR_CONTROL_RESERVED;
        old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
restore:
        /*
         * Loop through all the hardware breakpoints, making the
         * appropriate changes to each.
         */
        for (i = 0; i < HBP_NUM; i++) {
                enabled = decode_dr7(data, i, &len, &type);
                bp = thread->ptrace_bps[i];

                if (!enabled) {
                        if (bp) {
                                /*
                                 * Don't unregister the breakpoints right away,
                                 * unless all register_user_hw_breakpoint()
                                 * requests have succeeded. This prevents
                                 * any window of opportunity for debug
                                 * register grabbing by other users.
                                 */
                                if (!second_pass)
                                        continue;

                                thread->ptrace_bps[i] = NULL;
                                bp = ptrace_modify_breakpoint(bp, len, type,
                                                              tsk, 1);
                                if (IS_ERR(bp)) {
                                        rc = PTR_ERR(bp);
                                        thread->ptrace_bps[i] = NULL;
                                        break;
                                }
                                thread->ptrace_bps[i] = bp;
                        }
                        continue;
                }

                bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);

                /* Incorrect bp, or we have a bug in the bp API */
                if (IS_ERR(bp)) {
                        rc = PTR_ERR(bp);
                        thread->ptrace_bps[i] = NULL;
                        break;
                }
                thread->ptrace_bps[i] = bp;
        }
        /*
         * Make a second pass to free the remaining unused breakpoints
         * or to restore the original breakpoints if an error occurred.
         */
        if (!second_pass) {
                second_pass = 1;
                if (rc < 0) {
                        orig_ret = rc;
                        data = old_dr7;
                }
                goto restore;
        }
        return ((orig_ret < 0) ? orig_ret : rc);
}
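
Seen from userspace, this is the path behind a debugger poking the debug registers; a minimal sketch of the caller side (set_hw_watchpoint() is a hypothetical helper, error handling trimmed):

        #include <stddef.h>
        #include <sys/ptrace.h>
        #include <sys/types.h>
        #include <sys/user.h>

        static int set_hw_watchpoint(pid_t pid, unsigned long addr, unsigned long dr7)
        {
                /* DR0 first, so a breakpoint slot exists to enable ... */
                if (ptrace(PTRACE_POKEUSER, pid,
                           (void *)offsetof(struct user, u_debugreg[0]),
                           (void *)addr) < 0)
                        return -1;
                /* ... then DR7, which lands in ptrace_write_dr7() above */
                return ptrace(PTRACE_POKEUSER, pid,
                              (void *)offsetof(struct user, u_debugreg[7]),
                              (void *)dr7);
        }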

/*
 * Handle PTRACE_PEEKUSR calls for the debug register area.
 */
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
        struct thread_struct *thread = &(tsk->thread);
        unsigned long val = 0;

        if (n < HBP_NUM) {
                struct perf_event *bp;
                bp = thread->ptrace_bps[n];
                if (!bp)
                        return 0;
                val = bp->hw.info.address;
        } else if (n == 6) {
                val = thread->debugreg6;
        } else if (n == 7) {
                val = ptrace_get_dr7(thread->ptrace_bps);
        }
        return val;
}

static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
                                      unsigned long addr)
{
        struct perf_event *bp;
        struct thread_struct *t = &tsk->thread;
        DEFINE_BREAKPOINT_ATTR(attr);

        if (!t->ptrace_bps[nr]) {
                /*
                 * Put stub len and type to register (reserve) an inactive but
                 * correct bp.
                 */
                attr.bp_addr = addr;
                attr.bp_len = HW_BREAKPOINT_LEN_1;
                attr.bp_type = HW_BREAKPOINT_W;
                attr.disabled = 1;

                bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
        } else {
                bp = t->ptrace_bps[nr];
                t->ptrace_bps[nr] = NULL;

                attr = bp->attr;
                attr.bp_addr = addr;
                bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
        }
        /*
         * CHECKME: the previous code returned -EIO if the addr wasn't a
         * valid task virtual addr. The new one will return -EINVAL in this
         * case.
         * -EINVAL may be what we want for in-kernel breakpoint users, but
         * -EIO looks better for ptrace, since we refuse a register write
         * for the user. And anyway this is the previous behaviour.
         */
        if (IS_ERR(bp))
                return PTR_ERR(bp);

        t->ptrace_bps[nr] = bp;

        return 0;
}

static int ptrace_set_debugreg(struct task_struct *child,
                               int n, unsigned long data)
/*
 * Handle PTRACE_POKEUSR calls for the debug register area.
 */
int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
{
        int i;
        struct thread_struct *thread = &(tsk->thread);
        int rc = 0;

        if (unlikely(n == 4 || n == 5))
        /* There are no DR4 or DR5 registers */
        if (n == 4 || n == 5)
                return -EIO;

        if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
                return -EIO;

        switch (n) {
        case 0: child->thread.debugreg0 = data; break;
        case 1: child->thread.debugreg1 = data; break;
        case 2: child->thread.debugreg2 = data; break;
        case 3: child->thread.debugreg3 = data; break;

        case 6:
                if ((data & ~0xffffffffUL) != 0)
                        return -EIO;
                child->thread.debugreg6 = data;
                break;

        case 7:
                /*
                 * Sanity-check data. Take one half-byte at once with
                 * check = (val >> (16 + 4*i)) & 0xf. It contains the
                 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
                 * 2 and 3 are LENi. Given a list of invalid values,
                 * we do mask |= 1 << invalid_value, so that
                 * (mask >> check) & 1 is a correct test for invalid
                 * values.
                 *
                 * R/Wi contains the type of the breakpoint /
                 * watchpoint, LENi contains the length of the watched
                 * data in the watchpoint case.
                 *
                 * The invalid values are:
                 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
                 * - R/Wi == 0x10 (break on I/O reads or writes), so
                 *   mask |= 0x4444.
                 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
                 *   0x1110.
                 *
                 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
                 *
                 * See the Intel Manual "System Programming Guide",
                 * 15.2.4.
                 *
                 * Note that LENi == 0x10 is defined on x86_64 in long
                 * mode (i.e. even for 32-bit userspace software, but
                 * 64-bit kernel), so the x86_64 mask value is 0x5554.
                 * See the AMD manual no. 24593 (AMD64 System Programming).
                 */
#ifdef CONFIG_X86_32
#define DR7_MASK 0x5f54
#else
#define DR7_MASK 0x5554
#endif
                data &= ~DR_CONTROL_RESERVED;
                for (i = 0; i < 4; i++)
                        if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
                                return -EIO;
                child->thread.debugreg7 = data;
                if (data)
                        set_tsk_thread_flag(child, TIF_DEBUG);
                else
                        clear_tsk_thread_flag(child, TIF_DEBUG);
                break;
        if (n == 6) {
                thread->debugreg6 = val;
                goto ret_path;
        }
        if (n < HBP_NUM) {
                rc = ptrace_set_breakpoint_addr(tsk, n, val);
                if (rc)
                        return rc;
        }
        /* All that's left is DR7 */
        if (n == 7)
                rc = ptrace_write_dr7(tsk, val);

        return 0;
ret_path:
        return rc;
}

/*

@@ -23,7 +23,7 @@
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
#else
# include <asm/iommu.h>
# include <asm/x86_init.h>
#endif

/*

@@ -630,7 +630,7 @@ void native_machine_shutdown(void)
#endif

#ifdef CONFIG_X86_64
        pci_iommu_shutdown();
        x86_platform.iommu_shutdown();
#endif
}

@@ -109,6 +109,7 @@
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>

/*
 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.

@@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd);
 * from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
static inline void __init copy_edd(void)
{
        memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
               sizeof(edd.mbr_signature));

@@ -256,7 +257,7 @@ static inline void copy_edd(void)
        edd.edd_info_nr = boot_params.eddbuf_entries;
}
#else
static inline void copy_edd(void)
static inline void __init copy_edd(void)
{
}
#endif

@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
        x86_init.oem.banner();

        mcheck_init();
}

#ifdef CONFIG_X86_32

@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)

        signr = get_signal_to_deliver(&info, &ka, regs, NULL);
        if (signr > 0) {
                /*
                 * Re-enable any watchpoints before delivering the
                 * signal to user space. The processor register will
                 * have been cleared if the watchpoint triggered
                 * inside the kernel.
                 */
                if (current->thread.debugreg7)
                        set_debugreg(current->thread.debugreg7, 7);

                /* Whee! Actually deliver the signal. */
                if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
                        /*

@@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu)
void cpu_disable_common(void)
{
        int cpu = smp_processor_id();
        /*
         * HACK:
         * Allow any queued timer interrupts to get serviced
         * This is only a temporary solution until we cleanup
         * fixup_irqs as we do for IA64.
         */
        local_irq_enable();
        mdelay(1);

        local_irq_disable();
        remove_siblinginfo(cpu);

        /* It's now safe to remove this processor from the online map */

@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
        struct task_struct *tsk = current;
        unsigned long condition;
        unsigned long dr6;
        int si_code;

        get_debugreg(condition, 6);
        get_debugreg(dr6, 6);

        /* Catch kmemcheck conditions first of all! */
        if (condition & DR_STEP && kmemcheck_trap(regs))
        if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
                return;

        /* DR6 may or may not be cleared by the CPU */
        set_debugreg(0, 6);
        /*
         * The processor cleared BTF, so don't mark that we need it set.
         */
        clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
        tsk->thread.debugctlmsr = 0;

        if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
                       SIGTRAP) == NOTIFY_STOP)
        /* Store the virtualized DR6 value */
        tsk->thread.debugreg6 = dr6;

        if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
                       SIGTRAP) == NOTIFY_STOP)
                return;

        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);

        /* Mask out spurious debug traps due to lazy DR7 setting */
        if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
                if (!tsk->thread.debugreg7)
                        goto clear_dr7;
        if (regs->flags & X86_VM_MASK) {
                handle_vm86_trap((struct kernel_vm86_regs *) regs,
                                 error_code, 1);
                return;
        }

#ifdef CONFIG_X86_32
        if (regs->flags & X86_VM_MASK)
                goto debug_vm86;
#endif

        /* Save debug status register where ptrace can see it */
        tsk->thread.debugreg6 = condition;

        /*
         * Single-stepping through TF: make sure we ignore any events in
         * kernel space (but re-enable TF when returning to user mode).
         * Single-stepping through system calls: ignore any exceptions in
         * kernel space, but re-enable TF when returning to user mode.
         *
         * We already checked v86 mode above, so we can check for kernel mode
         * by just checking the CPL of CS.
         */
        if (condition & DR_STEP) {
                if (!user_mode(regs))
                        goto clear_TF_reenable;
        if ((dr6 & DR_STEP) && !user_mode(regs)) {
                tsk->thread.debugreg6 &= ~DR_STEP;
                set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
                regs->flags &= ~X86_EFLAGS_TF;
        }

        si_code = get_si_code(condition);
        /* Ok, finally something we can handle */
        send_sigtrap(tsk, regs, error_code, si_code);

        /*
         * Disable additional traps. They'll be re-enabled when
         * the signal is delivered.
         */
clear_dr7:
        set_debugreg(0, 7);
        si_code = get_si_code(tsk->thread.debugreg6);
        if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
                send_sigtrap(tsk, regs, error_code, si_code);
        preempt_conditional_cli(regs);
        return;

#ifdef CONFIG_X86_32
debug_vm86:
        /* reenable preemption: handle_vm86_trap() might sleep */
        dec_preempt_count();
        handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
        conditional_cli(regs);
        return;
#endif

clear_TF_reenable:
        set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
        regs->flags &= ~X86_EFLAGS_TF;
        preempt_conditional_cli(regs);
        return;
}

@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
                return;

        if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
                printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
                if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
                        pr_info(
                        "Skipped synchronization checks as TSC is reliable.\n");
                return;
        }

        pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
                smp_processor_id(), cpu);

        /*
         * Reset it - in case this is a second bootup:
         */

@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu)
                cpu_relax();

        if (nr_warps) {
                printk("\n");
                pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
                           smp_processor_id(), cpu);
                pr_warning("Measured %Ld cycles TSC warp between CPUs, "
                           "turning off TSC clock.\n", max_warp);
                mark_tsc_unstable("check_tsc_sync_source failed");
        } else {
                printk(" passed.\n");
                pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
                         smp_processor_id(), cpu);
        }

        /*

@@ -9,10 +9,25 @@
 */

#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/irq.h>

#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>

/* MMR offset and pnode of hub sourcing interrupts for a given irq */
struct uv_irq_2_mmr_pnode {
        struct rb_node list;
        unsigned long offset;
        int pnode;
        int irq;
};

static spinlock_t uv_irq_lock;
static struct rb_root uv_irq_root;

static int uv_set_irq_affinity(unsigned int, const struct cpumask *);

static void uv_noop(unsigned int irq)
{

@@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = {
        .unmask = uv_noop,
        .eoi = uv_ack_apic,
        .end = uv_noop,
        .set_affinity = uv_set_irq_affinity,
};

/*
 * Add offset and pnode information of the hub sourcing interrupts to the
 * rb tree for a specific irq.
 */
static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
{
        struct rb_node **link = &uv_irq_root.rb_node;
        struct rb_node *parent = NULL;
        struct uv_irq_2_mmr_pnode *n;
        struct uv_irq_2_mmr_pnode *e;
        unsigned long irqflags;

        n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
                         uv_blade_to_memory_nid(blade));
        if (!n)
                return -ENOMEM;

        n->irq = irq;
        n->offset = offset;
        n->pnode = uv_blade_to_pnode(blade);
        spin_lock_irqsave(&uv_irq_lock, irqflags);
        /* Find the right place in the rbtree: */
        while (*link) {
                parent = *link;
                e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);

                if (unlikely(irq == e->irq)) {
                        /* irq entry exists */
                        e->pnode = uv_blade_to_pnode(blade);
                        e->offset = offset;
                        spin_unlock_irqrestore(&uv_irq_lock, irqflags);
                        kfree(n);
                        return 0;
                }

                if (irq < e->irq)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }

        /* Insert the node into the rbtree. */
        rb_link_node(&n->list, parent, link);
        rb_insert_color(&n->list, &uv_irq_root);

        spin_unlock_irqrestore(&uv_irq_lock, irqflags);
        return 0;
}

/* Retrieve offset and pnode information from the rb tree for a specific irq */
int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
{
        struct uv_irq_2_mmr_pnode *e;
        struct rb_node *n;
        unsigned long irqflags;

        spin_lock_irqsave(&uv_irq_lock, irqflags);
        n = uv_irq_root.rb_node;
        while (n) {
                e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);

                if (e->irq == irq) {
                        *offset = e->offset;
                        *pnode = e->pnode;
                        spin_unlock_irqrestore(&uv_irq_lock, irqflags);
                        return 0;
                }

                if (irq < e->irq)
                        n = n->rb_left;
                else
                        n = n->rb_right;
        }
        spin_unlock_irqrestore(&uv_irq_lock, irqflags);
        return -1;
}

/*
 * Re-target the irq to the specified CPU and enable the specified MMR located
 * on the specified blade to allow the sending of MSIs to the specified CPU.
 */
static int
arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
                   unsigned long mmr_offset, int restrict)
{
        const struct cpumask *eligible_cpu = cpumask_of(cpu);
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        int mmr_pnode;
        unsigned long mmr_value;
        struct uv_IO_APIC_route_entry *entry;
        int err;

        BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
                     sizeof(unsigned long));

        cfg = irq_cfg(irq);

        err = assign_irq_vector(irq, cfg, eligible_cpu);
        if (err != 0)
                return err;

        if (restrict == UV_AFFINITY_CPU)
                desc->status |= IRQ_NO_BALANCING;
        else
                desc->status |= IRQ_MOVE_PCNTXT;

        set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
                                      irq_name);

        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        entry->vector = cfg->vector;
        entry->delivery_mode = apic->irq_delivery_mode;
        entry->dest_mode = apic->irq_dest_mode;
        entry->polarity = 0;
        entry->trigger = 0;
        entry->mask = 0;
        entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);

        mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);

        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);

        return irq;
}

/*
 * Disable the specified MMR located on the specified blade so that MSIs are
 * no longer allowed to be sent.
 */
static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
{
        unsigned long mmr_value;
        struct uv_IO_APIC_route_entry *entry;

        BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
                     sizeof(unsigned long));

        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        entry->mask = 1;

        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}

static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg = desc->chip_data;
        unsigned int dest;
        unsigned long mmr_value;
        struct uv_IO_APIC_route_entry *entry;
        unsigned long mmr_offset;
        unsigned mmr_pnode;

        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
                return -1;

        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;

        entry->vector = cfg->vector;
        entry->delivery_mode = apic->irq_delivery_mode;
        entry->dest_mode = apic->irq_dest_mode;
        entry->polarity = 0;
        entry->trigger = 0;
        entry->mask = 0;
        entry->dest = dest;

        /* Get previously stored MMR and pnode of hub sourcing interrupts */
        if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
                return -1;

        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);

        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);

        return 0;
}

/*
 * Set up a mapping of an available irq and vector, and enable the specified
 * MMR that defines the MSI that is to be sent to the specified CPU when an
 * interrupt is raised.
 */
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
                 unsigned long mmr_offset)
                 unsigned long mmr_offset, int restrict)
{
        int irq;
        int ret;
        int irq, ret;

        irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));

        irq = create_irq();
        if (irq <= 0)
                return -EBUSY;

        ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
        if (ret != irq)
        ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
                                 restrict);
        if (ret == irq)
                uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
        else
                destroy_irq(irq);

        return ret;

@@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
 *
 * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
 */
void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
void uv_teardown_irq(unsigned int irq)
{
        arch_disable_uv_irq(mmr_blade, mmr_offset);
        struct uv_irq_2_mmr_pnode *e;
        struct rb_node *n;
        unsigned long irqflags;

        spin_lock_irqsave(&uv_irq_lock, irqflags);
        n = uv_irq_root.rb_node;
        while (n) {
                e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
                if (e->irq == irq) {
                        arch_disable_uv_irq(e->pnode, e->offset);
                        rb_erase(n, &uv_irq_root);
                        kfree(e);
                        break;
                }
                if (irq < e->irq)
                        n = n->rb_left;
                else
                        n = n->rb_right;
        }
        spin_unlock_irqrestore(&uv_irq_lock, irqflags);
        destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
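
Putting the reworked API together, a driver-side sketch (illustrative only; the irq name and the cpu/blade/offset values are assumptions): uv_setup_irq() now records the MMR/pnode pair in the rb tree, so teardown needs nothing but the irq number:

        int irq = uv_setup_irq("uv-example", cpu, mmr_blade, mmr_offset,
                               UV_AFFINITY_CPU);
        if (irq > 0) {
                /* ... interrupt in service ... */
                uv_teardown_irq(irq);   /* pnode/offset recovered internally */
        }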

@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
                return;
        }

        apic_cpus = apic->apicid_to_cpu_present(m->apicid);
        apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
        physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
        /*
         * Validate version

@@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq)
}

static struct irq_chip cobalt_irq_type = {
        .typename = "Cobalt-APIC",
        .name = "Cobalt-APIC",
        .startup = startup_cobalt_irq,
        .shutdown = disable_cobalt_irq,
        .enable = enable_cobalt_irq,

@@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq)
}

static struct irq_chip piix4_master_irq_type = {
        .typename = "PIIX4-master",
        .name = "PIIX4-master",
        .startup = startup_piix4_master_irq,
        .ack = ack_cobalt_irq,
        .end = end_piix4_master_irq,

@@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = {

static struct irq_chip piix4_virtual_irq_type = {
        .typename = "PIIX4-virtual",
        .name = "PIIX4-virtual",
        .shutdown = disable_8259A_irq,
        .enable = enable_8259A_irq,
        .disable = disable_8259A_irq,

@@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8);

EXPORT_SYMBOL(copy_user_generic);
EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);

EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);

@@ -14,10 +14,13 @@
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/tsc.h>
#include <asm/iommu.h>

void __cpuinit x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
void __init x86_init_pgd_noop(pgd_t *unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }

/*
 * The platform setup functions are preset with the default functions

@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = {
                .tsc_pre_init = x86_init_noop,
                .timer_init = hpet_time_init,
        },

        .iommu = {
                .iommu_init = iommu_init_noop,
        },
};

struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {

@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = {
        .calibrate_tsc = native_calibrate_tsc,
        .get_wallclock = mach_get_cmos_time,
        .set_wallclock = mach_set_rtc_mmss,
        .iommu_shutdown = iommu_shutdown_noop,
};
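
The point of the new hook is that a platform can override it like any other x86_platform member; a hypothetical sketch (the example_* names are not from this patch):

        static void example_iommu_shutdown(void)
        {
                /* platform-specific IOMMU quiesce would go here */
        }

        static void __init example_platform_setup(void)
        {
                /* replaces the noop; native_machine_shutdown() will call it */
                x86_platform.iommu_shutdown = example_iommu_shutdown;
        }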