Merge branch 'core/percpu' into percpu-cpumask-x86-for-linus-2

Conflicts:
	arch/parisc/kernel/irq.c
	arch/x86/include/asm/fixmap_64.h
	arch/x86/include/asm/setup.h
	kernel/irq/handle.c

Semantic merge:
        arch/x86/include/asm/fixmap.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2009-03-26 21:39:17 +01:00
465 changed files with 17724 additions and 17880 deletions

View File

@@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-y += setup.o i8259.o irqinit_$(BITS).o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -49,31 +50,27 @@ obj-y += step.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
obj-$(CONFIG_X86_32_SMP) += smpcommon.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_ES7000) += es7000_32.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
obj-y += vsmp_64.o
obj-$(CONFIG_X86_VSMP) += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@ -114,16 +111,13 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o uv_irq.o uv_sysfs.o
obj-y += genx2apic_cluster.o
obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
endif

View File

@@ -37,15 +37,10 @@
#include <asm/pgtable.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/genapic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
# include <mach_apic.h>
#endif
static int __initdata acpi_force = 0;
u32 acpi_rsdt_forced;
#ifdef CONFIG_ACPI
@@ -56,16 +51,7 @@ int acpi_disabled = 1;
EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
#include <asm/proto.h>
#else /* X86 */
#ifdef CONFIG_X86_LOCAL_APIC
#include <mach_apic.h>
#include <mach_mpparse.h>
#endif /* CONFIG_X86_LOCAL_APIC */
# include <asm/proto.h>
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +107,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
*/
char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
unsigned long base, offset, mapped_size;
int idx;
if (!phys || !size)
return NULL;
if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
return __va(phys);
return early_ioremap(phys, size);
}
void __init __acpi_unmap_table(char *map, unsigned long size)
{
if (!map || !size)
return;
offset = phys & (PAGE_SIZE - 1);
mapped_size = PAGE_SIZE - offset;
clear_fixmap(FIX_ACPI_END);
set_fixmap(FIX_ACPI_END, phys);
base = fix_to_virt(FIX_ACPI_END);
/*
* Most cases can be covered by the below.
*/
idx = FIX_ACPI_END;
while (mapped_size < size) {
if (--idx < FIX_ACPI_BEGIN)
return NULL; /* cannot handle this */
phys += PAGE_SIZE;
clear_fixmap(idx);
set_fixmap(idx, phys);
mapped_size += PAGE_SIZE;
}
return ((unsigned char *)base + offset);
early_iounmap(map, size);
}
#ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +208,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
madt->address);
}
acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
default_acpi_madt_oem_check(madt->header.oem_id,
madt->header.oem_table_id);
return 0;
}
@@ -884,7 +854,7 @@ static struct {
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
static int mp_find_ioapic(int gsi)
int mp_find_ioapic(int gsi)
{
int i = 0;
@@ -899,6 +869,16 @@ static int mp_find_ioapic(int gsi)
return -1;
}
/*
 * mp_find_ioapic_pin - translate a GSI into a pin number on one IO-APIC
 * @ioapic: index into mp_ioapic_routing[]; a negative value means the
 *          preceding IO-APIC lookup failed
 * @gsi:    global system interrupt to translate
 *
 * Returns @gsi relative to the IO-APIC's gsi_base, or -1 (after a WARN_ON)
 * when @ioapic is negative or @gsi lies outside [gsi_base, gsi_end].
 */
int mp_find_ioapic_pin(int ioapic, int gsi)
{
	/* Any negative index is a failed lookup, not only -1. */
	if (WARN_ON(ioapic < 0))
		return -1;

	/* Check both ends of the range so the result is never a bogus negative pin. */
	if (WARN_ON(gsi < mp_ioapic_routing[ioapic].gsi_base ||
		    gsi > mp_ioapic_routing[ioapic].gsi_end))
		return -1;

	return gsi - mp_ioapic_routing[ioapic].gsi_base;
}
static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
@@ -912,8 +892,8 @@ static u8 __init uniq_ioapic_id(u8 id)
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
for (i = 0; i < nr_ioapics; i++) {
struct mp_config_ioapic *ia = &mp_ioapics[i];
__set_bit(ia->mp_apicid, used);
struct mpc_ioapic *ia = &mp_ioapics[i];
__set_bit(ia->apicid, used);
}
if (!test_bit(id, used))
return id;
@@ -945,29 +925,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
idx = nr_ioapics;
mp_ioapics[idx].mp_type = MP_IOAPIC;
mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mp_apicaddr = address;
mp_ioapics[idx].type = MP_IOAPIC;
mp_ioapics[idx].flags = MPC_APIC_USABLE;
mp_ioapics[idx].apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
mp_ioapics[idx].apicid = uniq_ioapic_id(id);
#ifdef CONFIG_X86_32
mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
mp_ioapics[idx].apicver = io_apic_get_version(idx);
#else
mp_ioapics[idx].mp_apicver = 0;
mp_ioapics[idx].apicver = 0;
#endif
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
"GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
nr_ioapics++;
@@ -996,19 +976,19 @@ int __init acpi_probe_gsi(void)
return max_gsi + 1;
}
static void assign_to_mp_irq(struct mp_config_intsrc *m,
struct mp_config_intsrc *mp_irq)
static void assign_to_mp_irq(struct mpc_intsrc *m,
struct mpc_intsrc *mp_irq)
{
memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
}
static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
struct mp_config_intsrc *m)
static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
struct mpc_intsrc *m)
{
return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
}
static void save_mp_irq(struct mp_config_intsrc *m)
static void save_mp_irq(struct mpc_intsrc *m)
{
int i;
@@ -1026,7 +1006,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
int pin;
struct mp_config_intsrc mp_irq;
struct mpc_intsrc mp_irq;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1034,7 +1014,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
return;
pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
pin = mp_find_ioapic_pin(ioapic, gsi);
/*
* TBD: This check is for faulty timer entries, where the override
@@ -1044,13 +1024,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_irqflag = (trigger << 2) | polarity;
mp_irq.mp_srcbus = MP_ISA_BUS;
mp_irq.mp_srcbusirq = bus_irq; /* IRQ */
mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
mp_irq.mp_dstirq = pin; /* INTIN# */
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
mp_irq.irqflag = (trigger << 2) | polarity;
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.srcbusirq = bus_irq; /* IRQ */
mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
mp_irq.dstirq = pin; /* INTIN# */
save_mp_irq(&mp_irq);
}
@@ -1060,7 +1040,7 @@ void __init mp_config_acpi_legacy_irqs(void)
int i;
int ioapic;
unsigned int dstapic;
struct mp_config_intsrc mp_irq;
struct mpc_intsrc mp_irq;
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
/*
@@ -1085,7 +1065,7 @@ void __init mp_config_acpi_legacy_irqs(void)
ioapic = mp_find_ioapic(0);
if (ioapic < 0)
return;
dstapic = mp_ioapics[ioapic].mp_apicid;
dstapic = mp_ioapics[ioapic].apicid;
/*
* Use the default configuration for the IRQs 0-15. Unless
@@ -1095,16 +1075,14 @@ void __init mp_config_acpi_legacy_irqs(void)
int idx;
for (idx = 0; idx < mp_irq_entries; idx++) {
struct mp_config_intsrc *irq = mp_irqs + idx;
struct mpc_intsrc *irq = mp_irqs + idx;
/* Do we already have a mapping for this ISA IRQ? */
if (irq->mp_srcbus == MP_ISA_BUS
&& irq->mp_srcbusirq == i)
if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
break;
/* Do we already have a mapping for this IOAPIC pin */
if (irq->mp_dstapic == dstapic &&
irq->mp_dstirq == i)
if (irq->dstapic == dstapic && irq->dstirq == i)
break;
}
@@ -1113,13 +1091,13 @@ void __init mp_config_acpi_legacy_irqs(void)
continue; /* IRQ already used */
}
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqflag = 0; /* Conforming */
mp_irq.mp_srcbus = MP_ISA_BUS;
mp_irq.mp_dstapic = dstapic;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_srcbusirq = i; /* Identity mapped */
mp_irq.mp_dstirq = i;
mp_irq.type = MP_INTSRC;
mp_irq.irqflag = 0; /* Conforming */
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.dstapic = dstapic;
mp_irq.irqtype = mp_INT;
mp_irq.srcbusirq = i; /* Identity mapped */
mp_irq.dstirq = i;
save_mp_irq(&mp_irq);
}
@@ -1156,7 +1134,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
return gsi;
}
ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
#ifdef CONFIG_X86_32
if (ioapic_renumber_irq)
@@ -1230,22 +1208,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
u32 gsi, int triggering, int polarity)
{
#ifdef CONFIG_X86_MPPARSE
struct mp_config_intsrc mp_irq;
struct mpc_intsrc mp_irq;
int ioapic;
if (!acpi_ioapic)
return 0;
/* print the entry should happen on mptable identically */
mp_irq.mp_type = MP_INTSRC;
mp_irq.mp_irqtype = mp_INT;
mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
mp_irq.mp_srcbus = number;
mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
mp_irq.srcbus = number;
mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
ioapic = mp_find_ioapic(gsi);
mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
save_mp_irq(&mp_irq);
#endif
@@ -1372,7 +1350,7 @@ static void __init acpi_process_madt(void)
if (!error) {
acpi_lapic = 1;
#ifdef CONFIG_X86_GENERICARCH
#ifdef CONFIG_X86_BIGSMP
generic_bigsmp_probe();
#endif
/*
@@ -1384,9 +1362,8 @@ static void __init acpi_process_madt(void)
acpi_ioapic = 1;
smp_found_config = 1;
#ifdef CONFIG_X86_32
setup_apic_routing();
#endif
if (apic->setup_apic_routing)
apic->setup_apic_routing();
}
}
if (error == -EINVAL) {

View File

@@ -3,8 +3,8 @@
*/
#include <asm/segment.h>
#include <asm/msr-index.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/page_types.h>
#include <asm/pgtable_types.h>
#include <asm/processor-flags.h>
.code16

View File

@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
stack_start.sp = temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_table(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0;

View File

@@ -1,7 +1,7 @@
.section .text.page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/page_types.h>
# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2

View File

@@ -1,8 +1,8 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>

View File

@@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
*/
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
unsigned long flags;
char *vaddr;
int nr_pages = 2;
struct page *pages[2];
int i;
might_sleep();
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
nr_pages = 1;
vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
BUG_ON(!vaddr);
local_irq_save(flags);
local_irq_disable();
memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
local_irq_restore(flags);
local_irq_enable();
vunmap(vaddr);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but

View File

@@ -0,0 +1,19 @@
#
# Makefile for local APIC drivers and for the IO-APIC code
#
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
obj-y += apic_flat_64.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
endif
obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_ES7000) += es7000_32.o
obj-$(CONFIG_X86_SUMMIT) += summit_32.o

View File

@@ -1,7 +1,7 @@
/*
* Local APIC handling, local APIC timers
*
* (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
*
* Fixes
* Maciej W. Rozycki : Bits for genuine 82489DX APICs;
@@ -14,51 +14,69 @@
* Mikael Pettersson : PM converted to driver model.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/ioport.h>
#include <linux/cpu.h>
#include <linux/clockchips.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/dmar.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/bootmem.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/dmar.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/pgalloc.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
#include <asm/i8253.h>
#include <asm/idle.h>
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/i8259.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
#include <mach_ipi.h>
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/*
* Sanity check
* The highest APIC ID seen during enumeration.
*
* This determines the messaging protocol we can use: if all APIC IDs
* are in the 0 ... 7 range, then we can use logical addressing which
* has some performance advantages (better broadcasting).
*
* If there's an APIC ID of 8 or above, we use physical addressing.
*/
#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
# error SPURIOUS_APIC_VECTOR definition error
#endif
unsigned int max_physical_apicid;
/*
* Bitmask of physically existing CPUs:
*/
physid_mask_t phys_cpu_present_map;
/*
* Map cpu index to physical APIC ID
*/
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
/*
@@ -92,11 +110,7 @@ static __init int setup_apicpmtimer(char *s)
__setup("apicpmtimer", setup_apicpmtimer);
#endif
#ifdef CONFIG_X86_64
#define HAVE_X2APIC
#endif
#ifdef HAVE_X2APIC
#ifdef CONFIG_X86_X2APIC
int x2apic;
/* x2apic enabled before OS handover */
static int x2apic_preenabled;
@@ -194,18 +208,13 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
/*
* Paravirt kernels also might be using these below ops. So we still
* use generic apic_read()/apic_write(), which might be pointing to different
* ops in PARAVIRT case.
*/
void xapic_wait_icr_idle(void)
void native_apic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
u32 safe_xapic_wait_icr_idle(void)
u32 native_safe_apic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -221,13 +230,13 @@ u32 safe_xapic_wait_icr_idle(void)
return send_status;
}
void xapic_icr_write(u32 low, u32 id)
void native_apic_icr_write(u32 low, u32 id)
{
apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
apic_write(APIC_ICR, low);
}
static u64 xapic_icr_read(void)
u64 native_apic_icr_read(void)
{
u32 icr1, icr2;
@@ -237,54 +246,6 @@ static u64 xapic_icr_read(void)
return icr1 | ((u64)icr2 << 32);
}
static struct apic_ops xapic_ops = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = xapic_icr_read,
.icr_write = xapic_icr_write,
.wait_icr_idle = xapic_wait_icr_idle,
.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
};
struct apic_ops __read_mostly *apic_ops = &xapic_ops;
EXPORT_SYMBOL_GPL(apic_ops);
#ifdef HAVE_X2APIC
static void x2apic_wait_icr_idle(void)
{
/* no need to wait for icr idle in x2apic */
return;
}
static u32 safe_x2apic_wait_icr_idle(void)
{
/* no need to wait for icr idle in x2apic */
return 0;
}
void x2apic_icr_write(u32 low, u32 id)
{
wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
}
static u64 x2apic_icr_read(void)
{
unsigned long val;
rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
return val;
}
static struct apic_ops x2apic_ops = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
.icr_read = x2apic_icr_read,
.icr_write = x2apic_icr_write,
.wait_icr_idle = x2apic_wait_icr_idle,
.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
};
#endif
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -457,7 +418,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
@@ -535,7 +496,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
}
}
static int __init calibrate_by_pmtimer(long deltapm, long *delta)
static int __init
calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
{
const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
const long pm_thresh = pm_100ms / 100;
@@ -546,7 +508,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
return -1;
#endif
apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
/* Check, if the PM timer is available */
if (!deltapm)
@@ -556,19 +518,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
if (deltapm > (pm_100ms - pm_thresh) &&
deltapm < (pm_100ms + pm_thresh)) {
apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
} else {
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
pr_warning("APIC calibration not consistent "
"with PM Timer: %ldms instead of 100ms\n",
(long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
return 0;
}
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
pr_warning("APIC calibration not consistent "
"with PM-Timer: %ldms instead of 100ms\n",(long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
do_div(res, deltapm);
pr_info("APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long)res, *delta);
*delta = (long)res;
/* Correct the tsc counter value */
if (cpu_has_tsc) {
res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
pr_info("APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long)res, *delta);
*delta = (long)res;
apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
"PM-Timer: %lu (%ld) \n",
(unsigned long)res, *deltatsc);
*deltatsc = (long)res;
}
return 0;
@@ -579,7 +552,7 @@ static int __init calibrate_APIC_clock(void)
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
void (*real_handler)(struct clock_event_device *dev);
unsigned long deltaj;
long delta;
long delta, deltatsc;
int pm_referenced = 0;
local_irq_disable();
@@ -609,9 +582,11 @@ static int __init calibrate_APIC_clock(void)
delta = lapic_cal_t1 - lapic_cal_t2;
apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
/* we trust the PM based calibration if possible */
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta);
&delta, &deltatsc);
/* Calculate the scaled math multiplication factor */
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -629,11 +604,10 @@ static int __init calibrate_APIC_clock(void)
calibration_result);
if (cpu_has_tsc) {
delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
(delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
(delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
(deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
}
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
@@ -991,11 +965,11 @@ int __init verify_local_APIC(void)
*/
reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
if (reg1 != (reg0 ^ apic->apic_id_mask))
return 0;
/*
@@ -1089,7 +1063,7 @@ static void __cpuinit lapic_setup_esr(void)
return;
}
if (esr_disable) {
if (apic->disable_esr) {
/*
* Something untraceable is creating bad interrupts on
* secondary quads ... for the moment, just leave the
@@ -1130,9 +1104,14 @@ void __cpuinit setup_local_APIC(void)
unsigned int value;
int i, j;
if (disable_apic) {
arch_disable_smp_support();
return;
}
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && esr_disable) {
if (lapic_is_integrated() && apic->disable_esr) {
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
@@ -1146,7 +1125,7 @@ void __cpuinit setup_local_APIC(void)
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
if (!apic_id_registered())
if (!apic->apic_id_registered())
BUG();
/*
@@ -1154,7 +1133,7 @@ void __cpuinit setup_local_APIC(void)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
init_apic_ldr();
apic->init_apic_ldr();
/*
* Set Task Priority to 'accept all'. We never change this
@@ -1282,17 +1261,12 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate();
}
#ifdef HAVE_X2APIC
#ifdef CONFIG_X86_X2APIC
void check_x2apic(void)
{
int msr, msr2;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (msr & X2APIC_ENABLE) {
if (x2apic_enabled()) {
pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
apic_ops = &x2apic_ops;
}
}
@@ -1300,6 +1274,9 @@ void enable_x2apic(void)
{
int msr, msr2;
if (!x2apic)
return;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
pr_info("Enabling x2apic\n");
@@ -1363,7 +1340,6 @@ void __init enable_IR_x2apic(void)
if (!x2apic) {
x2apic = 1;
apic_ops = &x2apic_ops;
enable_x2apic();
}
@@ -1401,7 +1377,7 @@ end:
return;
}
#endif /* HAVE_X2APIC */
#endif /* CONFIG_X86_X2APIC */
#ifdef CONFIG_X86_64
/*
@@ -1532,7 +1508,7 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
#ifdef HAVE_X2APIC
#ifdef CONFIG_X86_X2APIC
if (x2apic) {
boot_cpu_physical_apicid = read_apic_id();
return;
@@ -1570,11 +1546,11 @@ int apic_version[MAX_APICS];
int __init APIC_init_uniprocessor(void)
{
#ifdef CONFIG_X86_64
if (disable_apic) {
pr_info("Apic disabled\n");
return -1;
}
#ifdef CONFIG_X86_64
if (!cpu_has_apic) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
@@ -1596,11 +1572,9 @@ int __init APIC_init_uniprocessor(void)
}
#endif
#ifdef HAVE_X2APIC
enable_IR_x2apic();
#endif
#ifdef CONFIG_X86_64
setup_apic_routing();
default_setup_apic_routing();
#endif
verify_local_APIC();
@@ -1621,35 +1595,31 @@ int __init APIC_init_uniprocessor(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
setup_local_APIC();
#ifdef CONFIG_X86_64
#ifdef CONFIG_X86_IO_APIC
/*
* Now enable IO-APICs, actually call clear_IO_APIC
* We need clear_IO_APIC before enabling vector on BP
* We need clear_IO_APIC before enabling error vector
*/
if (!skip_ioapic_setup && nr_ioapics)
enable_IO_APIC();
#endif
#ifdef CONFIG_X86_IO_APIC
if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
#endif
localise_nmi_watchdog();
end_local_APIC_setup();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
# ifdef CONFIG_X86_64
else
else {
nr_ioapics = 0;
# endif
localise_nmi_watchdog();
}
#else
localise_nmi_watchdog();
#endif
#ifdef CONFIG_X86_64
setup_boot_APIC_clock();
check_nmi_watchdog();
#else
setup_boot_clock();
#ifdef CONFIG_X86_64
check_nmi_watchdog();
#endif
return 0;
@@ -1738,7 +1708,8 @@ void __init connect_bsp_APIC(void)
outb(0x01, 0x23);
}
#endif
enable_apic_mode();
if (apic->enable_apic_mode)
apic->enable_apic_mode();
}
/**
@@ -1876,29 +1847,39 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
#endif
#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
/* are we being called early in kernel startup? */
if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid;
} else {
per_cpu(x86_cpu_to_apicid, cpu) = apicid;
per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
}
#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
#ifdef CONFIG_X86_64
/* Return the hardware processor id, which here is whatever read_apic_id() reports. */
int hard_smp_processor_id(void)
{
return read_apic_id();
}
/*
 * Default logical-destination setup: write APIC_DFR_VALUE to APIC_DFR, then
 * update APIC_LDR so that its logical ID field holds a single bit selected
 * by smp_processor_id().
 */
void default_init_apic_ldr(void)
{
unsigned long val;
apic_write(APIC_DFR, APIC_DFR_VALUE);
/* Keep the bits outside APIC_LDR_MASK exactly as read back. */
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
/* One bit per CPU number: bit N for the CPU whose smp_processor_id() is N. */
val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
apic_write(APIC_LDR, val);
}
#ifdef CONFIG_X86_32
/*
 * Default apicid-to-node lookup.
 *
 * With CONFIG_SMP the result is apicid_2_node[] indexed by
 * hard_smp_processor_id(); without it the result is always 0.
 *
 * NOTE(review): @logical_apicid is not read by either branch - confirm that
 * callers expect the node of the executing CPU rather than of the argument.
 */
int default_apicid_to_node(int logical_apicid)
{
#ifdef CONFIG_SMP
return apicid_2_node[hard_smp_processor_id()];
#else
return 0;
#endif
}
#endif
/*
@@ -1976,7 +1957,7 @@ static int lapic_resume(struct sys_device *dev)
local_irq_save(flags);
#ifdef HAVE_X2APIC
#ifdef CONFIG_X86_X2APIC
if (x2apic)
enable_x2apic();
else

View File

@@ -17,9 +17,8 @@
#include <linux/init.h>
#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <mach_apicdef.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
@@ -74,7 +73,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
unsigned long flags;
local_irq_save(flags);
__send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
local_irq_restore(flags);
}
@@ -85,14 +84,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
_flat_send_IPI_mask(mask, vector);
}
static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
int vector)
static void
flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
{
unsigned long mask = cpumask_bits(cpumask)[0];
int cpu = smp_processor_id();
if (cpu < BITS_PER_LONG)
clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
@@ -114,23 +114,27 @@ static void flat_send_IPI_allbutself(int vector)
_flat_send_IPI_mask(mask, vector);
}
} else if (num_online_cpus() > 1) {
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
__default_send_IPI_shortcut(APIC_DEST_ALLBUT,
vector, apic->dest_logical);
}
}
static void flat_send_IPI_all(int vector)
{
if (vector == NMI_VECTOR)
if (vector == NMI_VECTOR) {
flat_send_IPI_mask(cpu_online_mask, vector);
else
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
} else {
__default_send_IPI_shortcut(APIC_DEST_ALLINC,
vector, apic->dest_logical);
}
}
static unsigned int get_apic_id(unsigned long x)
static unsigned int flat_get_apic_id(unsigned long x)
{
unsigned int id;
id = (((x)>>24) & 0xFFu);
return id;
}
@@ -146,7 +150,7 @@ static unsigned int read_xapic_id(void)
{
unsigned int id;
id = get_apic_id(apic_read(APIC_ID));
id = flat_get_apic_id(apic_read(APIC_ID));
return id;
}
@@ -169,31 +173,67 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
return mask1 & mask2;
}
static unsigned int phys_pkg_id(int index_msb)
static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
{
return hard_smp_processor_id() >> index_msb;
}
struct genapic apic_flat = {
.name = "flat",
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = flat_target_cpus,
.vector_allocation_domain = flat_vector_allocation_domain,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,
.send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFu<<24),
struct apic apic_flat = {
.name = "flat",
.probe = NULL,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
.target_cpus = flat_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
.vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = NULL,
.cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = flat_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFu << 24,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
/*
@@ -232,18 +272,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
{
send_IPI_mask_sequence(cpumask, vector);
default_send_IPI_mask_sequence_phys(cpumask, vector);
}
static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
int vector)
{
send_IPI_mask_allbutself(cpumask, vector);
default_send_IPI_mask_allbutself_phys(cpumask, vector);
}
static void physflat_send_IPI_allbutself(int vector)
{
send_IPI_mask_allbutself(cpu_online_mask, vector);
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
}
static void physflat_send_IPI_all(int vector)
@@ -276,32 +316,72 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
for_each_cpu_and(cpu, cpumask, andmask)
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
if (cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
return BAD_APICID;
}
struct genapic apic_physflat = {
.name = "physical flat",
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = physflat_target_cpus,
.vector_allocation_domain = physflat_vector_allocation_domain,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask,
.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFu<<24),
struct apic apic_physflat = {
.name = "physical flat",
.probe = NULL,
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
.target_cpus = physflat_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
.vector_allocation_domain = physflat_vector_allocation_domain,
/* not needed, but shouldn't hurt: */
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = NULL,
.cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = flat_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFu << 24,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
.send_IPI_mask = physflat_send_IPI_mask,
.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};

View File

@@ -0,0 +1,267 @@
/*
* APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
*
* Drives the local APIC in "clustered mode".
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/dmi.h>
#include <linux/smp.h>
#include <asm/apicdef.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/ipi.h>
/*
 * Return the 8-bit field held in bits 31:24 of @x, i.e. the field
 * selected by the driver's apic_id_mask (0xFF << 24).
 */
static unsigned bigsmp_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0xFF;
}
/* Unconditionally reports the APIC ID as registered (always returns 1). */
static int bigsmp_apic_id_registered(void)
{
	return 1;
}
/*
 * Default interrupt target set: all online CPUs on SMP builds,
 * CPU 0 only on uniprocessor builds.
 */
static const cpumask_t *bigsmp_target_cpus(void)
{
#ifndef CONFIG_SMP
	return &cpumask_of_cpu(0);
#else
	return &cpu_online_map;
#endif
}
/* Never reports an APIC ID as already in use; both arguments are ignored. */
static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
{
	return 0;
}
/* Reports every APIC ID bit as present; @bit is ignored. */
static unsigned long bigsmp_check_apicid_present(int bit)
{
	return 1;
}
/*
 * Build the value to program into APIC_LDR for @cpu: the current LDR
 * contents with the APIC_LDR_MASK bits cleared, OR-ed with the logical ID
 * derived from the CPU's x86_bios_cpu_apicid.
 */
static inline unsigned long calculate_ldr(int cpu)
{
	unsigned long ldr = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
	unsigned long bios_id = per_cpu(x86_bios_cpu_apicid, cpu);

	return ldr | SET_APIC_LOGICAL_ID(bios_id);
}
/*
 * Set up the logical destination ID.
 *
 * Intel recommends to set DFR, LDR and TPR before enabling
 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
 * document number 292116).  So here it goes...
 *
 * DFR is written first (flat format), then LDR is computed for the
 * calling CPU and written back.
 */
static void bigsmp_init_apic_ldr(void)
{
	int this_cpu = smp_processor_id();

	apic_write(APIC_DFR, APIC_DFR_FLAT);
	apic_write(APIC_LDR, calculate_ldr(this_cpu));
}
/* Log the routing mode name and the number of I/O APICs (nr_ioapics). */
static void bigsmp_setup_apic_routing(void)
{
	printk(KERN_INFO "Enabling APIC mode: Physflat. Using %d I/O APICs\n",
	       nr_ioapics);
}
/*
 * Look up the node in apicid_2_node[].
 *
 * Note: @logical_apicid is not used; the table is indexed by
 * hard_smp_processor_id() of the calling CPU instead.
 */
static int bigsmp_apicid_to_node(int logical_apicid)
{
	int hw_apicid = hard_smp_processor_id();

	return apicid_2_node[hw_apicid];
}
/*
 * Map a CPU number to its x86_bios_cpu_apicid value.
 * Returns BAD_APICID when @mps_cpu is not below nr_cpu_ids.
 */
static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
	if (mps_cpu >= nr_cpu_ids)
		return BAD_APICID;

	return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
}
/* Return the physid mask built by physid_mask_of_physid() for @phys_apicid. */
static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
{
	return physid_mask_of_physid(phys_apicid);
}
/*
 * Mapping from cpu number to logical apicid.
 *
 * For this driver the value returned is cpu_physical_id(cpu);
 * out-of-range CPU numbers yield BAD_APICID.
 */
static inline int bigsmp_cpu_to_logical_apicid(int cpu)
{
	if (cpu < nr_cpu_ids)
		return cpu_physical_id(cpu);

	return BAD_APICID;
}
/*
 * For clustered we don't have a good way to do this yet - hack:
 * ignore @phys_map and return the promoted constant 0xFF.
 */
static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
{
	return physids_promote(0xFFL);
}
/* Always reports the physical APIC ID as present; the argument is ignored. */
static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	return 1;
}
/*
 * As we are using single CPU as destination, pick only one CPU here:
 * the first CPU set in @cpumask.
 */
static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
	int first = first_cpu(*cpumask);

	return bigsmp_cpu_to_logical_apicid(first);
}
/*
 * Pick the destination for the intersection of @cpumask and @andmask.
 *
 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 * May as well be the first CPU of the intersection that is also online.
 * Returns BAD_APICID when no such CPU exists.
 */
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
						  const struct cpumask *andmask)
{
	int cpu;

	for_each_cpu_and(cpu, cpumask, andmask) {
		if (!cpumask_test_cpu(cpu, cpu_online_mask))
			continue;

		return bigsmp_cpu_to_logical_apicid(cpu);
	}

	return BAD_APICID;
}
/* Package ID: @cpuid_apic shifted right by @index_msb bits. */
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
{
	int pkg = cpuid_apic >> index_msb;

	return pkg;
}
/* Send @vector to every CPU in @mask via the physical-mode unicast helper. */
static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
{
	default_send_IPI_mask_sequence_phys(mask, vector);
}
/* Send @vector to cpu_online_mask through the "all but self" physical helper. */
static void bigsmp_send_IPI_allbutself(int vector)
{
	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
}
/* Send @vector to every CPU in cpu_online_mask. */
static void bigsmp_send_IPI_all(int vector)
{
	bigsmp_send_IPI_mask(cpu_online_mask, vector);
}
static int dmi_bigsmp; /* can be set by dmi scanners */

/*
 * Callback used by the bigsmp_dmi_table entries: records that bigsmp was
 * requested (dmi_bigsmp = 1), logs the matching entry's ident string and
 * returns 0.
 */
static int hp_ht_bigsmp(const struct dmi_system_id *d)
{
	dmi_bigsmp = 1;
	printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);

	return 0;
}
/*
 * Systems for which hp_ht_bigsmp() is invoked; each entry matches on BIOS
 * vendor and BIOS version strings.
 */
static const struct dmi_system_id bigsmp_dmi_table[] = {
	{
		.callback	= hp_ht_bigsmp,
		.ident		= "HP ProLiant DL760 G2",
		.matches	= {
			DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
			DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
		},
	},
	{
		.callback	= hp_ht_bigsmp,
		.ident		= "HP ProLiant DL740",
		.matches	= {
			DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
			DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
		},
	},
	{ } /* NULL entry stops DMI scanning */
};
/* Vector allocation domain for @cpu: a mask containing only @cpu itself. */
static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
{
	cpus_clear(*retmask);
	cpu_set(cpu, *retmask);
}
/*
 * Probe hook: returns non-zero when bigsmp should be used.
 *
 * If def_to_bigsmp is set the answer is forced to 1; otherwise the DMI
 * table is scanned, which may set dmi_bigsmp through hp_ht_bigsmp().
 */
static int probe_bigsmp(void)
{
	if (!def_to_bigsmp)
		dmi_check_system(bigsmp_dmi_table);
	else
		dmi_bigsmp = 1;

	return dmi_bigsmp;
}
/*
 * Driver descriptor for the "bigsmp" APIC mode.
 *
 * Uses fixed delivery with irq_dest_mode 0 and dest_logical 0; IPIs go
 * through the bigsmp_send_IPI_* wrappers defined in this file. Hooks set
 * to NULL are not provided by this driver.
 */
struct apic apic_bigsmp = {
.name = "bigsmp",
.probe = probe_bigsmp,
.acpi_madt_oem_check = NULL,
.apic_id_registered = bigsmp_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
/* phys delivery to target CPU: */
.irq_dest_mode = 0,
.target_cpus = bigsmp_target_cpus,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = bigsmp_check_apicid_used,
.check_apicid_present = bigsmp_check_apicid_present,
.vector_allocation_domain = bigsmp_vector_allocation_domain,
.init_apic_ldr = bigsmp_init_apic_ldr,
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.multi_timer_check = NULL,
.apicid_to_node = bigsmp_apicid_to_node,
.cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
.setup_portio_remap = NULL,
.check_phys_apicid_present = bigsmp_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = bigsmp_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = bigsmp_get_apic_id,
.set_apic_id = NULL,
/* same bit field that bigsmp_get_apic_id() extracts (bits 31:24) */
.apic_id_mask = 0xFF << 24,
.cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
.send_IPI_mask = bigsmp_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = bigsmp_send_IPI_allbutself,
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = default_wait_for_init_deassert,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};

View File

@@ -0,0 +1,780 @@
/*
* Written by: Garry Forsgren, Unisys Corporation
* Natalie Protasevich, Unisys Corporation
*
* This file contains the code to configure and interface
* with Unisys ES7000 series hardware system manager.
*
* Copyright (c) 2003 Unisys Corporation.
* Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
*
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston MA 02111-1307, USA.
*
* Contact information: Unisys Corporation, Township Line & Union Meeting
* Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
*
* http://www.unisys.com
*/
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/ipi.h>
/*
 * ES7000 chipsets
 *
 * Values stored in es7000_plat.
 */
#define NON_UNISYS		0
#define ES7000_CLASSIC		1
#define ES7000_ZORRO		2

/* OEM table entry types recognised by parse_unisys_oem() */
#define MIP_REG			1
#define MIP_PSAI_REG		4

#define MIP_BUSY		1
/* Retry count and per-retry spin length used by es7000_mip_write() */
#define MIP_SPIN		0xf0000
/* Flag tested/cleared in the off_0x38 word of struct mip_reg */
#define MIP_VALID		0x0100000000000000ULL
/* Command word written to off_0x00 by es7000_enable_apic_mode() */
#define MIP_SW_APIC		0x1020b

/*
 * Field extractors for 64-bit OEM table words. The argument is
 * parenthesized so that compound expressions (e.g. "a | b") are
 * evaluated as a whole before shifting/masking.
 */
#define MIP_PORT(val)		(((val) >> 32) & 0xffff)	/* bits 47:32 */
#define MIP_RD_LO(val)		((val) & 0xffffffff)		/* bits 31:0  */
/*
 * Register block of eight consecutive 64-bit words, each field named after
 * its byte offset. In this file only two words are accessed by name:
 * off_0x00 (command written by es7000_enable_apic_mode(), status read back
 * in es7000_mip_write()) and off_0x38 (holds the MIP_VALID flag).
 */
struct mip_reg {
unsigned long long off_0x00;
unsigned long long off_0x08;
unsigned long long off_0x10;
unsigned long long off_0x18;
unsigned long long off_0x20;
unsigned long long off_0x28;
unsigned long long off_0x30;
unsigned long long off_0x38;
};
/*
 * Layout of a MIP_REG entry as overlaid on the OEM table by
 * parse_unisys_oem(): mip_info supplies the port number via MIP_PORT(),
 * host_reg and mip_reg supply addresses via MIP_RD_LO(). delivery_info is
 * not read anywhere in this file.
 */
struct mip_reg_info {
unsigned long long mip_info;
unsigned long long delivery_info;
unsigned long long host_reg;
unsigned long long mip_reg;
};
/*
 * Layout of a MIP_PSAI_REG entry as overlaid on the OEM table by
 * parse_unisys_oem(); only the addr field is read there (it becomes the
 * global 'psai' pointer when non-zero).
 */
struct psai {
unsigned long long entry_type;
unsigned long long addr;
unsigned long long bep_addr;
};
#ifdef CONFIG_ACPI
/*
 * View of the ACPI "OEM1" table used by find_unisys_acpi_oem_table():
 * the standard ACPI header followed by the address and size of the
 * OEM table proper.
 */
struct es7000_oem_table {
struct acpi_table_header Header;
u32 OEMTableAddr;
u32 OEMTableSize;
};
/* Copies of OEMTableAddr / OEMTableSize taken by find_unisys_acpi_oem_table() */
static unsigned long oem_addrX;
static unsigned long oem_size;
#endif
/*
 * ES7000 Globals
 */
/* Set from the MIP_PSAI_REG entry; NULL when that entry's addr is zero */
static volatile unsigned long *psai;
/* Virtual addresses (__va) of the MIP and host register blocks, set by parse_unisys_oem() */
static struct mip_reg *mip_reg;
static struct mip_reg *host_reg;
/* I/O port written with outb() in es7000_mip_write() */
static int mip_port;
/* Raw (pre-__va) addresses taken from the OEM table */
static unsigned long mip_addr;
static unsigned long host_addr;
/* One of NON_UNISYS / ES7000_CLASSIC / ES7000_ZORRO */
int es7000_plat;
/*
 * GSI override for ES7000 platforms.
 *
 * 'base' caches the sum of nr_ioapic_registers[] over all I/O APICs; it is
 * computed lazily on the first call that needs it (while it is still 0).
 */
static unsigned int base;

/*
 * Returns @gsi unchanged on ES7000_ZORRO. Otherwise, GSIs below 16 on
 * I/O APIC 0 are offset by 'base'; all others are returned unchanged.
 */
static int
es7000_rename_gsi(int ioapic, int gsi)
{
	int idx;

	if (es7000_plat == ES7000_ZORRO)
		return gsi;

	if (base == 0) {
		for (idx = 0; idx < nr_ioapics; idx++)
			base += nr_ioapic_registers[idx];
	}

	if (ioapic == 0 && gsi < 16)
		gsi += base;

	return gsi;
}
static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
if (psai == NULL)
return -1;
vect = ((unsigned long)__pa(eip)/0x1000) << 16;
psaival = (0x1000000 | vect | cpu);
while (*psai & 0x1000000)
;
*psai = psaival;
return 0;
}
/*
 * Returns 1 when the boot CPU is family 6 with a model number in the
 * inclusive range 7..11 (MPENTIUMIII), 0 otherwise.
 *
 * The model range must be tested with &&: "model >= 7 || model <= 11" is
 * true for every model, which made this match any family-6 CPU.
 */
static int es7000_apic_is_cluster(void)
{
	/* MPENTIUMIII */
	if (boot_cpu_data.x86 == 6 &&
	    (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
		return 1;

	return 0;
}
/*
 * Record the ES7000 generation in es7000_plat and install the GSI
 * renumbering hook.
 */
static void setup_unisys(void)
{
	/*
	 * Determine the generation of the ES7000 currently running.
	 *
	 * es7000_plat = 1 if the machine is a 5xx ES7000 box
	 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
	 *
	 */
	if (boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)
		es7000_plat = ES7000_CLASSIC;
	else
		es7000_plat = ES7000_ZORRO;

	ioapic_renumber_irq = es7000_rename_gsi;
}
/*
 * Parse the OEM Table:
 *
 * Starting 8 bytes into @oemptr, walk seven entries. Each entry begins
 * with a type byte followed by a size byte; the size advances to the next
 * entry. MIP_REG entries provide the host/MIP register addresses and the
 * MIP port; MIP_PSAI_REG entries provide the PSAI address.
 *
 * When at least two recognised entries were seen, setup_unisys() is
 * called; otherwise es7000_plat is set to NON_UNISYS.
 * Returns the resulting es7000_plat.
 */
static int parse_unisys_oem(char *oemptr)
{
	struct mip_reg_info *mi;
	struct psai *psaip = NULL;
	unsigned char type, size;
	unsigned long val;
	int success = 0;
	char *tp = oemptr + 8;
	int i;

	for (i = 0; i <= 6; i++) {
		type = tp[0];
		size = tp[1];

		if (type == MIP_REG) {
			mi = (struct mip_reg_info *)tp;

			val = MIP_RD_LO(mi->host_reg);
			host_addr = val;
			host_reg = __va((struct mip_reg *)val);

			val = MIP_RD_LO(mi->mip_reg);
			mip_port = MIP_PORT(mi->mip_info);
			mip_addr = val;
			mip_reg = __va((struct mip_reg *)val);

			pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
				 (unsigned long)host_reg);
			pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
				 (unsigned long)mip_reg);
			success++;
		} else if (type == MIP_PSAI_REG) {
			psaip = (struct psai *)tp;
			if (tp != NULL) {
				if (!psaip->addr)
					psai = NULL;
				else
					psai = __va(psaip->addr);
				success++;
			}
		}

		tp += size;
	}

	if (success >= 2)
		setup_unisys();
	else
		es7000_plat = NON_UNISYS;

	return es7000_plat;
}
#ifdef CONFIG_ACPI
/*
 * Locate the "OEM1" ACPI table whose oem_id starts with "UNISYS", record
 * its OEMTableAddr/OEMTableSize in oem_addrX/oem_size, and map that OEM
 * table.
 *
 * On success stores the mapped address in *@oem_addr and returns 0.
 * Returns -1 as soon as acpi_get_table_with_size() fails for an instance.
 * Each inspected header is unmapped again before moving on or returning.
 */
static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
	struct acpi_table_header *header = NULL;
	struct es7000_oem_table *table;
	acpi_size tbl_size;
	acpi_status status;
	int instance;

	for (instance = 0; ; instance++) {
		status = acpi_get_table_with_size("OEM1", instance,
						  &header, &tbl_size);
		if (!ACPI_SUCCESS(status))
			return -1;

		if (memcmp((char *) &header->oem_id, "UNISYS", 6) == 0)
			break;

		early_acpi_os_unmap_memory(header, tbl_size);
	}

	table = (void *)header;
	oem_addrX = table->OEMTableAddr;
	oem_size = table->OEMTableSize;
	early_acpi_os_unmap_memory(header, tbl_size);

	*oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);

	return 0;
}
/* Undo the mapping made by find_unisys_acpi_oem_table(); no-op for 0. */
static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
	if (oem_addr)
		__acpi_unmap_table((char *)oem_addr, oem_size);
}
/*
 * Returns 1 when the DSDT header can be fetched and its oem_id begins
 * with "UNISYS", 0 otherwise.
 */
static int es7000_check_dsdt(void)
{
	struct acpi_table_header header;

	if (!ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)))
		return 0;

	if (strncmp(header.oem_id, "UNISYS", 6) != 0)
		return 0;

	return 1;
}
/* Result of the last es7000_acpi_madt_oem_check(), reused by the _cluster variant */
static int es7000_acpi_ret;

/*
 * Hook from generic ACPI tables.c
 *
 * Detects a Unisys platform through the ACPI OEM1 table. The raw result
 * is cached in es7000_acpi_ret; the return value is non-zero only when a
 * platform was detected and es7000_apic_is_cluster() is false.
 */
static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	unsigned long oem_addr = 0;
	int dsdt_is_unisys;
	int ret = 0;

	/* check dsdt at first to avoid clear fix_map for oem_addr */
	dsdt_is_unisys = es7000_check_dsdt();

	if (find_unisys_acpi_oem_table(&oem_addr) == 0) {
		if (!dsdt_is_unisys) {
			setup_unisys();
			ret = 1;
		} else {
			ret = parse_unisys_oem((char *)oem_addr);
		}

		/*
		 * we need to unmap it
		 */
		unmap_unisys_acpi_oem_table(oem_addr);
	}

	es7000_acpi_ret = ret;

	return ret && !es7000_apic_is_cluster();
}
/*
 * Cluster variant of the ACPI MADT check: non-zero only when the cached
 * es7000_acpi_ret is non-zero and es7000_apic_is_cluster() is true.
 * Does no table parsing of its own.
 */
static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
{
	int detected = es7000_acpi_ret;

	return detected && es7000_apic_is_cluster();
}
#else /* !CONFIG_ACPI: */
/* Stub for builds without CONFIG_ACPI: never matches. */
static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	return 0;
}
/* Stub for builds without CONFIG_ACPI: never matches. */
static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
{
	return 0;
}
#endif /* !CONFIG_ACPI */
/* Busy-wait by executing rep_nop() @n times (nothing for n <= 0). */
static void es7000_spin(int n)
{
	int i;

	for (i = 0; i < n; i++)
		rep_nop();
}
/*
 * Post the command block @mip_reg to the host register block and wait for
 * completion.
 *
 * Sequence: wait until MIP_VALID is clear in host_reg->off_0x38, copy the
 * block into host_reg, write 1 to mip_port, then wait until MIP_VALID
 * becomes set in @mip_reg->off_0x38. Each wait gives up after MIP_SPIN
 * iterations with a WARN and returns -1.
 *
 * On completion the status is extracted from @mip_reg->off_0x00, the
 * MIP_VALID flag in @mip_reg is cleared and the status is returned.
 *
 * NOTE(review): the status mask 0xffff0000000000ULL covers bits 55:40, so
 * after ">> 48" only bits 55:48 survive - confirm whether
 * 0xffff000000000000ULL was intended.
 */
static int es7000_mip_write(struct mip_reg *mip_reg)
{
	int retries;
	int status = 0;

	/* wait for the host side to be free */
	retries = MIP_SPIN;
	while (host_reg->off_0x38 & MIP_VALID) {
		retries--;
		if (retries <= 0) {
			WARN(1, "Timeout waiting for Host Valid Flag\n");
			return -1;
		}
		es7000_spin(MIP_SPIN);
	}

	memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
	outb(1, mip_port);

	/* wait for the response to be flagged valid */
	retries = MIP_SPIN;
	while (!(mip_reg->off_0x38 & MIP_VALID)) {
		retries--;
		if (retries <= 0) {
			WARN(1, "Timeout waiting for MIP Valid Flag\n");
			return -1;
		}
		es7000_spin(MIP_SPIN);
	}

	status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
	mip_reg->off_0x38 &= ~MIP_VALID;

	return status;
}
static void es7000_enable_apic_mode(void)
{
struct mip_reg es7000_mip_reg;
int mip_status;
if (!es7000_plat)
return;
printk(KERN_INFO "ES7000: Enabling APIC mode.\n");
memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
es7000_mip_reg.off_0x00 = MIP_SW_APIC;
es7000_mip_reg.off_0x38 = MIP_VALID;
while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
WARN(1, "Command failed, status = %x\n", mip_status);
}
/*
 * Vector allocation domain: independent of @cpu, *@retmask is set to a
 * mask whose first word is APIC_ALL_CPUS and whose remaining words are 0.
 */
static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
{
	/*
	 * Careful. Some cpus do not strictly honor the set of cpus
	 * specified in the interrupt destination when using lowest
	 * priority interrupt delivery mode.
	 *
	 * In particular there was a hyperthreading cpu observed to
	 * deliver interrupts to the wrong hyperthread when only one
	 * hyperthread was specified in the interrupt destination.
	 */
	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
/* Spin with cpu_relax() until *@deassert reads non-zero. */
static void es7000_wait_for_init_deassert(atomic_t *deassert)
{
	for (;;) {
		if (atomic_read(deassert))
			break;
		cpu_relax();
	}
}
/*
 * Return the 8-bit field held in bits 31:24 of @x, i.e. the field
 * selected by the driver's apic_id_mask (0xFF << 24).
 */
static unsigned int es7000_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0xFF;
}
/* Send @vector to every CPU in @mask via the physical-mode unicast helper. */
static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
{
	default_send_IPI_mask_sequence_phys(mask, vector);
}
/* Send @vector to cpu_online_mask through the "all but self" physical helper. */
static void es7000_send_IPI_allbutself(int vector)
{
	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
}
/* Send @vector to every CPU in cpu_online_mask. */
static void es7000_send_IPI_all(int vector)
{
	es7000_send_IPI_mask(cpu_online_mask, vector);
}
/* Unconditionally reports the APIC ID as registered (always returns 1). */
static int es7000_apic_id_registered(void)
{
	return 1;
}
/* Target set used by the cluster driver: CPU_MASK_ALL. */
static const cpumask_t *target_cpus_cluster(void)
{
	return &CPU_MASK_ALL;
}
/* Target set used by the non-cluster driver: only the calling CPU. */
static const cpumask_t *es7000_target_cpus(void)
{
	return &cpumask_of_cpu(smp_processor_id());
}
/* Never reports an APIC ID as already in use; both arguments are ignored. */
static unsigned long
es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
{
	return 0;
}
/* Tests whether @bit is set in phys_cpu_present_map. */
static unsigned long es7000_check_apicid_present(int bit)
{
	return physid_isset(bit, phys_cpu_present_map);
}
/*
 * LDR value for @cpu: the logical ID derived from the CPU's
 * x86_bios_cpu_apicid. Unlike a read-modify-write, the current LDR
 * register contents are not consulted here.
 */
static unsigned long calculate_ldr(int cpu)
{
	unsigned long bios_id = per_cpu(x86_bios_cpu_apicid, cpu);

	return SET_APIC_LOGICAL_ID(bios_id);
}
/*
 * Set up the logical destination ID.
 *
 * Intel recommends to set DFR, LDR and TPR before enabling
 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
 * document number 292116).  So here it goes...
 *
 * Cluster variant: DFR is programmed with APIC_DFR_CLUSTER.
 */
static void es7000_init_apic_ldr_cluster(void)
{
	int this_cpu = smp_processor_id();

	apic_write(APIC_DFR, APIC_DFR_CLUSTER);
	apic_write(APIC_LDR, calculate_ldr(this_cpu));
}
/*
 * Program DFR with APIC_DFR_FLAT and LDR with the calling CPU's
 * calculate_ldr() value, in that order.
 */
static void es7000_init_apic_ldr(void)
{
	int this_cpu = smp_processor_id();

	apic_write(APIC_DFR, APIC_DFR_FLAT);
	apic_write(APIC_LDR, calculate_ldr(this_cpu));
}
static void es7000_setup_apic_routing(void)
{
int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
printk(KERN_INFO
"Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
"Physical Cluster" : "Logical Cluster",
nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
}
/* Every APIC ID maps to node 0; @logical_apicid is ignored. */
static int es7000_apicid_to_node(int logical_apicid)
{
	return 0;
}
/*
 * Map a CPU number to an APIC ID: CPU 0 yields boot_cpu_physical_apicid,
 * other CPUs below nr_cpu_ids yield their x86_bios_cpu_apicid, anything
 * else yields BAD_APICID.
 */
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
	if (mps_cpu == 0)
		return boot_cpu_physical_apicid;

	if (mps_cpu < nr_cpu_ids)
		return per_cpu(x86_bios_cpu_apicid, mps_cpu);

	return BAD_APICID;
}
/* Running counter: incremented on every es7000_apicid_to_cpu_present() call */
static int cpu_id;

/*
 * Returns the physid mask for the current value of cpu_id and then
 * advances cpu_id by one. @phys_apicid itself is not used.
 */
static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
{
	physid_mask_t present = physid_mask_of_physid(cpu_id);

	cpu_id++;

	return present;
}
/*
 * Mapping from cpu number to logical apicid.
 *
 * SMP builds: table lookup in cpu_2_logical_apicid[], or BAD_APICID when
 * @cpu is not below nr_cpu_ids. UP builds: logical_smp_processor_id().
 */
static int es7000_cpu_to_logical_apicid(int cpu)
{
#ifndef CONFIG_SMP
	return logical_smp_processor_id();
#else
	if (cpu < nr_cpu_ids)
		return cpu_2_logical_apicid[cpu];

	return BAD_APICID;
#endif
}
/*
 * For clustered we don't have a good way to do this yet - hack:
 * ignore @phys_map and return the promoted constant 0xff.
 */
static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
{
	return physids_promote(0xff);
}
/*
 * Always returns 1. As a side effect, boot_cpu_physical_apicid is
 * refreshed from read_apic_id(); @cpu_physical_apicid is ignored.
 */
static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
{
	boot_cpu_physical_apicid = read_apic_id();

	return 1;
}
static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
unsigned int round = 0;
int cpu, uninitialized_var(apicid);
/*
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
int new_apicid = es7000_cpu_to_logical_apicid(cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
return BAD_APICID;
}
apicid = new_apicid;
round++;
}
return apicid;
}
/*
 * Translate the online part of (@inmask & @andmask) into a logical APIC
 * ID using es7000_cpu_mask_to_apicid().
 *
 * A temporary cpumask is allocated with GFP_ATOMIC; if that allocation
 * fails, the logical APIC ID of CPU 0 is returned instead.
 */
static unsigned int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
			      const struct cpumask *andmask)
{
	int fallback = es7000_cpu_to_logical_apicid(0);
	cpumask_var_t scratch;
	int result;

	if (!alloc_cpumask_var(&scratch, GFP_ATOMIC))
		return fallback;

	cpumask_and(scratch, inmask, andmask);
	cpumask_and(scratch, scratch, cpu_online_mask);
	result = es7000_cpu_mask_to_apicid(scratch);

	free_cpumask_var(scratch);

	return result;
}
/* Package ID: @cpuid_apic shifted right by @index_msb bits. */
static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
{
	int pkg = cpuid_apic >> index_msb;

	return pkg;
}
/*
 * Probe hook: always returns 0 here because detection is
 * probed later in mptable/ACPI hooks.
 */
static int probe_es7000(void)
{
	return 0;
}
/* Result of the last es7000_mps_oem_check(), reused by the _cluster variant */
static int es7000_mps_ret;

/*
 * MP-table OEM hook: when the table carries an OEM pointer and @oem starts
 * with "UNISYS", parse the OEM table. The raw result is cached in
 * es7000_mps_ret; the return value is non-zero only when a platform was
 * detected and es7000_apic_is_cluster() is false.
 */
static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
				char *productid)
{
	int ret = 0;

	if (mpc->oemptr) {
		struct mpc_oemtable *oem_table =
			(struct mpc_oemtable *)mpc->oemptr;

		if (strncmp(oem, "UNISYS", 6) == 0)
			ret = parse_unisys_oem((char *)oem_table);
	}

	es7000_mps_ret = ret;

	return ret && !es7000_apic_is_cluster();
}
/*
 * Cluster variant of the MP-table check: non-zero only when the cached
 * es7000_mps_ret is non-zero and es7000_apic_is_cluster() is true.
 * Does no table parsing of its own.
 */
static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
					char *productid)
{
	int detected = es7000_mps_ret;

	return detected && es7000_apic_is_cluster();
}
/*
 * Driver descriptor for ES7000 in cluster mode.
 *
 * Differs from apic_es7000 in: the *_cluster OEM check hooks, lowest
 * priority delivery with irq_dest_mode 1, target_cpus_cluster,
 * es7000_init_apic_ldr_cluster, the wakeup_secondary_cpu hook, and a NULL
 * wait_for_init_deassert. Note that both descriptors carry the same
 * .name string.
 */
struct apic apic_es7000_cluster = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
/* logical delivery broadcast to all procs: */
.irq_dest_mode = 1,
.target_cpus = target_cpus_cluster,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = es7000_check_apicid_used,
.check_apicid_present = es7000_check_apicid_present,
.vector_allocation_domain = es7000_vector_allocation_domain,
.init_apic_ldr = es7000_init_apic_ldr_cluster,
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
.apicid_to_node = es7000_apicid_to_node,
.cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
.check_phys_apicid_present = es7000_check_phys_apicid_present,
.enable_apic_mode = es7000_enable_apic_mode,
.phys_pkg_id = es7000_phys_pkg_id,
.mps_oem_check = es7000_mps_oem_check_cluster,
.get_apic_id = es7000_get_apic_id,
.set_apic_id = NULL,
/* same bit field that es7000_get_apic_id() extracts (bits 31:24) */
.apic_id_mask = 0xFF << 24,
.cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
.send_IPI_mask = es7000_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = es7000_send_IPI_allbutself,
.send_IPI_all = es7000_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
.trampoline_phys_low = 0x467,
.trampoline_phys_high = 0x469,
.wait_for_init_deassert = NULL,
/* Nothing to do for most platforms, since cleared by the INIT cycle: */
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
/*
 * Driver descriptor for ES7000 in non-cluster mode.
 *
 * Uses fixed delivery with irq_dest_mode 0, targets only the calling CPU
 * (es7000_target_cpus), programs a flat DFR via es7000_init_apic_ldr and
 * waits for INIT deassert with es7000_wait_for_init_deassert. No
 * wakeup_secondary_cpu hook is set here.
 */
struct apic apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
/* phys delivery to target CPUs: */
.irq_dest_mode = 0,
.target_cpus = es7000_target_cpus,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = es7000_check_apicid_used,
.check_apicid_present = es7000_check_apicid_present,
.vector_allocation_domain = es7000_vector_allocation_domain,
.init_apic_ldr = es7000_init_apic_ldr,
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
.apicid_to_node = es7000_apicid_to_node,
.cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
.check_phys_apicid_present = es7000_check_phys_apicid_present,
.enable_apic_mode = es7000_enable_apic_mode,
.phys_pkg_id = es7000_phys_pkg_id,
.mps_oem_check = es7000_mps_oem_check,
.get_apic_id = es7000_get_apic_id,
.set_apic_id = NULL,
/* same bit field that es7000_get_apic_id() extracts (bits 31:24) */
.apic_id_mask = 0xFF << 24,
.cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
.send_IPI_mask = es7000_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = es7000_send_IPI_allbutself,
.send_IPI_all = es7000_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.trampoline_phys_low = 0x467,
.trampoline_phys_high = 0x469,
.wait_for_init_deassert = es7000_wait_for_init_deassert,
/* Nothing to do for most platforms, since cleared by the INIT cycle: */
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};

File diff suppressed because it is too large Load Diff

164
arch/x86/kernel/apic/ipi.c Normal file
View File

@@ -0,0 +1,164 @@
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/apic.h>
#include <asm/proto.h>
#include <asm/ipi.h>
void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
{
unsigned long query_cpu;
unsigned long flags;
/*
* Hack. The clustered APIC addressing mode doesn't allow us to send
* to an arbitrary mask, so I do a unicast to each CPU instead.
* - mbligh
*/
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
query_cpu), vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
/*
 * Like default_send_IPI_mask_sequence_phys(), but the calling CPU is
 * skipped even if it is set in @mask.
 */
void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
					   int vector)
{
	unsigned int self = smp_processor_id();
	unsigned int target;
	unsigned long irqflags;

	/* One unicast per CPU: see the "Hack" note in the _sequence_phys variant */
	local_irq_save(irqflags);
	for_each_cpu(target, mask) {
		if (target != self)
			__default_send_IPI_dest_field(
				per_cpu(x86_cpu_to_apicid, target),
				vector, APIC_DEST_PHYSICAL);
	}
	local_irq_restore(irqflags);
}
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector)
{
unsigned long flags;
unsigned int query_cpu;
/*
* Hack. The clustered APIC addressing mode doesn't allow us to send
* to an arbitrary mask, so I do a unicasts to each CPU instead. This
* should be modified to do 1 message per cluster ID - mbligh
*/
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
apic->cpu_to_logical_apicid(query_cpu), vector,
apic->dest_logical);
local_irq_restore(flags);
}
/*
 * Like default_send_IPI_mask_sequence_logical(), but the calling CPU is
 * skipped even if it is set in @mask.
 */
void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
					      int vector)
{
	unsigned long irqflags;
	unsigned int target;
	unsigned int self = smp_processor_id();

	/* One unicast per CPU: see the "Hack" note in the _sequence_logical variant */
	local_irq_save(irqflags);
	for_each_cpu(target, mask) {
		if (target != self)
			__default_send_IPI_dest_field(
				apic->cpu_to_logical_apicid(target), vector,
				apic->dest_logical);
	}
	local_irq_restore(irqflags);
}
#ifdef CONFIG_X86_32
/*
 * This is only used on smaller machines.
 *
 * Sends @vector with a single IPI whose destination field is the first
 * word of @cpumask. Warns if that word contains a bit that is not set in
 * the first word of cpu_online_mask.
 */
void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
{
	unsigned long dest_bits = cpumask_bits(cpumask)[0];
	unsigned long irqflags;

	local_irq_save(irqflags);
	WARN_ON(dest_bits & ~cpumask_bits(cpu_online_mask)[0]);
	__default_send_IPI_dest_field(dest_bits, vector, apic->dest_logical);
	local_irq_restore(irqflags);
}
/* Broadcast @vector to all CPUs except the caller, if any other CPU is online. */
void default_send_IPI_allbutself(int vector)
{
	/*
	 * if there are no other CPUs in the system then we get an APIC send
	 * error if we try to broadcast, thus avoid sending IPIs in this case.
	 */
	if (num_online_cpus() > 1)
		__default_local_send_IPI_allbutself(vector);
}
/* Thin wrapper: forwards @vector to __default_local_send_IPI_all(). */
void default_send_IPI_all(int vector)
{
	__default_local_send_IPI_all(vector);
}
/* Send @vector to the calling CPU using the APIC_DEST_SELF shortcut. */
void default_send_IPI_self(int vector)
{
	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
}
/* must come after the send_IPI functions above for inlining */

/*
 * Reverse lookup: return the first possible CPU whose x86_cpu_to_apicid
 * equals @apic_id, or -1 when none matches.
 */
static int convert_apicid_to_cpu(int apic_id)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (per_cpu(x86_cpu_to_apicid, cpu) != apic_id)
			continue;

		return cpu;
	}

	return -1;
}
/*
 * Derive the current CPU number from the hardware APIC id.
 * Falls back to CPU 0 when the CPU has no APIC, when the id reads back as
 * BAD_APICID, or when no possible CPU is registered with that id.
 */
int safe_smp_processor_id(void)
{
	int apicid, cpu;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return 0;

	apicid = hard_smp_processor_id();
	if (apicid == BAD_APICID)
		return 0;

	cpu = convert_apicid_to_cpu(apicid);
	if (cpu < 0)
		cpu = 0;

	return cpu;
}
#endif

View File

@@ -34,7 +34,7 @@
#include <asm/mce.h>
#include <mach_traps.h>
#include <asm/mach_traps.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
@@ -61,11 +61,7 @@ static int endflag __initdata;
static inline unsigned int get_nmi_count(int cpu)
{
#ifdef CONFIG_X86_64
return cpu_pda(cpu)->__nmi_count;
#else
return nmi_count(cpu);
#endif
return per_cpu(irq_stat, cpu).__nmi_count;
}
static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
*/
static inline unsigned int get_timer_irqs(int cpu)
{
#ifdef CONFIG_X86_64
return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
#else
return per_cpu(irq_stat, cpu).apic_timer_irqs +
per_cpu(irq_stat, cpu).irq0_irqs;
#endif
}
#ifdef CONFIG_SMP

View File

@@ -0,0 +1,557 @@
/*
* Written by: Patricia Gaughen, IBM Corporation
*
* Copyright (C) 2002, IBM Corp.
* Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <gone@us.ibm.com>
*/
#include <linux/nodemask.h>
#include <linux/topology.h>
#include <linux/bootmem.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/numaq.h>
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/ipi.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
int found_numaq;
/*
 * Have to match translation table entries to main table entries by counter
 * hence the mpc_record variable .... can't see a less disgusting way of
 * doing this ....
 *
 * One entry of the OEM translation table. smp_read_mpc_oem() casts the raw
 * table bytes to this layout, so field order and sizes must not change.
 */
struct mpc_trans {
unsigned char mpc_type;
unsigned char trans_len;
unsigned char trans_type;
unsigned char trans_quad;
unsigned char trans_global;
unsigned char trans_local;
unsigned short trans_reserved;
};
/* x86_quirks member */
static int mpc_record;
static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
int mp_bus_id_to_node[MAX_MP_BUSSES];
int mp_bus_id_to_local[MAX_MP_BUSSES];
int quad_local_to_mp_bus_id[NR_CPUS/4][4];
/*
 * Bring @node online and record its memory range, taken from the node's
 * entry in the system configuration data @scd.
 */
static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
{
	struct eachquadmem *quad_mem = &scd->eq[node];

	node_set_online(node);

	/* The config table counts in megabytes; the pfn arrays are in pages. */
	node_start_pfn[node] = MB_TO_PAGES(quad_mem->hi_shrd_mem_start -
					   quad_mem->priv_mem_size);
	node_end_pfn[node] = MB_TO_PAGES(quad_mem->hi_shrd_mem_start +
					 quad_mem->hi_shrd_mem_size);

	e820_register_active_regions(node, node_start_pfn[node],
				     node_end_pfn[node]);

	memory_present(node, node_start_pfn[node], node_end_pfn[node]);

	node_remap_size[node] = node_memmap_size_bytes(node,
						       node_start_pfn[node],
						       node_end_pfn[node]);
}
/*
 * Function: smp_dump_qct()
 *
 * Description: gets memory layout from the quad config table. This
 * function also updates node_online_map with the nodes (quads) present:
 * the map is cleared first, then every node whose bit is set in
 * quads_present31_0 is registered.
 */
static void __init smp_dump_qct(void)
{
	struct sys_cfg_data *scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
	int node;

	nodes_clear(node_online_map);

	for_each_node(node) {
		if (!(scd->quads_present31_0 & (1 << node)))
			continue;

		numaq_register_node(node, scd);
	}
}
/*
 * Clear the TSC CPU capability when running on a NUMA-Q system that has
 * more than one node online. Does nothing otherwise.
 */
void __cpuinit numaq_tsc_disable(void)
{
	if (!found_numaq || num_online_nodes() <= 1)
		return;

	printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
	setup_clear_cpu_cap(X86_FEATURE_TSC);
}
/*
 * x86_quirks .arch_pre_time_init hook: runs numaq_tsc_disable() and always
 * returns 0.
 */
static int __init numaq_pre_time_init(void)
{
numaq_tsc_disable();
return 0;
}
/*
 * Build a logical APIC id from a quad number and a physical APIC id.
 * The quad goes above the low four bits; the low part is the physical id
 * shifted left by one, or 1 when the physical id is 0.
 */
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
	int cpu_bits = 1;

	if (phys_apicid)
		cpu_bits = phys_apicid << 1;

	return (quad << 4) + cpu_bits;
}
/*
 * x86_quirks member.
 *
 * Compute the logical APIC id for processor entry @m, using the quad taken
 * from the translation entry at the current mpc_record, log it, and
 * return it.
 */
static int mpc_apic_id(struct mpc_cpu *m)
{
	int quad;
	int logical;

	quad = translation_table[mpc_record]->trans_quad;
	logical = generate_logical_apicid(quad, m->apicid);

	printk(KERN_DEBUG
	       "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
	       m->apicid,
	       (m->cpufeature & CPU_FAMILY_MASK) >> 8,
	       (m->cpufeature & CPU_MODEL_MASK) >> 4,
	       m->apicver, quad, logical);

	return logical;
}
/* x86_quirks member */
static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
mp_bus_id_to_node[m->busid] = quad;
mp_bus_id_to_local[m->busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
}
/* x86_quirks member */
static void mpc_oem_pci_bus(struct mpc_bus *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
quad_local_to_mp_bus_id[quad][local] = m->busid;
}
/*
 * Log translation entry @m, remember it in translation_table[] at the
 * current mpc_record (unless the table is full), and mark the entry's quad
 * online if it is a valid node that is not online yet.
 */
static void __init MP_translation_info(struct mpc_trans *m)
{
	printk(KERN_INFO
	       "Translation: record %d, type %d, quad %d, global %d, local %d\n",
	       mpc_record, m->trans_type, m->trans_quad, m->trans_global,
	       m->trans_local);

	if (mpc_record < MAX_MPC_ENTRY)
		translation_table[mpc_record] = m; /* stash this for later */
	else
		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");

	if (m->trans_quad >= MAX_NUMNODES)
		return;

	if (!node_online(m->trans_quad))
		node_set_online(m->trans_quad);
}
/*
 * Sum @len bytes starting at @mp and return the low eight bits of the sum.
 * The caller in this file treats a non-zero result as a checksum error.
 */
static int __init mpf_checksum(unsigned char *mp, int len)
{
	int total = 0;

	for (; len != 0; len--)
		total += *mp++;

	return total & 0xFF;
}
/*
 * Read/parse the MPC oem tables
 *
 * Validates the signature and checksum of @oemtable, then walks the entries
 * that follow the header. Each MP_TRANSLATION entry is passed to
 * MP_translation_info() and bumps mpc_record; any other entry type aborts
 * the walk with a warning.
 *
 * NOTE(review): @oemsize is not used here; the walk is bounded only by
 * oemtable->length — confirm that is intended.
 * NOTE(review): the loop checks that an entry starts inside the table, not
 * that a whole struct mpc_trans fits — verify against truncated tables.
 */
static void __init
smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize)
{
int count = sizeof(*oemtable); /* the header size */
/* first entry starts right after the header */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;
mpc_record = 0;
printk(KERN_INFO
"Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
printk(KERN_WARNING
"SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->signature[0], oemtable->signature[1],
oemtable->signature[2], oemtable->signature[3]);
return;
}
/* the byte sum over the whole table must be zero */
if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->length) {
/* the first byte of each entry is its type */
switch (*oemptr) {
case MP_TRANSLATION:
{
struct mpc_trans *m = (void *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
default:
printk(KERN_WARNING
"Unrecognised OEM table entry type! - %d\n",
(int)*oemptr);
return;
}
}
}
/*
 * x86_quirks .setup_ioapic_ids hook: does no work and returns 1, which the
 * comment below says lets the caller skip I/O APIC id setup.
 */
static int __init numaq_setup_ioapic_ids(void)
{
/* so can skip it */
return 1;
}
/*
 * NUMA-Q platform quirks. early_check_numaq() installs this table as
 * x86_quirks once found_numaq is set. Hooks left NULL are not provided by
 * this platform.
 */
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
.arch_pre_intr_init = NULL,
.arch_memory_setup = NULL,
.arch_intr_init = NULL,
.arch_trap_init = NULL,
.mach_get_smp_config = NULL,
.mach_find_smp_config = NULL,
/* counter shared with the mpc_* callbacks below */
.mpc_record = &mpc_record,
.mpc_apic_id = mpc_apic_id,
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
};
/*
 * Run the early SMP configuration scan and, if it set found_numaq, switch
 * x86_quirks to the NUMA-Q quirk table.
 */
static __init void early_check_numaq(void)
{
/*
* Find possible boot-time SMP configuration:
*/
early_find_smp_config();
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();
if (found_numaq)
x86_quirks = &numaq_x86_quirks;
}
/*
 * Detect NUMA-Q and, when found, read the memory layout from the quad
 * config table. Returns 1 on a NUMA-Q system and 0 otherwise.
 */
int __init get_memcfg_numaq(void)
{
	early_check_numaq();

	if (found_numaq) {
		smp_dump_qct();
		return 1;
	}

	return 0;
}
#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
/* Extract the 4-bit APIC id held in bits 24-27 of register value @x. */
static inline unsigned int numaq_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0x0F;
}
/* Send @vector to each CPU in @mask, one logical-mode IPI per CPU. */
static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
{
default_send_IPI_mask_sequence_logical(mask, vector);
}
/* Send @vector to every online CPU except the calling one. */
static inline void numaq_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
}
/* Send @vector to every online CPU, including the calling one. */
static inline void numaq_send_IPI_all(int vector)
{
numaq_send_IPI_mask(cpu_online_mask, vector);
}
#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
/*
 * Because we use NMIs rather than the INIT-STARTUP sequence to
 * bootstrap the CPUs, the APIC may be in a weird state. Kick it
 * by clearing the local APIC:
 */
static inline void numaq_smp_callin_clear_local_apic(void)
{
clear_local_APIC();
}
/* Target-CPU mask for this driver: always CPU_MASK_ALL. */
static inline const cpumask_t *numaq_target_cpus(void)
{
return &CPU_MASK_ALL;
}
/* Return non-zero if @apicid is set in the physid mask @bitmap. */
static inline unsigned long
numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
{
return physid_isset(apicid, bitmap);
}
/* Return non-zero if @bit is set in phys_cpu_present_map. */
static inline unsigned long numaq_check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
}
/* NUMA-Q performs no registration check: always reports success (1). */
static inline int numaq_apic_id_registered(void)
{
	return 1;
}
/* Intentionally empty .init_apic_ldr hook; see the comment in the body. */
static inline void numaq_init_apic_ldr(void)
{
/* Already done in NUMA-Q firmware */
}
/* Log the APIC mode and the number of I/O APICs; changes no state. */
static inline void numaq_setup_apic_routing(void)
{
printk(KERN_INFO
"Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
nr_ioapics);
}
/*
 * Skip adding the timer int on secondary nodes, which causes
 * a small but painful rift in the time-space continuum.
 *
 * Returns 1 only for irq 0 on an I/O APIC other than number 0.
 */
static inline int numaq_multi_timer_check(int apic, int irq)
{
	if (irq != 0)
		return 0;

	return apic != 0;
}
/*
 * Return the I/O APIC physical id map. @phys_map is ignored; the result is
 * always the fixed value 0xF promoted to a physid mask.
 */
static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
{
/* We don't have a good way to do this yet - hack */
return physids_promote(0xFUL);
}
/*
 * Map a CPU number to its logical APIC id via cpu_2_logical_apicid[].
 * Returns BAD_APICID when @cpu is not below nr_cpu_ids.
 */
static inline int numaq_cpu_to_logical_apicid(int cpu)
{
	if (cpu < nr_cpu_ids)
		return cpu_2_logical_apicid[cpu];

	return BAD_APICID;
}
/*
 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
 * cpu to APIC ID relation to properly interact with the intelligent
 * mode of the cluster controller.
 *
 * For @mps_cpu below 60 the id is built from the quad (mps_cpu / 4) above
 * the low four bits and a single bit selecting the CPU within the quad.
 * Anything else yields BAD_APICID.
 */
static inline int numaq_cpu_present_to_apicid(int mps_cpu)
{
	int quad, cpu_bit;

	if (mps_cpu >= 60)
		return BAD_APICID;

	quad = mps_cpu >> 2;
	cpu_bit = 1 << (mps_cpu & 0x3);

	return (quad << 4) | cpu_bit;
}
/* The node number is the logical APIC id with its low four bits dropped. */
static inline int numaq_apicid_to_node(int logical_apicid)
{
	int node = logical_apicid >> 4;

	return node;
}
/*
 * Convert a logical APIC id to a physid mask with one bit set, at position
 * (cpu + 4 * node). The node comes from numaq_apicid_to_node() and cpu is
 * __ffs() of the id's low four bits.
 * NOTE(review): the low nibble is passed to __ffs() unchecked — confirm it
 * can never be zero here.
 */
static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
{
int node = numaq_apicid_to_node(logical_apicid);
int cpu = __ffs(logical_apicid & 0xf);
return physid_mask_of_physid(cpu + 4*node);
}
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
/* No presence check is done on NUMA-Q: every physical APIC id is accepted. */
static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	return 1;
}
/*
 * We use physical apicids here, not logical, so just return the default
 * physical broadcast to stop people from breaking us.
 *
 * @cpumask is ignored; the result is always 0x0F.
 */
static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
return 0x0F;
}
/*
 * Two-mask variant of numaq_cpu_mask_to_apicid(). Both masks are ignored
 * and the result is always 0x0F.
 */
static inline unsigned int
numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
return 0x0F;
}
/*
 * No NUMA-Q box has a HT CPU, but it can't hurt to use the default code:
 * the package id is @cpuid_apic shifted right by @index_msb.
 */
static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
{
	int pkg = cpuid_apic >> index_msb;

	return pkg;
}
/*
 * MP-table OEM hook: set found_numaq when the OEM string starts with
 * "IBM NUMA", otherwise log that this is not a NUMA-Q system.
 * Returns the current value of found_numaq.
 */
static int
numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
{
	if (!strncmp(oem, "IBM NUMA", 8))
		found_numaq = 1;
	else
		printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");

	return found_numaq;
}
/* Driver probe: reports whether NUMA-Q was detected earlier. */
static int probe_numaq(void)
{
/* already know from get_memcfg_numaq() */
return found_numaq;
}
/*
 * Fill @retmask with the vector allocation domain. @cpu is ignored: the
 * result is always a mask whose first word is APIC_ALL_CPUS.
 */
static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
* priority interrupt delivery mode.
*
* In particular there was a hyperthreading cpu observed to
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt destination.
*/
*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
/*
 * Map the cross-quad port I/O window into xquad_portio when more than one
 * node is online; with a single node nothing is mapped.
 * NOTE(review): the ioremap() result is stored and printed without a NULL
 * check — confirm users of xquad_portio tolerate a failed mapping.
 */
static void numaq_setup_portio_remap(void)
{
int num_quads = num_online_nodes();
if (num_quads <= 1)
return;
printk(KERN_INFO
"Remapping cross-quad port I/O for %d quads\n", num_quads);
/* one XQUAD_PORTIO_QUAD-sized slice per quad */
xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
printk(KERN_INFO
"xquad_portio vaddr 0x%08lx, len %08lx\n",
(u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
}
/*
 * APIC driver descriptor for NUMA-Q. Hooks set to NULL are not provided by
 * this driver. IPIs go through the numaq_send_IPI_* helpers above, and
 * secondary CPUs are woken via NMI (wakeup_secondary_cpu_via_nmi).
 */
struct apic apic_numaq = {
.name = "NUMAQ",
.probe = probe_numaq,
.acpi_madt_oem_check = NULL,
.apic_id_registered = numaq_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
/* physical delivery on LOCAL quad: */
.irq_dest_mode = 0,
.target_cpus = numaq_target_cpus,
.disable_esr = 1,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = numaq_check_apicid_used,
.check_apicid_present = numaq_check_apicid_present,
.vector_allocation_domain = numaq_vector_allocation_domain,
.init_apic_ldr = numaq_init_apic_ldr,
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
.setup_apic_routing = numaq_setup_apic_routing,
.multi_timer_check = numaq_multi_timer_check,
.apicid_to_node = numaq_apicid_to_node,
.cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
.cpu_present_to_apicid = numaq_cpu_present_to_apicid,
.apicid_to_cpu_present = numaq_apicid_to_cpu_present,
.setup_portio_remap = numaq_setup_portio_remap,
.check_phys_apicid_present = numaq_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = numaq_phys_pkg_id,
.mps_oem_check = numaq_mps_oem_check,
.get_apic_id = numaq_get_apic_id,
.set_apic_id = NULL,
/* matches the 4-bit field read by numaq_get_apic_id() */
.apic_id_mask = 0x0F << 24,
.cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
.send_IPI_mask = numaq_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = numaq_send_IPI_allbutself,
.send_IPI_all = numaq_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
.trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
/* We don't do anything here because we use NMI's to boot instead */
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
.inquire_remote_apic = NULL,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};

View File

@@ -0,0 +1,284 @@
/*
* Default generic APIC driver. This handles up to 8 CPUs.
*
* Copyright 2003 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License, v.2
*
* Generic x86 APIC driver probe layer.
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/mpspec.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <asm/ipi.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/e820.h>
#include <asm/setup.h>
/*
 * no_broadcast defaults to 1 when CONFIG_HOTPLUG_CPU is enabled and to 0
 * otherwise. The "no_ipi_broadcast=" boot parameter can override it.
 */
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
#define DEFAULT_SEND_IPI (0)
#endif
int no_broadcast = DEFAULT_SEND_IPI;
/*
 * Handler for the "no_ipi_broadcast=" boot parameter: parses an integer
 * from @str into no_broadcast and logs the resulting mode. Always returns 1.
 */
static __init int no_ipi_broadcast(char *str)
{
get_option(&str, &no_broadcast);
pr_info("Using %s mode\n",
no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
return 1;
}
__setup("no_ipi_broadcast=", no_ipi_broadcast);
/* Late initcall that logs the IPI mode selected by no_broadcast. */
static int __init print_ipi_mode(void)
{
pr_info("Using IPI %s mode\n",
no_broadcast ? "No-Shortcut" : "Shortcut");
return 0;
}
late_initcall(print_ipi_mode);
/*
 * Log the APIC mode and I/O APIC count. Compiles to an empty function
 * without CONFIG_X86_IO_APIC.
 */
void default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
printk(KERN_INFO
"Enabling APIC mode: Flat. Using %d I/O APICs\n",
nr_ioapics);
#endif
}
/*
 * Fill @retmask with the vector allocation domain. @cpu is ignored: the
 * result is always a mask whose first word is APIC_ALL_CPUS.
 */
static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/*
* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
* priority interrupt delivery mode.
*
* In particular there was a hyperthreading cpu observed to
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt destination.
*/
*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
}
/*
 * Probe for the default driver; should be called last because it
 * unconditionally reports a match.
 */
static int probe_default(void)
{
	return 1;
}
/*
 * Default (flat, logical-mode) APIC driver descriptor. Hooks set to NULL
 * are not provided by this driver. It is the initial value of the global
 * `apic` pointer and the last entry tried in apic_probe[].
 */
struct apic apic_default = {
.name = "default",
.probe = probe_default,
.acpi_madt_oem_check = NULL,
.apic_id_registered = default_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
.target_cpus = default_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = default_check_apicid_used,
.check_apicid_present = default_check_apicid_present,
.vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = default_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = default_setup_apic_routing,
.multi_timer_check = NULL,
.apicid_to_node = default_apicid_to_node,
.cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = default_apicid_to_cpu_present,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = default_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = default_get_apic_id,
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
.cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = default_send_IPI_mask_logical,
.send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
.send_IPI_allbutself = default_send_IPI_allbutself,
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = default_wait_for_init_deassert,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
extern struct apic apic_numaq;
extern struct apic apic_summit;
extern struct apic apic_bigsmp;
extern struct apic apic_es7000;
extern struct apic apic_es7000_cluster;
extern struct apic apic_default;
struct apic *apic = &apic_default;
EXPORT_SYMBOL_GPL(apic);
/*
 * NULL-terminated list of APIC drivers, tried in order. Sub-architecture
 * drivers are present only when their config option is enabled, and
 * apic_default comes last because its probe always succeeds.
 */
static struct apic *apic_probe[] __initdata = {
#ifdef CONFIG_X86_NUMAQ
&apic_numaq,
#endif
#ifdef CONFIG_X86_SUMMIT
&apic_summit,
#endif
#ifdef CONFIG_X86_BIGSMP
&apic_bigsmp,
#endif
#ifdef CONFIG_X86_ES7000
&apic_es7000,
&apic_es7000_cluster,
#endif
&apic_default, /* must be last */
NULL,
};
static int cmdline_apic __initdata;
/*
 * Early "apic=" parameter handler. If @arg names one of the drivers in
 * apic_probe[], select it and remember that the choice came from the
 * command line. Returns -EINVAL for a missing argument and 0 otherwise,
 * including when no driver name matches.
 */
static int __init parse_apic(char *arg)
{
	struct apic **drv;

	if (!arg)
		return -EINVAL;

	for (drv = apic_probe; *drv; drv++) {
		if (strcmp((*drv)->name, arg) == 0) {
			apic = *drv;
			cmdline_apic = 1;
			break;
		}
	}

	/* Parsed again by __setup for debug/verbose */
	return 0;
}
early_param("apic", parse_apic);
/*
 * Optionally override the default driver with bigsmp. Compiles to an empty
 * function without CONFIG_X86_BIGSMP.
 */
void __init generic_bigsmp_probe(void)
{
#ifdef CONFIG_X86_BIGSMP
	/*
	 * This routine is used to switch to bigsmp mode when
	 * - There is no apic= option specified by the user
	 * - generic_apic_probe() has chosen apic_default as the sub_arch
	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
	 */
	if (cmdline_apic || apic != &apic_default)
		return;

	if (!apic_bigsmp.probe())
		return;

	apic = &apic_bigsmp;
	printk(KERN_INFO "Overriding APIC driver with %s\n", apic->name);
#endif
}
/*
 * Pick the APIC driver. Unless one was chosen on the command line, the
 * first apic_probe[] entry whose ->probe() succeeds is selected; if none
 * succeeds the kernel panics. The chosen driver name is logged.
 */
void __init generic_apic_probe(void)
{
	if (!cmdline_apic) {
		struct apic **drv = apic_probe;

		while (*drv && !(*drv)->probe())
			drv++;

		/* Not visible without early console */
		if (!*drv)
			panic("Didn't find an APIC driver");

		apic = *drv;
	}

	printk(KERN_INFO "Using APIC driver %s\n", apic->name);
}
/* These functions can switch the APIC even after the initial ->probe() */

/*
 * Offer the MP-table OEM strings to each driver's ->mps_oem_check hook.
 * The first driver that accepts becomes the active one, unless a driver
 * was forced on the command line. Returns 1 if a driver accepted, else 0.
 */
int __init
generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
{
	struct apic **drv;

	for (drv = apic_probe; *drv; drv++) {
		if (!(*drv)->mps_oem_check ||
		    !(*drv)->mps_oem_check(mpc, oem, productid))
			continue;

		if (!cmdline_apic) {
			apic = *drv;
			printk(KERN_INFO "Switched to APIC driver `%s'.\n",
			       apic->name);
		}
		return 1;
	}

	return 0;
}
/*
 * Offer the ACPI MADT OEM ids to each driver's ->acpi_madt_oem_check hook.
 * The first driver that accepts becomes the active one, unless a driver
 * was forced on the command line. Returns 1 if a driver accepted, else 0.
 */
int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	struct apic **drv;

	for (drv = apic_probe; *drv; drv++) {
		if (!(*drv)->acpi_madt_oem_check ||
		    !(*drv)->acpi_madt_oem_check(oem_id, oem_table_id))
			continue;

		if (!cmdline_apic) {
			apic = *drv;
			printk(KERN_INFO "Switched to APIC driver `%s'.\n",
			       apic->name);
		}
		return 1;
	}

	return 0;
}

View File

@@ -19,22 +19,27 @@
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/setup.h>
extern struct genapic apic_flat;
extern struct genapic apic_physflat;
extern struct genapic apic_x2xpic_uv_x;
extern struct genapic apic_x2apic_phys;
extern struct genapic apic_x2apic_cluster;
/* APIC drivers referenced by the probe table and routing setup below. */
extern struct apic apic_flat;
extern struct apic apic_physflat;
/* was misspelled "apic_x2xpic_uv_x"; every user spells it apic_x2apic_uv_x */
extern struct apic apic_x2apic_uv_x;
extern struct apic apic_x2apic_phys;
extern struct apic apic_x2apic_cluster;
struct genapic __read_mostly *genapic = &apic_flat;
struct apic __read_mostly *apic = &apic_flat;
EXPORT_SYMBOL_GPL(apic);
static struct genapic *apic_probe[] __initdata = {
static struct apic *apic_probe[] __initdata = {
#ifdef CONFIG_X86_UV
&apic_x2apic_uv_x,
#endif
#ifdef CONFIG_X86_X2APIC
&apic_x2apic_phys,
&apic_x2apic_cluster,
#endif
&apic_physflat,
NULL,
};
@@ -42,39 +47,45 @@ static struct genapic *apic_probe[] __initdata = {
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
void __init setup_apic_routing(void)
void __init default_setup_apic_routing(void)
{
if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
if (!intr_remapping_enabled)
genapic = &apic_flat;
#ifdef CONFIG_X86_X2APIC
if (x2apic && (apic != &apic_x2apic_phys &&
#ifdef CONFIG_X86_UV
apic != &apic_x2apic_uv_x &&
#endif
apic != &apic_x2apic_cluster)) {
if (x2apic_phys)
apic = &apic_x2apic_phys;
else
apic = &apic_x2apic_cluster;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
#endif
if (genapic == &apic_flat) {
if (apic == &apic_flat) {
if (max_physical_apicid >= 8)
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
apic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
if (x86_quirks->update_genapic)
x86_quirks->update_genapic();
}
/* Same for both flat and physical. */
void apic_send_IPI_self(int vector)
{
__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
int i;
for (i = 0; apic_probe[i]; ++i) {
if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
genapic = apic_probe[i];
apic = apic_probe[i];
printk(KERN_INFO "Setting APIC routing to %s.\n",
genapic->name);
apic->name);
return 1;
}
}

View File

@@ -0,0 +1,579 @@
/*
* IBM Summit-Specific Code
*
* Written By: Matthew Dobson, IBM Corporation
*
* Copyright (c) 2003 IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <colpatch@us.ibm.com>
*
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/bios_ebda.h>
/*
* APIC driver for the IBM "Summit" chipset.
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/smp.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <asm/ipi.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/smp.h>
/* Extract the 8-bit APIC id held in bits 24-31 of register value @x. */
static unsigned summit_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0xFF;
}
/* Send @vector to each CPU in @mask, one logical-mode IPI per CPU. */
static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
{
default_send_IPI_mask_sequence_logical(mask, vector);
}
static void summit_send_IPI_allbutself(int vector)
{
cpumask_t mask = cpu_online_map;
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
summit_send_IPI_mask(&mask, vector);
}
static void summit_send_IPI_all(int vector)
{
summit_send_IPI_mask(&cpu_online_map, vector);
}
#include <asm/tsc.h>
extern int use_cyclone;
#ifdef CONFIG_X86_SUMMIT_NUMA
static void setup_summit(void);
#else
static inline void setup_summit(void) {}
#endif
/*
 * MP-table OEM hook. Recognises a Summit system by the OEM id "IBM ENSW"
 * plus one of the known product ids. On a match it marks the TSC unstable,
 * enables the cyclone timer, runs setup_summit() and returns 1; otherwise
 * it returns 0 and changes nothing.
 */
static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
				char *productid)
{
	if (strncmp(oem, "IBM ENSW", 8))
		return 0;

	if (strncmp(productid, "VIGIL SMP", 9) &&
	    strncmp(productid, "EXA", 3) &&
	    strncmp(productid, "RUTHLESS SMP", 12))
		return 0;

	mark_tsc_unstable("Summit based system");
	use_cyclone = 1;	/* enable cyclone-timer */
	setup_summit();

	return 1;
}
/*
 * Hook from generic ACPI tables.c
 *
 * Recognises a Summit system by an OEM id starting with "IBM" plus one of
 * the known OEM table ids. On a match it marks the TSC unstable, enables
 * the cyclone timer, runs setup_summit() and returns 1; otherwise it
 * returns 0.
 */
static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	if (strncmp(oem_id, "IBM", 3))
		return 0;

	if (strncmp(oem_table_id, "SERVIGIL", 8) &&
	    strncmp(oem_table_id, "EXA", 3))
		return 0;

	mark_tsc_unstable("Summit based system");
	use_cyclone = 1;	/* enable cyclone-timer */
	setup_summit();

	return 1;
}
/* Header of the RIO table; packed, so the layout is exactly three bytes. */
struct rio_table_hdr {
unsigned char version; /* Version number of this data structure */
/* Version 3 adds chassis_num & WP_index */
unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
} __attribute__((packed));
/* Per-scalability-device record; packed, so there is no padding between fields. */
struct scal_detail {
unsigned char node_id; /* Scalability Node ID */
unsigned long CBAR; /* Address of 1MB register space */
unsigned char port0node; /* Node ID port connected to: 0xFF=None */
unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
unsigned char port1node; /* Node ID port connected to: 0xFF = None */
unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
unsigned char port2node; /* Node ID port connected to: 0xFF = None */
unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
} __attribute__((packed));
/*
 * Per-RIO-device record; packed, so there is no padding between fields.
 * The `type` field is what is_WPEG() compares against the node_type values.
 */
struct rio_detail {
unsigned char node_id; /* RIO Node ID */
unsigned long BBAR; /* Address of 1MB register space */
unsigned char type; /* Type of device */
unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
/* For CYC: Node ID of Twister that owns this CYC */
unsigned char port0node; /* Node ID port connected to: 0xFF=None */
unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
unsigned char port1node; /* Node ID port connected to: 0xFF=None */
unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
/* For CYC: 0 */
unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
/* = 0 : the XAPIC is not used, ie:*/
/* ints fwded to another XAPIC */
/* Bits1:7 Reserved */
/* For CYC: Bits0:7 Reserved */
unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
/* lower slot numbers/PCI bus numbers */
/* For CYC: No meaning */
unsigned char chassis_num; /* 1 based Chassis number */
/* For LookOut WPEGs this field indicates the */
/* Expansion Chassis #, enumerated from Boot */
/* Node WPEG external port, then Boot Node CYC */
/* external port, then Next Vigil chassis WPEG */
/* external port, etc. */
/* Shared Lookouts have only 1 chassis number (the */
/* first one assigned) */
} __attribute__((packed));
/* Device type codes compared against rio_detail.type (see is_WPEG()). */
typedef enum {
CompatTwister = 0, /* Compatibility Twister */
AltTwister = 1, /* Alternate Twister of internal 8-way */
CompatCyclone = 2, /* Compatibility Cyclone */
AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
CompatWPEG = 4, /* Compatibility WPEG */
AltWPEG = 5, /* Second Planar WPEG */
LookOutAWPEG = 6, /* LookOut WPEG */
LookOutBWPEG = 7, /* LookOut WPEG */
} node_type;
/* Return 1 if @rio's type is one of the four WPEG variants, else 0. */
static inline int is_WPEG(struct rio_detail *rio)
{
	switch (rio->type) {
	case CompatWPEG:
	case AltWPEG:
	case LookOutAWPEG:
	case LookOutBWPEG:
		return 1;
	default:
		return 0;
	}
}
/* In clustered mode, the high nibble of APIC ID is a cluster number.
* The low nibble is a 4-bit bitmap. */
#define XAPIC_DEST_CPUS_SHIFT 4
#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
/* Target-CPU mask for this driver: a mask containing only CPU 0. */
static const cpumask_t *summit_target_cpus(void)
{
/* CPU_MASK_ALL (0xff) has undefined behaviour with
* dest_LowestPrio mode logical clustered apic interrupt routing.
* Just start on cpu 0. IRQ balancing will spread load.
*/
return &cpumask_of_cpu(0);
}
/* Both arguments are ignored: always reports 0. */
static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
{
return 0;
}
/*
 * we don't use the phys_cpu_present_map to indicate apicid presence,
 * so every @bit is reported as present.
 */
static unsigned long summit_check_apicid_present(int bit)
{
	return 1;
}
/*
 * Program this CPU's logical destination registers for clustered mode.
 *
 * The logical id combines this CPU's cluster (from its hardware APIC id)
 * with a one-hot bit at position `count`. On SMP, count is the number of
 * cpu_2_logical_apicid[] entries already in the same cluster; without
 * CONFIG_SMP it stays 0.
 */
static void summit_init_apic_ldr(void)
{
unsigned long val, id;
int count = 0;
u8 my_id = (u8)hard_smp_processor_id();
u8 my_cluster = APIC_CLUSTER(my_id);
#ifdef CONFIG_SMP
u8 lid;
int i;
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
lid = cpu_2_logical_apicid[i];
if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
++count;
}
#endif
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
id = my_cluster | (1UL << count);
/* select cluster destination format before writing the logical id */
apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
/* keep the non-id bits of LDR and replace only the logical id field */
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
apic_write(APIC_LDR, val);
}
/* Summit performs no registration check: always reports success (1). */
static int summit_apic_id_registered(void)
{
	return 1;
}
static void summit_setup_apic_routing(void)
{
printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
nr_ioapics);
}
/*
 * Return a node number; always 0 without CONFIG_SMP.
 * NOTE(review): @logical_apicid is not used. On SMP the lookup in
 * apicid_2_node[] is keyed by hard_smp_processor_id() of the calling CPU —
 * confirm this is intended.
 */
static int summit_apicid_to_node(int logical_apicid)
{
#ifdef CONFIG_SMP
return apicid_2_node[hard_smp_processor_id()];
#else
return 0;
#endif
}
/*
 * Mapping from cpu number to logical apicid.
 * On SMP this reads cpu_2_logical_apicid[] and yields BAD_APICID when @cpu
 * is not below nr_cpu_ids. Without CONFIG_SMP it returns the current CPU's
 * logical id and ignores @cpu.
 */
static inline int summit_cpu_to_logical_apicid(int cpu)
{
#ifdef CONFIG_SMP
	if (cpu < nr_cpu_ids)
		return cpu_2_logical_apicid[cpu];

	return BAD_APICID;
#else
	return logical_smp_processor_id();
#endif
}
/*
 * Return the BIOS-reported APIC id recorded for @mps_cpu, or BAD_APICID
 * when @mps_cpu is not below nr_cpu_ids.
 */
static int summit_cpu_present_to_apicid(int mps_cpu)
{
	if (!(mps_cpu < nr_cpu_ids))
		return BAD_APICID;

	return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
}
/*
 * Return the I/O APIC physical id map. @phys_id_map is ignored; the result
 * is always the fixed value 0x0F promoted to a physid mask.
 */
static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
{
/* For clustered we don't have a good way to do this yet - hack */
return physids_promote(0x0F);
}
/*
 * Installed as apic_summit.apicid_to_cpu_present.
 *
 * @apicid is ignored; the mask for physical id 0 is always returned.
 */
static physid_mask_t summit_apicid_to_cpu_present(int apicid)
{
	return physid_mask_of_physid(0);
}
/*
 * Installed as apic_summit.check_phys_apicid_present.
 *
 * Unconditionally returns 1; @boot_cpu_physical_apicid is not examined.
 */
static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	return 1;
}
/*
 * Fold the logical APIC IDs of every cpu in @cpumask into one value by
 * OR-ing them together.
 *
 * The cpus in the mask must all be on the same apic cluster: as soon as
 * a cpu's cluster differs from the cluster of the value accumulated so
 * far, a message is logged and BAD_APICID is returned.  An empty mask
 * yields 0.
 */
static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
{
	int have_first = 0;
	int merged = 0;
	int cpu;

	for_each_cpu(cpu, cpumask) {
		int next = summit_cpu_to_logical_apicid(cpu);

		/* The first cpu has nothing to be compared against. */
		if (have_first && APIC_CLUSTER(merged) != APIC_CLUSTER(next)) {
			printk("%s: Not a valid mask!\n", __func__);
			return BAD_APICID;
		}

		merged |= next;
		have_first = 1;
	}

	return merged;
}
/*
 * Like summit_cpu_mask_to_apicid(), but applied to the intersection of
 * @inmask, @andmask and cpu_online_mask.
 *
 * The intersection is built in a temporary cpumask allocated with
 * GFP_ATOMIC.  If that allocation fails, the logical APIC ID of cpu 0
 * is returned instead.
 */
static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
						  const struct cpumask *andmask)
{
	unsigned int fallback = summit_cpu_to_logical_apicid(0);
	unsigned int result;
	cpumask_var_t scratch;

	if (!alloc_cpumask_var(&scratch, GFP_ATOMIC))
		return fallback;

	cpumask_and(scratch, inmask, andmask);
	cpumask_and(scratch, scratch, cpu_online_mask);
	result = summit_cpu_mask_to_apicid(scratch);

	free_cpumask_var(scratch);

	return result;
}
/*
 * The APIC ID that cpuid reports is the value latched in the HW at
 * reset, not the current value of the APIC ID register.  On any box
 * whose BIOS changes APIC IDs, such as clustered APIC systems, the two
 * can differ, so hard_smp_processor_id() is used here and @cpuid_apic
 * is ignored.  The result is that ID shifted right by @index_msb.
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
{
	return hard_smp_processor_id() >> index_msb;
}
/*
 * Installed as apic_summit.probe.
 *
 * Always returns 0 here: Summit is probed later, in the mptable/ACPI
 * hooks.
 */
static int probe_summit(void)
{
	return 0;
}
/*
 * Installed as apic_summit.vector_allocation_domain.
 *
 * Stores in @retmask a mask whose first word is APIC_ALL_CPUS and whose
 * remaining words are zero.  @cpu is not used.
 */
static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
{
	/*
	 * Careful.  Some cpus do not strictly honor the set of cpus
	 * specified in the interrupt destination when using lowest
	 * priority interrupt delivery mode.
	 *
	 * In particular there was a hyperthreading cpu observed to
	 * deliver interrupts to the wrong hyperthread when only one
	 * hyperthread was specified in the interrupt destination.
	 */
	const cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };

	*retmask = domain;
}
#ifdef CONFIG_X86_SUMMIT_NUMA
/*
 * Rio Grande table state, set up by setup_summit():
 *
 * rio_table_hdr - the table header located in the EBDA, NULL if not found.
 * scal_devs[]   - one pointer per scalability detail record in the table.
 * rio_devs[]    - one pointer per Rio detail record in the table.
 *
 * The two pointer arrays are filled by build_detail_arrays().
 */
static struct rio_table_hdr *rio_table_hdr;
static struct scal_detail *scal_devs[MAX_NUMNODES];
static struct rio_detail *rio_devs[MAX_NUMNODES*4];
#ifndef CONFIG_X86_NUMAQ
/* PCI bus number -> node, written by setup_pci_node_map_for_wpeg(). */
static int mp_bus_id_to_node[MAX_MP_BUSSES];
#endif
static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
{
int twister = 0, node = 0;
int i, bus, num_buses;
for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
twister = rio_devs[i]->owner_id;
break;
}
}
if (i == rio_table_hdr->num_rio_dev) {
printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
return last_bus;
}
for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
if (scal_devs[i]->node_id == twister) {
node = scal_devs[i]->node_id;
break;
}
}
if (i == rio_table_hdr->num_scal_dev) {
printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
return last_bus;
}
switch (rio_devs[wpeg_num]->type) {
case CompatWPEG:
/*
* The Compatibility Winnipeg controls the 2 legacy buses,
* the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
* a PCI-PCI bridge card is used in either slot: total 5 buses.
*/
num_buses = 5;
break;
case AltWPEG:
/*
* The Alternate Winnipeg controls the 2 133MHz buses [1 slot
* each], their 2 "extra" buses, the 100MHz bus [2 slots] and
* the "extra" buses for each of those slots: total 7 buses.
*/
num_buses = 7;
break;
case LookOutAWPEG:
case LookOutBWPEG:
/*
* A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
* & the "extra" buses for each of those slots: total 9 buses.
*/
num_buses = 9;
break;
default:
printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
return last_bus;
}
for (bus = last_bus; bus < last_bus + num_buses; bus++)
mp_bus_id_to_node[bus] = node;
return bus;
}
/*
 * Fill scal_devs[] and rio_devs[] with pointers to the detail records
 * that follow the Rio Grande table header.
 *
 * The records start 3 bytes after rio_table_hdr: first num_scal_dev
 * scalability records, then num_rio_dev Rio records.  Record sizes
 * depend on the table version (11/13 bytes for version 2, 12/15 bytes
 * for version 3).
 *
 * Returns 1 on success.  Returns 0, after logging a warning, when the
 * table describes more devices than either array can hold or when the
 * table version is not 2 or 3; the arrays are left untouched then.
 */
static int build_detail_arrays(void)
{
	unsigned long ptr;
	int i, scal_detail_size, rio_detail_size;

	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
		printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
		return 0;
	}

	/* rio_devs[] holds MAX_NUMNODES*4 entries; refuse to overrun it. */
	if (rio_table_hdr->num_rio_dev > MAX_NUMNODES * 4) {
		printk(KERN_WARNING "%s: Too many Rio devices! Room for %d, but table has %d.\n", __func__, MAX_NUMNODES * 4, rio_table_hdr->num_rio_dev);
		return 0;
	}

	switch (rio_table_hdr->version) {
	default:
		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
		return 0;
	case 2:
		scal_detail_size = 11;
		rio_detail_size = 13;
		break;
	case 3:
		scal_detail_size = 12;
		rio_detail_size = 15;
		break;
	}

	ptr = (unsigned long)rio_table_hdr + 3;

	for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
		scal_devs[i] = (struct scal_detail *)ptr;

	for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
		rio_devs[i] = (struct rio_detail *)ptr;

	return 1;
}
/*
 * Locate the Rio Grande table in the EBDA and use it to map every PCI
 * bus to its node.
 *
 * The EBDA blocks are walked as a linked list starting at offset 0x180:
 * the first word of a block is the offset of the next block (0 ends the
 * list) and the second word is the block id.  The block whose id is
 * 0x4752 is taken as the Rio Grande table.
 *
 * Winnipegs are then processed in WP_index order (0, 1, 2, ...) until
 * no Rio device carries the next index.
 */
void setup_summit(void)
{
	unsigned long ebda;
	unsigned short off;
	int idx, wpeg, bus = 0;

	/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
	ebda = get_bios_ebda();
	ebda = (unsigned long)phys_to_virt(ebda);

	rio_table_hdr = NULL;

	/* The next offset is stored in the 1st word.  0 means no more */
	for (off = 0x180; off; off = *((unsigned short *)(ebda + off))) {
		/* The block id is stored in the 2nd word */
		if (*((unsigned short *)(ebda + off + 2)) != 0x4752)
			continue;

		/* set the pointer past the offset & block id */
		rio_table_hdr = (struct rio_table_hdr *)(ebda + off + 4);
		break;
	}

	if (!rio_table_hdr) {
		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
		return;
	}

	if (!build_detail_arrays())
		return;

	/* The first Winnipeg we're looking for has an index of 0 */
	wpeg = 0;
	do {
		for (idx = 0; idx < rio_table_hdr->num_rio_dev; idx++) {
			if (!is_WPEG(rio_devs[idx]) ||
			    rio_devs[idx]->WP_index != wpeg)
				continue;

			/* It's the Winnipeg we're looking for! */
			bus = setup_pci_node_map_for_wpeg(idx, bus);
			wpeg++;
			break;
		}
		/*
		 * If we go through all Rio devices and don't find one with
		 * the next index, it means we've found all the Winnipegs,
		 * and thus all the PCI buses.
		 */
		if (idx == rio_table_hdr->num_rio_dev)
			wpeg = 0;
	} while (wpeg != 0);
}
#endif
/*
 * APIC driver description for Summit.
 *
 * Interrupts use lowest-priority delivery with logical destination
 * mode.  Hooks left NULL (multi_timer_check, setup_portio_remap,
 * enable_apic_mode, set_apic_id, send_IPI_mask_allbutself,
 * smp_callin_clear_local_apic) have no Summit-specific implementation
 * in this table.
 */
struct apic apic_summit = {
.name = "summit",
.probe = probe_summit,
.acpi_madt_oem_check = summit_acpi_madt_oem_check,
.apic_id_registered = summit_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
.target_cpus = summit_target_cpus,
.disable_esr = 1,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = summit_check_apicid_used,
.check_apicid_present = summit_check_apicid_present,
.vector_allocation_domain = summit_vector_allocation_domain,
.init_apic_ldr = summit_init_apic_ldr,
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
.setup_apic_routing = summit_setup_apic_routing,
.multi_timer_check = NULL,
.apicid_to_node = summit_apicid_to_node,
.cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
.cpu_present_to_apicid = summit_cpu_present_to_apicid,
.apicid_to_cpu_present = summit_apicid_to_cpu_present,
.setup_portio_remap = NULL,
.check_phys_apicid_present = summit_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = summit_phys_pkg_id,
.mps_oem_check = summit_mps_oem_check,
.get_apic_id = summit_get_apic_id,
.set_apic_id = NULL,
/* APIC ID occupies the top 8 bits of the 32-bit register value. */
.apic_id_mask = 0xFF << 24,
.cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
/* IPI senders */
.send_IPI_mask = summit_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = summit_send_IPI_allbutself,
.send_IPI_all = summit_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = default_wait_for_init_deassert,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = default_inquire_remote_apic,
/* Register access goes through the native memory-mapped helpers. */
.read = native_apic_mem_read,
.write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};

View File

@@ -7,17 +7,14 @@
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (cpu_has_x2apic)
return 1;
return 0;
return x2apic_enabled();
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
@@ -36,8 +33,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
unsigned int dest)
static void
__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
{
unsigned long cfg;
@@ -46,7 +43,7 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
/*
* send the IPI.
*/
x2apic_icr_write(cfg, apicid);
native_x2apic_icr_write(cfg, apicid);
}
/*
@@ -57,45 +54,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
*/
static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(
per_cpu(x86_cpu_to_logical_apicid, query_cpu),
vector, APIC_DEST_LOGICAL);
vector, apic->dest_logical);
}
local_irq_restore(flags);
}
static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
int vector)
static void
x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long this_cpu = smp_processor_id();
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
if (query_cpu != this_cpu)
__x2apic_send_IPI_dest(
for_each_cpu(query_cpu, mask) {
if (query_cpu == this_cpu)
continue;
__x2apic_send_IPI_dest(
per_cpu(x86_cpu_to_logical_apicid, query_cpu),
vector, APIC_DEST_LOGICAL);
vector, apic->dest_logical);
}
local_irq_restore(flags);
}
static void x2apic_send_IPI_allbutself(int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long this_cpu = smp_processor_id();
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_online_cpu(query_cpu)
if (query_cpu != this_cpu)
__x2apic_send_IPI_dest(
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)
continue;
__x2apic_send_IPI_dest(
per_cpu(x86_cpu_to_logical_apicid, query_cpu),
vector, APIC_DEST_LOGICAL);
vector, apic->dest_logical);
}
local_irq_restore(flags);
}
@@ -111,21 +113,21 @@ static int x2apic_apic_id_registered(void)
static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
/*
* We're using fixed IRQ delivery, can only return one logical APIC ID.
* May as well be the first.
*/
cpu = cpumask_first(cpumask);
int cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_logical_apicid, cpu);
else
return BAD_APICID;
}
static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
static unsigned int
x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
int cpu;
@@ -133,15 +135,18 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
* We're using fixed IRQ delivery, can only return one logical APIC ID.
* May as well be the first.
*/
for_each_cpu_and(cpu, cpumask, andmask)
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
if (cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_logical_apicid, cpu);
return BAD_APICID;
}
static unsigned int get_apic_id(unsigned long x)
static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
{
unsigned int id;
@@ -157,7 +162,7 @@ static unsigned long set_apic_id(unsigned int id)
return x;
}
static unsigned int phys_pkg_id(int index_msb)
static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
{
return current_cpu_data.initial_apicid >> index_msb;
}
@@ -172,27 +177,63 @@ static void init_x2apic_ldr(void)
int cpu = smp_processor_id();
per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
return;
}
struct genapic apic_x2apic_cluster = {
.name = "cluster x2apic",
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = x2apic_target_cpus,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.apic_id_registered = x2apic_apic_id_registered,
.init_apic_ldr = init_x2apic_ldr,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
struct apic apic_x2apic_cluster = {
.name = "cluster x2apic",
.probe = NULL,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
.target_cpus = x2apic_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = NULL,
.cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = x2apic_cluster_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = x2apic_cluster_phys_get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};

View File

@@ -7,10 +7,10 @@
#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
static int x2apic_phys;
int x2apic_phys;
static int set_x2apic_phys_mode(char *arg)
{
@@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (cpu_has_x2apic && x2apic_phys)
return 1;
return 0;
if (x2apic_phys)
return x2apic_enabled();
else
return 0;
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
@@ -50,13 +50,13 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
/*
* send the IPI.
*/
x2apic_icr_write(cfg, apicid);
native_x2apic_icr_write(cfg, apicid);
}
static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
@@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
local_irq_restore(flags);
}
static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
int vector)
static void
x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long this_cpu = smp_processor_id();
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
@@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
static void x2apic_send_IPI_allbutself(int vector)
{
unsigned long flags;
unsigned long query_cpu;
unsigned long this_cpu = smp_processor_id();
unsigned long query_cpu;
unsigned long flags;
local_irq_save(flags);
for_each_online_cpu(query_cpu)
if (query_cpu != this_cpu)
__x2apic_send_IPI_dest(
per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)
continue;
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
@@ -110,21 +111,21 @@ static int x2apic_apic_id_registered(void)
static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
cpu = cpumask_first(cpumask);
int cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
static unsigned int
x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
int cpu;
@@ -132,31 +133,28 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
for_each_cpu_and(cpu, cpumask, andmask)
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
if (cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
return BAD_APICID;
}
static unsigned int get_apic_id(unsigned long x)
static unsigned int x2apic_phys_get_apic_id(unsigned long x)
{
unsigned int id;
id = x;
return id;
return x;
}
static unsigned long set_apic_id(unsigned int id)
{
unsigned long x;
x = id;
return x;
return id;
}
static unsigned int phys_pkg_id(int index_msb)
static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
{
return current_cpu_data.initial_apicid >> index_msb;
}
@@ -168,27 +166,63 @@ static void x2apic_send_IPI_self(int vector)
static void init_x2apic_ldr(void)
{
return;
}
struct genapic apic_x2apic_phys = {
.name = "physical x2apic",
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = x2apic_target_cpus,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.apic_id_registered = x2apic_apic_id_registered,
.init_apic_ldr = init_x2apic_ldr,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
struct apic apic_x2apic_phys = {
.name = "physical x2apic",
.probe = NULL,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
.target_cpus = x2apic_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
.vector_allocation_domain = x2apic_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = NULL,
.cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = x2apic_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = x2apic_phys_get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};

View File

@@ -7,27 +7,28 @@
*
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/hardirq.h>
#include <linux/proc_fs.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/timer.h>
#include <linux/proc_fs.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/pgtable.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
#include <asm/uv/uv.h>
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/smp.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
@@ -90,39 +91,43 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
#ifdef CONFIG_SMP
unsigned long val;
int pnode;
pnode = uv_apicid_to_pnode(phys_apicid);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
(((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
(((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_STARTUP;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
atomic_set(&init_deasserted, 1);
#endif
return 0;
}
static void uv_send_IPI_one(int cpu, int vector)
{
unsigned long val, apicid, lapicid;
unsigned long val, apicid;
int pnode;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
lapicid = apicid & 0x3f; /* ZZZ macro needed */
pnode = uv_apicid_to_pnode(apicid);
val =
(1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
UVH_IPI_INT_APIC_ID_SHFT) |
(vector << UVH_IPI_INT_VECTOR_SHFT);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
(vector << UVH_IPI_INT_VECTOR_SHFT);
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
@@ -136,22 +141,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int cpu;
unsigned int this_cpu = smp_processor_id();
unsigned int cpu;
for_each_cpu(cpu, mask)
for_each_cpu(cpu, mask) {
if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
}
}
static void uv_send_IPI_allbutself(int vector)
{
unsigned int cpu;
unsigned int this_cpu = smp_processor_id();
unsigned int cpu;
for_each_online_cpu(cpu)
for_each_online_cpu(cpu) {
if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
}
}
static void uv_send_IPI_all(int vector)
@@ -170,21 +177,21 @@ static void uv_init_apic_ldr(void)
static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
cpu = cpumask_first(cpumask);
int cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
static unsigned int
uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
int cpu;
@@ -192,15 +199,17 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
for_each_cpu_and(cpu, cpumask, andmask)
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
if (cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
return BAD_APICID;
}
static unsigned int get_apic_id(unsigned long x)
static unsigned int x2apic_get_apic_id(unsigned long x)
{
unsigned int id;
@@ -222,10 +231,10 @@ static unsigned long set_apic_id(unsigned int id)
static unsigned int uv_read_apic_id(void)
{
return get_apic_id(apic_read(APIC_ID));
return x2apic_get_apic_id(apic_read(APIC_ID));
}
static unsigned int phys_pkg_id(int index_msb)
static int uv_phys_pkg_id(int initial_apicid, int index_msb)
{
return uv_read_apic_id() >> index_msb;
}
@@ -235,26 +244,64 @@ static void uv_send_IPI_self(int vector)
apic_write(APIC_SELF_IPI, vector);
}
struct genapic apic_x2apic_uv_x = {
.name = "UV large system",
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = uv_target_cpus,
.vector_allocation_domain = uv_vector_allocation_domain,
.apic_id_registered = uv_apic_id_registered,
.init_apic_ldr = uv_init_apic_ldr,
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_self = uv_send_IPI_self,
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = (0xFFFFFFFFu),
struct apic apic_x2apic_uv_x = {
.name = "UV large system",
.probe = NULL,
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
.apic_id_registered = uv_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.target_cpus = uv_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
.vector_allocation_domain = uv_vector_allocation_domain,
.init_apic_ldr = uv_init_apic_ldr,
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
.apicid_to_node = NULL,
.cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
.phys_pkg_id = uv_phys_pkg_id,
.mps_oem_check = NULL,
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
.send_IPI_mask = uv_send_IPI_mask,
.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_all = uv_send_IPI_all,
.send_IPI_self = uv_send_IPI_self,
.wakeup_secondary_cpu = uv_wakeup_secondary,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
.inquire_remote_apic = NULL,
.read = native_apic_msr_read,
.write = native_apic_msr_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};
static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -322,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
paddr = base << shift;
bytes = (1UL << shift) * (max_pnode + 1);
printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
paddr + bytes);
paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
@@ -485,7 +532,7 @@ late_initcall(uv_init_heartbeat);
/*
* Called on each cpu to initialize the per_cpu UV data area.
* ZZZ hotplug not supported yet
* FIXME: hotplug not supported yet
*/
void __cpuinit uv_cpu_init(void)
{

View File

@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int);
*/
#define APM_ZERO_SEGS
#include "apm.h"
#include <asm/apm.h>
/*
* Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.

View File

@@ -75,6 +75,7 @@ void foo(void)
OFFSET(PT_DS, pt_regs, ds);
OFFSET(PT_ES, pt_regs, es);
OFFSET(PT_FS, pt_regs, fs);
OFFSET(PT_GS, pt_regs, gs);
OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
OFFSET(PT_EIP, pt_regs, ip);
OFFSET(PT_CS, pt_regs, cs);

View File

@@ -11,7 +11,6 @@
#include <linux/hardirq.h>
#include <linux/suspend.h>
#include <linux/kbuild.h>
#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
#endif
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(pcurrent);
ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
ENTRY(data_offset);
BLANK();
#undef ENTRY
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);

View File

@@ -7,7 +7,7 @@
#include <asm/pat.h>
#include <asm/processor.h>
#include <mach_apic.h>
#include <asm/apic.h>
struct cpuid_bit {
u16 feature;
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
*/
void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_SMP
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
#ifdef CONFIG_X86_32
c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
& core_select_mask;
c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = phys_pkg_id(c->initial_apicid, 0);
#else
c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = phys_pkg_id(0);
#endif
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
return;
#endif
}
#ifdef CONFIG_X86_PAT
void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
{
if (!cpu_has_pat)
pat_disable("PAT not supported by CPU.");
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
/*
* There is a known erratum on Pentium III and Core Solo
* and Core Duo CPUs.
* " Page with PAT set to WC while associated MTRR is UC
* may consolidate to UC "
* Because of this erratum, it is better to stick with
* setting WC in MTRR rather than using PAT on these CPUs.
*
* Enable PAT WC only on P4, Core 2 or later CPUs.
*/
if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
return;
pat_disable("PAT WC disabled due to known CPU erratum.");
return;
case X86_VENDOR_AMD:
case X86_VENDOR_CENTAUR:
case X86_VENDOR_TRANSMETA:
return;
}
pat_disable("PAT disabled. Not yet verified on this CPU type.");
}
#endif

View File

@@ -12,8 +12,6 @@
# include <asm/cacheflush.h>
#endif
#include <mach_apic.h>
#include "cpu.h"
#ifdef CONFIG_X86_32

View File

@@ -21,14 +21,14 @@
#include <asm/asm.h>
#include <asm/numa.h>
#include <asm/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/cpu.h>
#include <asm/cpumask.h>
#include <asm/apic.h>
#include <mach_apic.h>
#include <asm/genapic.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
#endif
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
@@ -37,6 +37,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/stackprotector.h>
#include "cpu.h"
@@ -50,6 +51,15 @@ cpumask_var_t cpu_initialized_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
/*
 * Correctly size the local cpu masks.
 *
 * Boot-time (__init) helper that calls alloc_bootmem_cpumask_var() once for
 * each of cpu_initialized_mask, cpu_callin_mask, cpu_callout_mask and
 * cpu_sibling_setup_mask. Takes no arguments and returns nothing.
 */
void __init setup_cpu_local_masks(void)
{
alloc_bootmem_cpumask_var(&cpu_initialized_mask);
alloc_bootmem_cpumask_var(&cpu_callin_mask);
alloc_bootmem_cpumask_var(&cpu_callout_mask);
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
@@ -62,23 +72,23 @@ cpumask_t cpu_sibling_setup_map;
static struct cpu_dev *this_cpu __cpuinitdata;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
*/
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
/*
* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
*
* The TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
} };
#else
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +120,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
} };
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32
@@ -212,6 +223,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
}
#endif
/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
* software. Add those features to this table to auto-disable them.
*/
/* One table entry: a feature bit paired with the CPUID level it needs. */
struct cpuid_dependent_feature {
u32 feature;	/* X86_FEATURE_* value, tested with cpu_has() */
u32 level;	/* CPUID level compared against what the CPU reports */
};
/*
 * Features that filter_cpuid_features() clears when the paired CPUID level
 * is not available. The { 0, 0 } entry terminates the table: the walker
 * stops at the first entry whose feature field is zero.
 */
static const struct cpuid_dependent_feature __cpuinitconst
cpuid_dependent_features[] = {
{ X86_FEATURE_MWAIT, 0x00000005 },
{ X86_FEATURE_DCA, 0x00000009 },
{ X86_FEATURE_XSAVE, 0x0000000d },
{ 0, 0 }
};
/*
 * Clear every feature listed in cpuid_dependent_features[] that is set on
 * @c but whose required CPUID level @c does not report.
 *
 * @c:    CPU descriptor whose capability bits are checked and cleared.
 * @warn: when true, print a KERN_WARNING line for each feature cleared.
 */
static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
{
	const struct cpuid_dependent_feature *entry;

	for (entry = cpuid_dependent_features; entry->feature; entry++) {
		bool level_missing;

		/* Nothing to filter if the CPU never claimed the feature. */
		if (!cpu_has(c, entry->feature))
			continue;

		/*
		 * Note: cpuid_level is set to -1 if unavailable, but
		 * extended_cpuid_level is set to 0 if unavailable and the
		 * legitimate extended levels are all negative when signed.
		 * Hence extended levels are compared as unsigned values and
		 * basic levels as signed ones.
		 */
		if ((s32)entry->level < 0)
			level_missing = (u32)entry->level >
					(u32)c->extended_cpuid_level;
		else
			level_missing = (s32)entry->level >
					(s32)c->cpuid_level;

		if (!level_missing)
			continue;

		clear_cpu_cap(c, entry->feature);
		if (!warn)
			continue;

		printk(KERN_WARNING
		       "CPU: CPU feature %s disabled "
		       "due to lack of CPUID level 0x%x\n",
		       x86_cap_flags[entry->feature],
		       entry->level);
	}
}
/*
* Naming convention should be: <Name> [(<Codename>)]
* This table is only used if init_<vendor>() below doesn't set it;
@@ -242,18 +296,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
void load_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
loadsegment(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
}
/* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one. */
void switch_to_new_gdt(void)
void switch_to_new_gdt(int cpu)
{
struct desc_ptr gdt_descr;
gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
gdt_descr.address = (long)get_cpu_gdt_table(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
#ifdef CONFIG_X86_32
asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
#endif
/* Reload the per-cpu base */
load_percpu_segment(cpu);
}
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +448,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
index_msb = get_count_order(smp_num_siblings);
#ifdef CONFIG_X86_64
c->phys_proc_id = phys_pkg_id(index_msb);
#else
c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
#endif
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
smp_num_siblings = smp_num_siblings / c->x86_max_cores;
@@ -395,13 +456,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
core_bits = get_count_order(c->x86_max_cores);
#ifdef CONFIG_X86_64
c->cpu_core_id = phys_pkg_id(index_msb) &
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
#else
c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
#endif
}
out:
@@ -570,11 +626,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
validate_pat_support(c);
#ifdef CONFIG_SMP
c->cpu_index = boot_cpu_id;
#endif
filter_cpuid_features(c, false);
}
void __init early_cpu_init(void)
@@ -637,7 +692,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_HT
c->apicid = phys_pkg_id(c->initial_apicid, 0);
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
# else
c->apicid = c->initial_apicid;
# endif
@@ -684,7 +739,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_identify(c);
#ifdef CONFIG_X86_64
c->apicid = phys_pkg_id(0);
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
/*
@@ -708,6 +763,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
* we do "generic changes."
*/
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
char *p;
@@ -877,54 +935,22 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
void __cpuinit pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
/* Setup up data that may be needed in __get_free_pages early */
loadsegment(fs, 0);
loadsegment(gs, 0);
/* Memory clobbers used to order PDA accessed */
mb();
wrmsrl(MSR_GS_BASE, pda);
mb();
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack = (unsigned long)stack_thread_info() -
PDA_STACKOFFSET + THREAD_SIZE;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
if (cpu == 0) {
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
if (!pda->irqstackptr) {
pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d",
cpu);
pda->irqstackptr += IRQSTACKSIZE - 64;
}
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
}
static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
DEBUG_STKSZ] __page_aligned_bss;
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
extern asmlinkage void ignore_sysret(void);
@@ -957,16 +983,21 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else
#else /* x86_64 */
/* Make sure %fs is initialized properly in idle threads */
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
/*
 * Prepare a register frame for an idle thread: everything is zeroed except
 * the %fs and %gs slots, which receive the per-cpu and stack-canary
 * selectors. Returns @regs so the call can be used inline.
 */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
	memset(regs, 0, sizeof(*regs));

	regs->gs = __KERNEL_STACK_CANARY;
	regs->fs = __KERNEL_PERCPU;

	return regs;
}
#endif
#endif /* x86_64 */
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1013,14 @@ void __cpuinit cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
char *estacks = NULL;
struct task_struct *me;
int i;
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
pda_init(cpu);
else
estacks = boot_exception_stacks;
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
cpu_to_node(cpu) != NUMA_NO_NODE)
percpu_write(node_number, cpu_to_node(cpu));
#endif
me = current;
@@ -1006,7 +1036,9 @@ void __cpuinit cpu_init(void)
* and set up the GDT descriptor:
*/
switch_to_new_gdt();
switch_to_new_gdt(cpu);
loadsegment(fs, 0);
load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,25 +1049,20 @@ void __cpuinit cpu_init(void)
barrier();
check_efer();
if (cpu != 0 && x2apic)
if (cpu != 0)
enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
static const unsigned int order[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
[DEBUG_STACK - 1] = DEBUG_STACK_ORDER
static const unsigned int sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
if (cpu) {
estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
if (!estacks)
panic("Cannot allocate exception "
"stack %ld %d\n", v, cpu);
}
estacks += PAGE_SIZE << order[v];
estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
@@ -1069,22 +1096,19 @@ void __cpuinit cpu_init(void)
*/
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
arch_kgdb_ops.correct_hw_break();
else {
else
#endif
/*
* Clear all 6 debug registers:
*/
set_debugreg(0UL, 0);
set_debugreg(0UL, 1);
set_debugreg(0UL, 2);
set_debugreg(0UL, 3);
set_debugreg(0UL, 6);
set_debugreg(0UL, 7);
#ifdef CONFIG_KGDB
/* If the kgdb is connected no debug regs should be altered. */
{
/*
* Clear all 6 debug registers:
*/
set_debugreg(0UL, 0);
set_debugreg(0UL, 1);
set_debugreg(0UL, 2);
set_debugreg(0UL, 3);
set_debugreg(0UL, 6);
set_debugreg(0UL, 7);
}
#endif
fpu_init();
@@ -1114,7 +1138,7 @@ void __cpuinit cpu_init(void)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_idt(&idt_descr);
switch_to_new_gdt();
switch_to_new_gdt(cpu);
/*
* Set up and load the per-CPU TSS and LDT
@@ -1135,9 +1159,6 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
/* Clear %gs. */
asm volatile ("mov %0, %%gs" : : "r" (0));
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
set_debugreg(0, 1);

View File

@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (!data)
return -ENOMEM;
data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
per_cpu(drv_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))

View File

@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
}
/* Enable Enhanced PowerSaver */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
if (!(val & 1 << 16)) {
val |= 1 << 16;
if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
wrmsrl(MSR_IA32_MISC_ENABLE, val);
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
if (!(val & 1 << 16)) {
if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}

View File

@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
enable it if not. */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
if (!(l & (1<<16))) {
l |= (1<<16);
if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
wrmsr(MSR_IA32_MISC_ENABLE, l, h);
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
if (!(l & (1<<16))) {
if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO PFX
"couldn't enable Enhanced SpeedStep\n");
return -ENODEV;

View File

@@ -25,7 +25,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
#endif
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -69,6 +68,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
sched_clock_stable = 1;
}
/*
* There is a known erratum on Pentium III and Core Solo
* and Core Duo CPUs.
* " Page with PAT set to WC while associated MTRR is UC
* may consolidate to UC "
* Because of this erratum, it is better to stick with
* setting WC in MTRR rather than using PAT on these CPUs.
*
* Enable PAT WC only on P4, Core 2 or later CPUs.
*/
if (c->x86 == 6 && c->x86_model < 15)
clear_cpu_cap(c, X86_FEATURE_PAT);
}
#ifdef CONFIG_X86_32
@@ -141,10 +152,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
*/
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
if ((lo & (1<<9)) == 0) {
if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
lo |= (1<<9); /* Disable hw prefetching */
lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
}
}

View File

@@ -147,7 +147,16 @@ struct _cpuid4_info {
union _cpuid4_leaf_ecx ecx;
unsigned long size;
unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};
/*
 * Subset of the above _cpuid4_info w/o shared_cpu_map.
 *
 * cpuid4_cache_lookup() casts a struct _cpuid4_info pointer to this type,
 * so these members have to stay in the same order as the leading members
 * of struct _cpuid4_info.
 */
struct _cpuid4_info_regs {
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
unsigned long can_disable;
};
#ifdef CONFIG_PCI
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
}
static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
if (index < 3)
return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
}
static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
__cpuinit cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
return 0;
}
/*
 * Wrapper for callers that hold a full struct _cpuid4_info: the leaf is
 * viewed as a struct _cpuid4_info_regs and passed to
 * cpuid4_cache_lookup_regs(), whose result is returned unchanged.
 */
static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	return cpuid4_cache_lookup_regs(index,
			(struct _cpuid4_info_regs *)this_leaf);
}
static int __cpuinit find_num_cache_leaves(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
* parameters cpuid leaf to find the cache details
*/
for (i = 0; i < num_cache_leaves; i++) {
struct _cpuid4_info this_leaf;
struct _cpuid4_info_regs this_leaf;
int retval;
retval = cpuid4_cache_lookup(i, &this_leaf);
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
if (retval >= 0) {
switch(this_leaf.eax.split.level) {
case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
if (num_threads_sharing == 1)
cpu_set(cpu, this_leaf->shared_cpu_map);
cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
else {
index_msb = get_count_order(num_threads_sharing);
for_each_online_cpu(i) {
if (cpu_data(i).apicid >> index_msb ==
c->apicid >> index_msb) {
cpu_set(i, this_leaf->shared_cpu_map);
cpumask_set_cpu(i,
to_cpumask(this_leaf->shared_cpu_map));
if (i != cpu && per_cpu(cpuid4_info, i)) {
sibling_leaf = CPUID4_INFO_IDX(i, index);
cpu_set(cpu, sibling_leaf->shared_cpu_map);
sibling_leaf =
CPUID4_INFO_IDX(i, index);
cpumask_set_cpu(cpu, to_cpumask(
sibling_leaf->shared_cpu_map));
}
}
}
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
int sibling;
this_leaf = CPUID4_INFO_IDX(cpu, index);
for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
sibling_leaf = CPUID4_INFO_IDX(sibling, index);
cpu_clear(cpu, sibling_leaf->shared_cpu_map);
cpumask_clear_cpu(cpu,
to_cpumask(sibling_leaf->shared_cpu_map));
}
}
#else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
int n = 0;
if (len > 1) {
cpumask_t *mask = &this_leaf->shared_cpu_map;
const struct cpumask *mask;
mask = to_cpumask(this_leaf->shared_cpu_map);
n = type?
cpulist_scnprintf(buf, len-2, mask) :
cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{
int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
int node = cpu_to_node(cpumask_first(mask));
struct pci_dev *dev = NULL;
ssize_t ret = 0;
int i;
@@ -733,7 +757,8 @@ static ssize_t
store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
size_t count)
{
int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
int node = cpu_to_node(cpumask_first(mask));
struct pci_dev *dev = NULL;
unsigned int ret, index, val;
@@ -878,7 +903,7 @@ err_out:
return -ENOMEM;
}
static cpumask_t cache_dev_map = CPU_MASK_NONE;
static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
cpu_set(cpu, cache_dev_map);
cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
if (per_cpu(cpuid4_info, cpu) == NULL)
return;
if (!cpu_isset(cpu, cache_dev_map))
if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
return;
cpu_clear(cpu, cache_dev_map);
cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));

View File

@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
struct threshold_bank {
struct kobject *kobj;
struct threshold_block *blocks;
cpumask_t cpus;
cpumask_var_t cpus;
};
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
i = first_cpu(per_cpu(cpu_core_map, cpu));
i = cpumask_first(&per_cpu(cpu_core_map, cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
b->cpus = per_cpu(cpu_core_map, cpu);
cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
}
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
err = -ENOMEM;
goto out;
}
if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
kfree(b);
err = -ENOMEM;
goto out;
}
b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
if (!b->kobj)
goto out_free;
#ifndef CONFIG_SMP
b->cpus = CPU_MASK_ALL;
cpumask_setall(b->cpus);
#else
b->cpus = per_cpu(cpu_core_map, cpu);
cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
#endif
per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out_free;
for_each_cpu_mask_nr(i, b->cpus) {
for_each_cpu(i, b->cpus) {
if (i == cpu)
continue;
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
out_free:
per_cpu(threshold_banks, cpu)[bank] = NULL;
free_cpumask_var(b->cpus);
kfree(b);
out:
return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#endif
/* remove all sibling symlinks before unregistering */
for_each_cpu_mask_nr(i, b->cpus) {
for_each_cpu(i, b->cpus) {
if (i == cpu)
continue;
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
free_out:
kobject_del(b->kobj);
kobject_put(b->kobj);
free_cpumask_var(b->cpus);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
}

View File

@@ -7,6 +7,7 @@
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
@@ -48,13 +49,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
tm2 = 1;
if (h & APIC_VECTOR_MASK) {
@@ -72,7 +73,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

View File

@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
cpu);
return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
vendor_thermal_interrupt = intel_thermal_interrupt;
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

View File

@@ -19,7 +19,7 @@
#include <linux/nmi.h>
#include <linux/kprobes.h>
#include <asm/apic.h>
#include <asm/genapic.h>
#include <asm/intel_arch_perfmon.h>
struct nmi_watchdog_ctlblk {

View File

@@ -7,11 +7,10 @@
/*
* Get CPU information for use by the procfs.
*/
#ifdef CONFIG_X86_32
static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
unsigned int cpu)
{
#ifdef CONFIG_X86_HT
#ifdef CONFIG_SMP
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
#endif
}
#ifdef CONFIG_X86_32
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
/*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
c->wp_works_ok ? "yes" : "no");
}
#else
/*
 * Write the topology lines for one CPU into the seq_file @m.
 *
 * The body is compiled only under CONFIG_SMP and prints only when
 * c->x86_max_cores * smp_num_siblings exceeds 1. It emits physical id,
 * siblings (the weight of @cpu's cpu_core_map), core id, cpu cores,
 * apicid and initial apicid, all taken from @c except the sibling count.
 */
static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
unsigned int cpu)
{
#ifdef CONFIG_SMP
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
cpus_weight(per_cpu(cpu_core_map, cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
}
#endif
}
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
seq_printf(m,

View File

@@ -24,12 +24,10 @@
#include <asm/apic.h>
#include <asm/hpet.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <mach_ipi.h>
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

View File

@@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo,
frame = frame->next_frame;
bp = (unsigned long) frame;
} else {
ops->address(data, addr, bp == 0);
ops->address(data, addr, 0);
}
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
}

View File

@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned long *irq_stack_end =
(unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
stack = (unsigned long *) estack_end[-2];
continue;
}
if (irqstack_end) {
unsigned long *irqstack;
irqstack = irqstack_end -
(IRQSTACKSIZE - 64) / sizeof(*irqstack);
if (irq_stack_end) {
unsigned long *irq_stack;
irq_stack = irq_stack_end -
(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
if (stack >= irqstack && stack < irqstack_end) {
if (stack >= irq_stack && stack < irq_stack_end) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
ops, data, irqstack_end, &graph);
ops, data, irq_stack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (irqstack_end[-1]);
irqstack_end = NULL;
stack = (unsigned long *) (irq_stack_end[-1]);
irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
}
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
unsigned long *irqstack_end =
(unsigned long *) (cpu_pda(cpu)->irqstackptr);
unsigned long *irqstack =
(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
unsigned long *irq_stack_end =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu));
unsigned long *irq_stack =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
* debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
if (stack >= irqstack && stack <= irqstack_end) {
if (stack == irqstack_end) {
stack = (unsigned long *) (irqstack_end[-1]);
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
printk(" <EOI> ");
}
} else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
int i;
unsigned long sp;
const int cpu = smp_processor_id();
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
struct task_struct *cur = current;
sp = regs->sp;
printk("CPU %d ", cpu);

View File

@@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
*/
void __init reserve_early(u64 start, u64 end, char *name)
{
	/* An empty or inverted range reserves nothing. */
	if (start < end) {
		/* Drop the overlaps that are allowed, then record the range. */
		drop_overlaps_that_are_ok(start, end);
		__reserve_early(start, end, name, 0);
	}
}

View File

@@ -13,8 +13,8 @@
#include <asm/setup.h>
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/pgtable.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
/* Simple VGA output */

View File

@@ -366,10 +366,12 @@ void __init efi_init(void)
SMBIOS_TABLE_GUID)) {
efi.smbios = config_tables[i].table;
printk(" SMBIOS=0x%lx ", config_tables[i].table);
#ifdef CONFIG_X86_UV
} else if (!efi_guidcmp(config_tables[i].guid,
UV_SYSTEM_TABLE_GUID)) {
efi.uv_systab = config_tables[i].table;
printk(" UVsystab=0x%lx ", config_tables[i].table);
#endif
} else if (!efi_guidcmp(config_tables[i].guid,
HCDP_TABLE_GUID)) {
efi.hcdp = config_tables[i].table;

View File

@@ -36,6 +36,7 @@
#include <asm/proto.h>
#include <asm/efi.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata;

View File

@@ -6,7 +6,7 @@
*/
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
@@ -113,6 +113,7 @@ ENTRY(efi_call_phys)
movl (%edx), %ecx
pushl %ecx
ret
ENDPROC(efi_call_phys)
.previous
.data

View File

@@ -41,6 +41,7 @@ ENTRY(efi_call0)
addq $32, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call0)
ENTRY(efi_call1)
SAVE_XMM
@@ -50,6 +51,7 @@ ENTRY(efi_call1)
addq $32, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call1)
ENTRY(efi_call2)
SAVE_XMM
@@ -59,6 +61,7 @@ ENTRY(efi_call2)
addq $32, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call2)
ENTRY(efi_call3)
SAVE_XMM
@@ -69,6 +72,7 @@ ENTRY(efi_call3)
addq $32, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call3)
ENTRY(efi_call4)
SAVE_XMM
@@ -80,6 +84,7 @@ ENTRY(efi_call4)
addq $32, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call4)
ENTRY(efi_call5)
SAVE_XMM
@@ -92,6 +97,7 @@ ENTRY(efi_call5)
addq $48, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call5)
ENTRY(efi_call6)
SAVE_XMM
@@ -107,3 +113,4 @@ ENTRY(efi_call6)
addq $48, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call6)

View File

@@ -30,12 +30,13 @@
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
* 28(%esp) - orig_eax
* 2C(%esp) - %eip
* 30(%esp) - %cs
* 34(%esp) - %eflags
* 38(%esp) - %oldesp
* 3C(%esp) - %oldss
* 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
* 2C(%esp) - orig_eax
* 30(%esp) - %eip
* 34(%esp) - %cs
* 38(%esp) - %eflags
* 3C(%esp) - %oldesp
* 40(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
@@ -46,7 +47,7 @@
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
@@ -101,121 +102,221 @@
#define resume_userspace_sig resume_userspace
#endif
#define SAVE_ALL \
cld; \
pushl %fs; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET fs, 0;*/\
pushl %es; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET es, 0;*/\
pushl %ds; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET eax, 0;\
pushl %ebp; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ebp, 0;\
pushl %edi; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET edi, 0;\
pushl %esi; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET esi, 0;\
pushl %edx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET edx, 0;\
pushl %ecx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ecx, 0;\
pushl %ebx; \
CFI_ADJUST_CFA_OFFSET 4;\
CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es; \
movl $(__KERNEL_PERCPU), %edx; \
movl %edx, %fs
/*
* User gs save/restore
*
* %gs is used for userland TLS and kernel only uses it for stack
* canary which is required to be at %gs:20 by gcc. Read the comment
* at the top of stackprotector.h for more info.
*
* Local labels 98 and 99 are used.
*/
#ifdef CONFIG_X86_32_LAZY_GS
#define RESTORE_INT_REGS \
popl %ebx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ebx;\
popl %ecx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ecx;\
popl %edx; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE edx;\
popl %esi; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE esi;\
popl %edi; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE edi;\
popl %ebp; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE ebp;\
popl %eax; \
CFI_ADJUST_CFA_OFFSET -4;\
CFI_RESTORE eax
/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
pushl $0
CFI_ADJUST_CFA_OFFSET 4
.endm
.macro POP_GS pop=0
addl $(4 + \pop), %esp
CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm
#define RESTORE_REGS \
RESTORE_INT_REGS; \
1: popl %ds; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE ds;*/\
2: popl %es; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE es;*/\
3: popl %fs; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax"; \
4: movl $0,(%esp); \
jmp 1b; \
5: movl $0,(%esp); \
jmp 2b; \
6: movl $0,(%esp); \
jmp 3b; \
.section __ex_table,"a";\
.align 4; \
.long 1b,4b; \
.long 2b,5b; \
.long 3b,6b; \
/* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm
#else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
pushl %gs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET gs, 0*/
.endm
.macro POP_GS pop=0
98: popl %gs
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE gs*/
.if \pop <> 0
add $\pop, %esp
CFI_ADJUST_CFA_OFFSET -\pop
.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
#define RING0_INT_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 3*4;\
/*CFI_OFFSET cs, -2*4;*/\
.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.section __ex_table, "a"
.align 4
.long 98b, 99b
.popsection
.endm
.macro GS_TO_REG reg
movl %gs, \reg
/*CFI_REGISTER gs, \reg*/
.endm
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm
#endif /* CONFIG_X86_32_LAZY_GS */
.macro SAVE_ALL
cld
PUSH_GS
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0;*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0;*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0;*/
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eax, 0
pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebp, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
pushl %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
SET_KERNEL_GS %edx
.endm
.macro RESTORE_INT_REGS
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ecx
popl %edx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edx
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
popl %edi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edi
popl %ebp
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebp
popl %eax
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE eax
.endm
.macro RESTORE_REGS pop=0
RESTORE_INT_REGS
1: popl %ds
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE ds;*/
2: popl %es
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE es;*/
3: popl %fs
CFI_ADJUST_CFA_OFFSET -4
/*CFI_RESTORE fs;*/
POP_GS \pop
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
5: movl $0, (%esp)
jmp 2b
6: movl $0, (%esp)
jmp 3b
.section __ex_table, "a"
.align 4
.long 1b, 4b
.long 2b, 5b
.long 3b, 6b
.popsection
POP_GS_EX
.endm
.macro RING0_INT_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 3*4
/*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
.endm
#define RING0_EC_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 4*4;\
/*CFI_OFFSET cs, -2*4;*/\
.macro RING0_EC_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 4*4
/*CFI_OFFSET cs, -2*4;*/
CFI_OFFSET eip, -3*4
.endm
#define RING0_PTREGS_FRAME \
CFI_STARTPROC simple;\
CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
.macro RING0_PTREGS_FRAME
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
CFI_OFFSET eip, PT_EIP-PT_OLDESP
/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
CFI_OFFSET eax, PT_EAX-PT_OLDESP
CFI_OFFSET ebp, PT_EBP-PT_OLDESP
CFI_OFFSET edi, PT_EDI-PT_OLDESP
CFI_OFFSET esi, PT_ESI-PT_OLDESP
CFI_OFFSET edx, PT_EDX-PT_OLDESP
CFI_OFFSET ecx, PT_ECX-PT_OLDESP
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm
ENTRY(ret_from_fork)
CFI_STARTPROC
@@ -362,6 +463,7 @@ sysenter_exit:
xorl %ebp,%ebp
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT
#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@ sysexit_audit:
.align 4
.long 1b,2b
.popsection
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
# system call handler stub
@@ -452,8 +555,7 @@ restore_all:
restore_nocheck:
TRACE_IRQS_IRET
restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
@@ -595,28 +697,50 @@ syscall_badsys:
END(syscall_badsys)
CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
PER_CPU(gdt_page, %ebx); \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
addl %esp, %eax; \
pushl $__KERNEL_DS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl %eax; \
CFI_ADJUST_CFA_OFFSET 4; \
lss (%esp), %esp; \
CFI_ADJUST_CFA_OFFSET -8;
#define UNWIND_ESPFIX_STACK \
movl %ss, %eax; \
/* see if on espfix stack */ \
cmpw $__ESPFIX_SS, %ax; \
jne 27f; \
movl $__KERNEL_DS, %eax; \
movl %eax, %ds; \
movl %eax, %es; \
/* switch to normal stack */ \
FIXUP_ESPFIX_STACK; \
27:;
/*
* System calls that need a pt_regs pointer.
*/
#define PTREGSCALL(name) \
ALIGN; \
ptregs_##name: \
leal 4(%esp),%eax; \
jmp sys_##name;
PTREGSCALL(iopl)
PTREGSCALL(fork)
PTREGSCALL(clone)
PTREGSCALL(vfork)
PTREGSCALL(execve)
PTREGSCALL(sigaltstack)
PTREGSCALL(sigreturn)
PTREGSCALL(rt_sigreturn)
PTREGSCALL(vm86)
PTREGSCALL(vm86old)
.macro FIXUP_ESPFIX_STACK
/* since we are on a wrong stack, we cant make it a C code :( */
PER_CPU(gdt_page, %ebx)
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
addl %esp, %eax
pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro UNWIND_ESPFIX_STACK
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
jne 27f
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
.endm
/*
* Build the entry stubs and pointer table with some assembler magic.
@@ -672,7 +796,7 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
pushl $~(nr); \
@@ -680,13 +804,15 @@ ENTRY(name) \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
call smp_##name; \
call fn; \
jmp ret_from_intr; \
CFI_ENDPROC; \
ENDPROC(name)
#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
#include <asm/entry_arch.h>
ENTRY(coprocessor_error)
RING0_INT_FRAME
@@ -1068,7 +1194,10 @@ ENTRY(page_fault)
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
/* the function address is in %fs's slot on the stack */
/* the function address is in %gs's slot on the stack */
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
pushl %es
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET es, 0*/
@@ -1097,20 +1226,15 @@ error_code:
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
movl PT_FS(%esp), %edi # get the function address
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
mov %ecx, PT_FS(%esp)
/*CFI_REL_OFFSET fs, ES*/
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
@@ -1134,26 +1258,27 @@ END(page_fault)
* by hand onto the new stack - while updating the return eip past
* the instruction that would have done it for sysenter.
*/
#define FIX_STACK(offset, ok, label) \
cmpw $__KERNEL_CS,4(%esp); \
jne ok; \
label: \
movl TSS_sysenter_sp0+offset(%esp),%esp; \
CFI_DEF_CFA esp, 0; \
CFI_UNDEFINED eip; \
pushfl; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $__KERNEL_CS; \
CFI_ADJUST_CFA_OFFSET 4; \
pushl $sysenter_past_esp; \
CFI_ADJUST_CFA_OFFSET 4; \
.macro FIX_STACK offset ok label
cmpw $__KERNEL_CS, 4(%esp)
jne \ok
\label:
movl TSS_sysenter_sp0 + \offset(%esp), %esp
CFI_DEF_CFA esp, 0
CFI_UNDEFINED eip
pushfl
CFI_ADJUST_CFA_OFFSET 4
pushl $__KERNEL_CS
CFI_ADJUST_CFA_OFFSET 4
pushl $sysenter_past_esp
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
.endm
ENTRY(debug)
RING0_INT_FRAME
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
pushl $-1 # mark this as an int
CFI_ADJUST_CFA_OFFSET 4
@@ -1211,7 +1336,7 @@ nmi_stack_correct:
nmi_stack_fixup:
RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1)
FIX_STACK 12, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_debug_stack_check:
@@ -1222,7 +1347,7 @@ nmi_debug_stack_check:
jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1)
FIX_STACK 24, nmi_stack_correct, 1
jmp nmi_stack_correct
nmi_espfix_stack:
@@ -1234,7 +1359,7 @@ nmi_espfix_stack:
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
CFI_ADJUST_CFA_OFFSET 4
addw $4, (%esp)
addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)

View File

@@ -48,10 +48,11 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -76,20 +77,17 @@ ENTRY(ftrace_caller)
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
.globl ftrace_call
ftrace_call:
GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
GLOBAL(ftrace_graph_call)
jmp ftrace_stub
#endif
.globl ftrace_stub
ftrace_stub:
GLOBAL(ftrace_stub)
retq
END(ftrace_caller)
@@ -109,8 +107,7 @@ ENTRY(mcount)
jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
GLOBAL(ftrace_stub)
retq
trace:
@@ -147,9 +144,7 @@ ENTRY(ftrace_graph_caller)
retq
END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
GLOBAL(return_to_handler)
subq $80, %rsp
movq %rax, (%rsp)
@@ -187,6 +182,7 @@ return_to_handler:
ENTRY(native_usergs_sysret64)
swapgs
sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
@@ -209,7 +205,7 @@ ENTRY(native_usergs_sysret64)
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
movq %gs:pda_oldrsp,\tmp
movq PER_CPU_VAR(old_rsp),\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +216,7 @@ ENTRY(native_usergs_sysret64)
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq \tmp,PER_CPU_VAR(old_rsp)
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
@@ -336,15 +332,15 @@ ENTRY(save_args)
je 1f
SWAPGS
/*
* irqcount is used to check if a CPU is already on an interrupt stack
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
1: incl %gs:pda_irqcount
1: incl PER_CPU_VAR(irq_count)
jne 2f
popq_cfi %rax /* move return address... */
mov %gs:pda_irqstackptr,%rsp
mov PER_CPU_VAR(irq_stack_ptr),%rsp
EMPTY_FRAME 0
pushq_cfi %rbp /* backlink for unwinder */
pushq_cfi %rax /* ... to the new stack */
@@ -409,6 +405,8 @@ END(save_paranoid)
ENTRY(ret_from_fork)
DEFAULT_FRAME
LOCK ; btr $TIF_FORK,TI_flags(%r8)
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
@@ -468,7 +466,7 @@ END(ret_from_fork)
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
@@ -479,8 +477,8 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
@@ -523,7 +521,7 @@ sysret_check:
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp, %rsp
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
@@ -630,16 +628,14 @@ tracesys:
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
.globl int_ret_from_sys_call
.globl int_with_check
int_ret_from_sys_call:
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
@@ -833,11 +829,11 @@ common_interrupt:
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
@@ -982,8 +978,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
@@ -1073,10 +1071,10 @@ ENTRY(\sym)
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
movq %gs:pda_data_offset, %rbp
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
PER_CPU(init_tss, %rbp)
subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
call \do_sym
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
@@ -1138,7 +1136,7 @@ ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
gs_change:
movl %edi,%gs
@@ -1260,14 +1258,14 @@ ENTRY(call_softirq)
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
incl %gs:pda_irqcount
cmove %gs:pda_irqstackptr,%rsp
incl PER_CPU_VAR(irq_count)
cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
END(call_softirq)
@@ -1297,15 +1295,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
DEFAULT_FRAME
11: incl %gs:pda_irqcount
11: incl PER_CPU_VAR(irq_count)
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq %gs:pda_irqstackptr,%rsp
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)

View File

@@ -1,378 +0,0 @@
/*
* Written by: Garry Forsgren, Unisys Corporation
* Natalie Protasevich, Unisys Corporation
* This file contains the code to configure and interface
* with Unisys ES7000 series hardware system manager.
*
* Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston MA 02111-1307, USA.
*
* Contact information: Unisys Corporation, Township Line & Union Meeting
* Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
*
* http://www.unisys.com
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/atomic.h>
#include <asm/apicdef.h>
#include <mach_mpparse.h>
#include <asm/genapic.h>
#include <asm/setup.h>
/*
 * ES7000 chipsets
 */
#define NON_UNISYS		0
#define ES7000_CLASSIC		1
#define ES7000_ZORRO		2

/* Entry type tags matched against the type byte of each OEM table entry. */
#define MIP_REG			1
#define MIP_PSAI_REG		4

#define MIP_BUSY		1
/* Iteration budget used both as retry count and as per-retry spin length. */
#define MIP_SPIN		0xf0000
/* Flag bit tested/cleared in the off_38 word of struct mip_reg. */
#define MIP_VALID		0x0100000000000000ULL

/*
 * Field extractors for 64-bit MIP words.  The argument is parenthesized so
 * that compound expressions (e.g. "a | b") are evaluated as a whole before
 * the shift/mask is applied.
 */
#define MIP_PORT(VALUE)		(((VALUE) >> 32) & 0xffff)
#define MIP_RD_LO(VALUE)	((VALUE) & 0xffffffff)
/*
 * MIP register description entry of the OEM table.  parse_unisys_oem()
 * takes the low 32 bits of host_reg and mip_reg as addresses and extracts
 * the I/O port number from bits 32..47 of mip_info (see MIP_PORT()).
 */
struct mip_reg_info {
unsigned long long mip_info;
unsigned long long delivery_info;
unsigned long long host_reg;
unsigned long long mip_reg;
};
/* Partition information entry; not read by the code in this file. */
struct part_info {
unsigned char type;
unsigned char length;
unsigned char part_id;
unsigned char apic_mode;
unsigned long snum;
char ptype[16];
char sname[64];
char pname[64];
};
/*
 * PSAI entry of the OEM table.  A non-zero addr is mapped with __va() and
 * becomes the register used to wake secondary CPUs.
 */
struct psai {
unsigned long long entry_type;
unsigned long long addr;
unsigned long long bep_addr;
};
/* Memory information entry; not read by the code in this file. */
struct es7000_mem_info {
unsigned char type;
unsigned char length;
unsigned char resv[6];
unsigned long long start;
unsigned long long size;
};
/*
 * Aggregate of the entry types above.
 * NOTE(review): parse_unisys_oem() walks the table entry by entry through a
 * char pointer rather than through this struct -- confirm whether this
 * layout is still authoritative.
 */
struct es7000_oem_table {
unsigned long long hdr;
struct mip_reg_info mip;
struct part_info pif;
struct es7000_mem_info shm;
struct psai psai;
};
#ifdef CONFIG_ACPI
/*
 * ACPI "OEM1" table as consumed by find_unisys_acpi_oem_table(): the
 * standard ACPI header followed by the address and size of the Unisys
 * OEM table that gets mapped.
 */
struct oem_table {
struct acpi_table_header Header;
u32 OEMTableAddr;
u32 OEMTableSize;
};
/* Both are defined further down in this file. */
extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
#endif
/*
 * MIP register block: eight 64-bit words named after their byte offset.
 * In this file off_0 carries the command on submit and is read back for
 * the status, and off_38 carries the MIP_VALID flag.
 */
struct mip_reg {
unsigned long long off_0;
unsigned long long off_8;
unsigned long long off_10;
unsigned long long off_18;
unsigned long long off_20;
unsigned long long off_28;
unsigned long long off_30;
unsigned long long off_38;
};
/* Command word written to off_0 by es7000_sw_apic(). */
#define MIP_SW_APIC		0x1020b
/* Low byte of a MIP word; argument parenthesized so compound expressions mask correctly. */
#define MIP_FUNC(VALUE)		((VALUE) & 0xff)
/*
* ES7000 Globals
*
* psai, mip_reg, host_reg, mip_port, mip_addr and host_addr are filled in
* by parse_unisys_oem() from the OEM table.  es7000_plat holds one of
* NON_UNISYS / ES7000_CLASSIC / ES7000_ZORRO.
*/
/* Register polled and written to wake a secondary CPU; NULL if absent. */
static volatile unsigned long *psai = NULL;
static struct mip_reg *mip_reg;
static struct mip_reg *host_reg;
/* I/O port written (outb) to signal that a command was posted. */
static int mip_port;
static unsigned long mip_addr, host_addr;
int es7000_plat;
/*
 * GSI override for ES7000 platforms.
 *
 * "base" caches the sum of nr_ioapic_registers[] over all IO-APICs; it is
 * computed the first time it is needed (while it is still zero).
 */
static unsigned int base;

/*
 * Translate a GSI for the given IO-APIC.  On ES7000_ZORRO the GSI is
 * returned unchanged; otherwise GSIs below 16 on IO-APIC 0 are shifted
 * up by "base".
 */
static int
es7000_rename_gsi(int ioapic, int gsi)
{
	int idx;

	if (es7000_plat == ES7000_ZORRO)
		return gsi;

	if (base == 0) {
		for (idx = 0; idx < nr_ioapics; idx++)
			base += nr_ioapic_registers[idx];
	}

	/* Only the first 16 GSIs of the first IO-APIC are renumbered. */
	if (ioapic != 0 || gsi >= 16)
		return gsi;

	return gsi + base;
}
static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
if (psai == NULL)
return -1;
vect = ((unsigned long)__pa(eip)/0x1000) << 16;
psaival = (0x1000000 | vect | cpu);
while (*psai & 0x1000000)
;
*psai = psaival;
return 0;
}
/*
 * Intentionally empty wait_for_init_deassert hook; installed into genapic
 * by es7000_update_genapic() for the cluster case.
 */
static void noop_wait_for_deassert(atomic_t *deassert_not_used)
{
}
/*
 * update_genapic quirk for ES7000: always wake secondary CPUs through the
 * MIP, and switch to cluster mode on family 6, models 7..11.
 *
 * The model check used to read "model >= 7 || model <= 11", which is true
 * for every model number; the range test requires "&&".
 *
 * Always returns 0.
 */
static int __init es7000_update_genapic(void)
{
	genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;

	/* MPENTIUMIII */
	if (boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11) {
		es7000_update_genapic_to_cluster();
		genapic->wait_for_init_deassert = noop_wait_for_deassert;
		/*
		 * Installed again on purpose: presumably the call above may
		 * repoint genapic at a different structure -- TODO confirm.
		 */
		genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
	}

	return 0;
}
/*
 * Record which ES7000 generation we are running on and install the
 * ES7000 GSI-renumbering and genapic-update hooks.
 */
void __init
setup_unisys(void)
{
	/*
	 * Determine the generation of the ES7000 currently running.
	 *
	 * es7000_plat = 1 if the machine is a 5xx ES7000 box
	 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
	 */
	if (boot_cpu_data.x86 > 15 || boot_cpu_data.x86_model > 2)
		es7000_plat = ES7000_ZORRO;
	else
		es7000_plat = ES7000_CLASSIC;

	ioapic_renumber_irq = es7000_rename_gsi;
	x86_quirks->update_genapic = es7000_update_genapic;
}
/*
 * Parse the OEM Table
 *
 * Skips the 8 bytes at the start of the table and then examines seven
 * entries.  Each entry starts with a type byte followed by a size byte;
 * the size is the distance to the next entry.  A MIP_REG entry supplies
 * the host/MIP register addresses and the MIP port, a MIP_PSAI_REG entry
 * supplies the PSAI register address.
 *
 * When at least two such entries were seen, setup_unisys() is called;
 * otherwise es7000_plat is set to NON_UNISYS.  Returns es7000_plat.
 */
int __init
parse_unisys_oem (char *oemptr)
{
	struct mip_reg_info *mi;
	struct psai *psaip;
	unsigned char type, size;
	unsigned long lo;
	char *tp = oemptr + 8;
	int found = 0;
	int n;

	for (n = 0; n < 7; n++, tp += size) {
		type = tp[0];
		size = tp[1];

		if (type == MIP_REG) {
			mi = (struct mip_reg_info *)tp;

			lo = MIP_RD_LO(mi->host_reg);
			host_addr = lo;
			host_reg = __va(lo);

			lo = MIP_RD_LO(mi->mip_reg);
			mip_port = MIP_PORT(mi->mip_info);
			mip_addr = lo;
			mip_reg = __va(lo);

			pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
				 (unsigned long)host_reg);
			pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
				 (unsigned long)mip_reg);
			found++;
		} else if (type == MIP_PSAI_REG) {
			psaip = (struct psai *)tp;
			if (tp != NULL) {
				/* A zero address means there is no PSAI register. */
				psai = psaip->addr ? __va(psaip->addr) : NULL;
				found++;
			}
		}
		/* Any other entry type is skipped. */
	}

	if (found >= 2)
		setup_unisys();
	else
		es7000_plat = NON_UNISYS;

	return es7000_plat;
}
#ifdef CONFIG_ACPI
/* Address and size of the Unisys OEM table, as read from the ACPI OEM1 table. */
static unsigned long oem_addrX;
static unsigned long oem_size;

/*
 * Walk the ACPI "OEM1" table instances and pick the first one whose OEM id
 * starts with "UNISYS".  On a match the OEM table it describes is mapped
 * and the mapped address is stored in *oem_addr.
 *
 * Returns 0 on a match, -1 when no instance matches.
 */
int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
	struct acpi_table_header *header = NULL;
	struct oem_table *table;
	int instance;

	for (instance = 0;
	     ACPI_SUCCESS(acpi_get_table("OEM1", instance, &header));
	     instance++) {
		if (memcmp((char *) &header->oem_id, "UNISYS", 6) != 0)
			continue;

		table = (struct oem_table *)header;
		oem_addrX = table->OEMTableAddr;
		oem_size = table->OEMTableSize;
		*oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
							    oem_size);
		return 0;
	}

	return -1;
}
/*
 * Counterpart of find_unisys_acpi_oem_table(); the body is empty, so the
 * mapping is not undone here.
 */
void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
}
#endif
/* Delay by issuing rep_nop() n times; does nothing for n <= 0. */
static void
es7000_spin(int n)
{
	int count;

	for (count = 0; count < n; count++)
		rep_nop();
}
/*
 * Post a command block to the host register area and wait for completion.
 *
 * Waits (bounded by MIP_SPIN retries) for MIP_VALID to clear in
 * host_reg->off_38, copies *mip_reg into host_reg, signals the MIP port,
 * then waits for MIP_VALID to be set in mip_reg->off_38.
 *
 * Returns -1 on either timeout, otherwise the status field extracted from
 * mip_reg->off_0.  MIP_VALID is cleared in mip_reg->off_38 before returning.
 *
 * NOTE(review): the parameter shadows the file-scope "mip_reg" pointer, so
 * the completion wait and status read look at the caller's buffer, not at
 * the mapped MIP register block -- confirm that this is intended.
 */
static int __init
es7000_mip_write(struct mip_reg *mip_reg)
{
	int status = 0;
	int spin;

	spin = MIP_SPIN;
	while ((host_reg->off_38 & MIP_VALID) != 0) {
		if (--spin <= 0) {
			/* Terminate the line so later output is not appended to it. */
			printk("es7000_mip_write: Timeout waiting for Host Valid Flag\n");
			return -1;
		}
		es7000_spin(MIP_SPIN);
	}

	memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
	outb(1, mip_port);

	spin = MIP_SPIN;
	while ((mip_reg->off_38 & MIP_VALID) == 0) {
		if (--spin <= 0) {
			printk("es7000_mip_write: Timeout waiting for MIP Valid Flag\n");
			return -1;
		}
		es7000_spin(MIP_SPIN);
	}

	/*
	 * NOTE(review): the mask selects bits 40..55 but the shift is 48, so
	 * only bits 48..55 reach the result -- verify against the MIP spec.
	 */
	status = (mip_reg->off_0 & 0xffff0000000000ULL) >> 48;
	mip_reg->off_38 &= ~MIP_VALID;

	return status;
}
/*
 * Switch an ES7000 into APIC mode by sending the MIP_SW_APIC command.
 * Does nothing when es7000_plat is zero.  The command is retried for as
 * long as es7000_mip_write() reports a non-zero status.
 */
void __init
es7000_sw_apic(void)
{
	/* Zero-filled command block with only the command and valid flag set. */
	struct mip_reg es7000_mip_reg = {
		.off_0	= MIP_SW_APIC,
		.off_38	= MIP_VALID,
	};
	int mip_status;

	if (!es7000_plat)
		return;

	printk("ES7000: Enabling APIC mode.\n");

	for (;;) {
		mip_status = es7000_mip_write(&es7000_mip_reg);
		if (mip_status == 0)
			break;
		printk("es7000_sw_apic: command failed, status = %x\n",
		       mip_status);
	}
}

View File

@@ -26,27 +26,6 @@
#include <asm/bios_ebda.h>
#include <asm/trampoline.h>
/* boot cpu pda */
static struct x8664_pda _boot_cpu_pda;
#ifdef CONFIG_SMP
/*
* We install an empty cpu_pda pointer table to indicate to early users
* (numa_set_node) that the cpu_pda pointer table for cpus other than
* the boot cpu is not yet setup.
*/
static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
#else
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
#endif
void __init x86_64_init_pda(void)
{
_cpu_pda = __cpu_pda;
cpu_pda(0) = &_boot_cpu_pda;
pda_init(0);
}
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (console_loglevel == 10)
early_printk("Kernel alive\n");
x86_64_init_pda();
x86_64_start_reservations(real_mode_data);
}

View File

@@ -11,14 +11,15 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/page_types.h>
#include <asm/pgtable_types.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -429,14 +430,34 @@ is386: movl $2,%ecx # set MP
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
xorl %eax,%eax # Clear GS and LDT
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* The linker can't handle this by relocation. Manually set
* base address in stack canary segment descriptor.
*/
cmpb $0,ready
jne 1f
movl $per_cpu__gdt_page,%eax
movl $per_cpu__stack_canary,%ecx
subl $20, %ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
1:
#endif
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
xorl %eax,%eax # Clear LDT
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -446,8 +467,6 @@ is386: movl $2,%ecx # set MP
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
@@ -548,11 +567,7 @@ early_fault:
pushl %eax
pushl %edx /* trapno */
pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
call early_printk
#else
call printk
#endif
#endif
call dump_stack
hlt_loop:
@@ -580,11 +595,10 @@ ignore_int:
pushl 32(%esp)
pushl 40(%esp)
pushl $int_msg
#ifdef CONFIG_EARLY_PRINTK
call early_printk
#else
call printk
#endif
call dump_stack
addl $(5*4),%esp
popl %ds
popl %es
@@ -660,7 +674,7 @@ early_recursion_flag:
.long 0
int_msg:
.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
.asciz "Unknown interrupt or fault at: %p %p %p\n"
fault_msg:
/* fault info: */

View File

@@ -19,6 +19,7 @@
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -226,12 +227,15 @@ ENTRY(secondary_startup_64)
movl %eax,%fs
movl %eax,%gs
/*
* Setup up a dummy PDA. this is just for some early bootup code
* that does in_interrupt()
*/
/* Set up %gs.
*
* The base of %gs always points to the bottom of the irqstack
* union. If the stack protector canary is enabled, it is
* located at %gs:40. Note that, on SMP, the boot cpu uses
* init data section till per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movq $empty_zero_page,%rax
movq initial_gs(%rip),%rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
@@ -257,6 +261,8 @@ ENTRY(secondary_startup_64)
.align 8
ENTRY(initial_code)
.quad x86_64_start_kernel
ENTRY(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
__FINITDATA
ENTRY(stack_start)
@@ -323,8 +329,6 @@ early_idt_ripmsg:
#endif /* CONFIG_EARLY_PRINTK */
.previous
.balign PAGE_SIZE
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
ENTRY(name)
@@ -401,7 +405,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
.quad per_cpu__gdt_page
early_gdt_descr_base:
.quad INIT_PER_CPU_VAR(gdt_page)
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
@@ -412,7 +417,7 @@ ENTRY(phys_base)
.section .bss, "aw", @nobits
.align L1_CACHE_BYTES
ENTRY(idt_table)
.skip 256 * 16
.skip IDT_ENTRIES * 16
.section .bss.page_aligned, "aw", @nobits
.align PAGE_SIZE

View File

@@ -22,7 +22,6 @@
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/arch_hooks.h>
#include <asm/i8259.h>
/*

View File

@@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
t->io_bitmap_max = bytes;
#ifdef CONFIG_X86_32
/*
* Sets the lazy trigger so that the next I/O operation will
* reload the correct bitmap.
* Reset the owner so that a process switch will not set
* tss->io_bitmap_base to IO_BITMAP_OFFSET.
*/
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
tss->io_bitmap_owner = NULL;
#else
/* Update the TSS: */
memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
#endif
put_cpu();
@@ -131,9 +120,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
}
#ifdef CONFIG_X86_32
asmlinkage long sys_iopl(unsigned long regsp)
long sys_iopl(struct pt_regs *regs)
{
struct pt_regs *regs = (struct pt_regs *)&regsp;
unsigned int level = regs->bx;
struct thread_struct *t = &current->thread;
int rc;

View File

@@ -1,190 +0,0 @@
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/apic.h>
#include <asm/proto.h>
#ifdef CONFIG_X86_32
#include <mach_apic.h>
#include <mach_ipi.h>
/*
* the following functions deal with sending IPIs between CPUs.
*
* We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
*/
/*
 * Build the ICR value for the given shortcut and vector, always using
 * logical destination mode.  NMI_VECTOR selects NMI delivery; any other
 * vector is sent in fixed delivery mode with the vector number included.
 */
static inline int __prepare_ICR(unsigned int shortcut, int vector)
{
	unsigned int icr = shortcut | APIC_DEST_LOGICAL;

	if (vector == NMI_VECTOR)
		icr |= APIC_DM_NMI;
	else
		icr |= APIC_DM_FIXED | vector;

	return icr;
}
/* Build the ICR2 value that carries the destination mask. */
static inline int __prepare_ICR2(unsigned int mask)
{
	int icr2 = SET_APIC_DEST_FIELD(mask);

	return icr2;
}
/*
 * Send an IPI using an APIC destination shorthand.
 *
 * @shortcut: shorthand bits ORed into the ICR (e.g. APIC_DEST_SELF)
 * @vector:   vector to deliver; NMI_VECTOR is sent as an NMI
 *
 * Subtle (note carried over from the original author): in the case of
 * the 'never do double writes' workaround we have to lock out interrupts
 * to be safe. As we don't care about the value read we use an atomic rmw
 * access to avoid costly cli/sti. Otherwise we use an even cheaper
 * single atomic write to the APIC.
 */
void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
	/* Wait for idle before issuing a new command. */
	apic_wait_icr_idle();

	/*
	 * No need to touch the target chip field when a shorthand is used.
	 * The write to APIC_ICR is what fires the IPI off.
	 */
	apic_write(APIC_ICR, __prepare_ICR(shortcut, vector));
}
/* Deliver @vector to the calling CPU via the APIC_DEST_SELF shorthand. */
void send_IPI_self(int vector)
{
	__send_IPI_shortcut(APIC_DEST_SELF, vector);
}
/*
 * Send an IPI with no shorthand notation: the destination is given
 * explicitly (it lives in bits 56 to 63 of the ICR).
 *
 * @mask:   value placed in the destination field via __prepare_ICR2()
 * @vector: vector to deliver; NMI_VECTOR is sent as an NMI
 */
static inline void __send_IPI_dest_field(unsigned long mask, int vector)
{
	/* Wait for idle; the NMI path uses the "safe" flavour of the wait. */
	if (unlikely(vector == NMI_VECTOR))
		safe_apic_wait_icr_idle();
	else
		apic_wait_icr_idle();

	/* Prepare the target chip field first ... */
	apic_write(APIC_ICR2, __prepare_ICR2(mask));

	/*
	 * ... then program the ICR with no shorthand bits. The write to
	 * APIC_ICR fires the IPI off.
	 */
	apic_write(APIC_ICR, __prepare_ICR(0, vector));
}
/*
 * Send @vector to the CPUs in @cpumask using a single IPI.
 *
 * This is only used on smaller machines: only the first word of the
 * cpumask is looked at. Warns if that word names a CPU that is not set
 * in cpu_online_mask. Runs with local interrupts disabled.
 */
void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
{
	unsigned long dest_bits = cpumask_bits(cpumask)[0];
	unsigned long irqflags;

	local_irq_save(irqflags);

	WARN_ON(dest_bits & ~cpumask_bits(cpu_online_mask)[0]);
	__send_IPI_dest_field(dest_bits, vector);

	local_irq_restore(irqflags);
}
void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned int query_cpu;
/*
* Hack. The clustered APIC addressing mode doesn't allow us to send
* to an arbitrary mask, so I do a unicasts to each CPU instead. This
* should be modified to do 1 message per cluster ID - mbligh
*/
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
local_irq_restore(flags);
}
void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned int query_cpu;
unsigned int this_cpu = smp_processor_id();
/* See Hack comment above */
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
if (query_cpu != this_cpu)
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
vector);
local_irq_restore(flags);
}
/*
 * Map an APIC id to a CPU number by scanning x86_cpu_to_apicid over all
 * possible CPUs. Returns the first matching CPU, or -1 if none matches.
 *
 * Must come after the send_IPI functions above for inlining.
 */
static int convert_apicid_to_cpu(int apic_id)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (per_cpu(x86_cpu_to_apicid, cpu) != apic_id)
			continue;
		return cpu;
	}

	return -1;
}
/*
 * Work out the current CPU number from the hardware APIC id.
 *
 * Falls back to CPU 0 when the boot CPU has no APIC feature, when the
 * hardware id reads as BAD_APICID, or when the id does not map to any
 * possible CPU.
 */
int safe_smp_processor_id(void)
{
	int apicid, cpuid;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return 0;

	apicid = hard_smp_processor_id();
	if (apicid == BAD_APICID)
		return 0;

	cpuid = convert_apicid_to_cpu(apicid);
	if (cpuid < 0)
		return 0;

	return cpuid;
}
#endif

View File

@@ -6,10 +6,12 @@
#include <linux/kernel_stat.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/ftrace.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/idle.h>
atomic_t irq_err_count;
@@ -36,11 +38,7 @@ void ack_bad_irq(unsigned int irq)
#endif
}
#ifdef CONFIG_X86_32
# define irq_stats(x) (&per_cpu(irq_stat, x))
#else
# define irq_stats(x) cpu_pda(x)
#endif
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing:
*/
@@ -192,4 +190,40 @@ u64 arch_irq_stat(void)
return sum;
}
/*
 * do_IRQ handles all normal device IRQ's (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 *
 * Looks up the irq number for the incoming vector in this CPU's
 * vector_irq table and passes it to handle_irq(). If handle_irq()
 * reports the irq as unhandled, the APIC is acked (64-bit only, unless
 * the APIC is disabled) and a rate-limited message is logged.
 * Always returns 1.
 */
unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
	struct pt_regs *prev_regs = set_irq_regs(regs);

	/* high bit used in ret_from_ code */
	unsigned vector = ~regs->orig_ax;
	unsigned irq;

	exit_idle();
	irq_enter();

	irq = __get_cpu_var(vector_irq)[vector];

	if (handle_irq(irq, regs))
		goto done;

	/* Nobody handled this vector. */
#ifdef CONFIG_X86_64
	if (!disable_apic)
		ack_APIC_irq();
#endif

	if (printk_ratelimit())
		printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
			__func__, smp_processor_id(), vector, irq);

done:
	irq_exit();

	set_irq_regs(prev_regs);
	return 1;
}
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);

View File

@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/percpu.h>
#include <asm/apic.h>
@@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { }
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
};
} __attribute__((aligned(PAGE_SIZE)));
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
u32 *isp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info();
irqctx = hardirq_ctx[smp_processor_id()];
irqctx = __get_cpu_var(hardirq_ctx);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu)
{
union irq_ctx *irqctx;
if (hardirq_ctx[cpu])
if (per_cpu(hardirq_ctx, cpu))
return;
irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
irqctx = &per_cpu(hardirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
hardirq_ctx[cpu] = irqctx;
per_cpu(hardirq_ctx, cpu) = irqctx;
irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
irqctx = &per_cpu(softirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = 0;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
softirq_ctx[cpu] = irqctx;
per_cpu(softirq_ctx, cpu) = irqctx;
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
void irq_ctx_exit(int cpu)
{
hardirq_ctx[cpu] = NULL;
per_cpu(hardirq_ctx, cpu) = NULL;
}
asmlinkage void do_softirq(void)
@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void)
if (local_softirq_pending()) {
curctx = current_thread_info();
irqctx = softirq_ctx[smp_processor_id()];
irqctx = __get_cpu_var(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
@@ -191,33 +192,16 @@ static inline int
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
#endif
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
unsigned int do_IRQ(struct pt_regs *regs)
bool handle_irq(unsigned irq, struct pt_regs *regs)
{
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
int overflow;
unsigned vector = ~regs->orig_ax;
struct irq_desc *desc;
unsigned irq;
old_regs = set_irq_regs(regs);
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
int overflow;
overflow = check_stack_overflow();
desc = irq_to_desc(irq);
if (unlikely(!desc)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
__func__, irq, vector, smp_processor_id());
BUG();
}
if (unlikely(!desc))
return false;
if (!execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
@@ -225,13 +209,10 @@ unsigned int do_IRQ(struct pt_regs *regs)
desc->handle_irq(irq, desc);
}
irq_exit();
set_irq_regs(old_regs);
return 1;
return true;
}
#ifdef CONFIG_HOTPLUG_CPU
#include <mach_apic.h>
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
@@ -248,7 +229,7 @@ void fixup_irqs(void)
if (irq == 2)
continue;
affinity = &desc->affinity;
affinity = desc->affinity;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
printk("Breaking affinity for irq %i\n", irq);
affinity = cpu_all_mask;

View File

@@ -18,6 +18,13 @@
#include <linux/smp.h>
#include <asm/io_apic.h>
#include <asm/idle.h>
#include <asm/apic.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
/*
* Probabilistic stack overflow check:
@@ -41,42 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs)
#endif
}
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
bool handle_irq(unsigned irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc *desc;
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
unsigned irq;
exit_idle();
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
stack_overflow_check(regs);
desc = irq_to_desc(irq);
if (likely(desc))
generic_handle_irq_desc(irq, desc);
else {
if (!disable_apic)
ack_APIC_irq();
if (unlikely(!desc))
return false;
if (printk_ratelimit())
printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(), vector);
}
irq_exit();
set_irq_regs(old_regs);
return 1;
generic_handle_irq_desc(irq, desc);
return true;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +83,7 @@ void fixup_irqs(void)
/* interrupts are disabled at this point */
spin_lock(&desc->lock);
affinity = &desc->affinity;
affinity = desc->affinity;
if (!irq_has_action(irq) ||
cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);

View File

@@ -18,7 +18,7 @@
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/arch_hooks.h>
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
@@ -78,6 +78,15 @@ void __init init_ISA_irqs(void)
}
}
/*
 * IRQ2 is the cascade interrupt to the second interrupt controller.
 *
 * The action uses no_action as its handler and an empty CPU mask.
 * native_init_IRQ() registers it with setup_irq(2, &irq2) when
 * acpi_ioapic is not set.
 */
static struct irqaction irq2 = {
.handler = no_action,
.mask = CPU_MASK_NONE,
.name = "cascade",
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... IRQ0_VECTOR - 1] = -1,
[IRQ0_VECTOR] = 0,
@@ -118,8 +127,8 @@ void __init native_init_IRQ(void)
{
int i;
/* all the set up before the call gates are initialised */
pre_intr_init_hook();
/* Execute any quirks before the call gates are initialised: */
x86_quirk_pre_intr_init();
/*
* Cover the whole vector space, no vector can escape
@@ -140,8 +149,15 @@ void __init native_init_IRQ(void)
*/
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
/* IPIs for invalidation */
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -169,10 +185,14 @@ void __init native_init_IRQ(void)
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
/* setup after call gates are initialised (usually add in
* the architecture specific gates)
if (!acpi_ioapic)
setup_irq(2, &irq2);
/*
* Call quirks after call gates are initialised (usually add in
* the architecture specific gates):
*/
intr_init_hook();
x86_quirk_intr_init();
/*
* External FPU? Set up irq13 if so, for

View File

@@ -46,7 +46,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
#include <mach_ipi.h>
#include <asm/apic.h>
/*
* Put the error code here just in case the user cares:
@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
*/
void kgdb_roundup_cpus(unsigned long flags)
{
send_IPI_allbutself(APIC_DM_NMI);
apic->send_IPI_allbutself(APIC_DM_NMI);
}
#endif

View File

@@ -19,7 +19,6 @@
#include <linux/clocksource.h>
#include <linux/kvm_para.h>
#include <asm/pvclock.h>
#include <asm/arch_hooks.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>

View File

@@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one(
static void machine_kexec_prepare_page_tables(struct kimage *image)
{
void *control_page;
pmd_t *pmd = 0;
pmd_t *pmd = NULL;
control_page = page_address(image->control_code_page);
#ifdef CONFIG_X86_PAE

View File

@@ -18,15 +18,6 @@
#include <asm/mmu_context.h>
#include <asm/io.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u64 kexec_pgd[512] PAGE_ALIGNED;
static u64 kexec_pud0[512] PAGE_ALIGNED;
static u64 kexec_pmd0[512] PAGE_ALIGNED;
static u64 kexec_pte0[512] PAGE_ALIGNED;
static u64 kexec_pud1[512] PAGE_ALIGNED;
static u64 kexec_pmd1[512] PAGE_ALIGNED;
static u64 kexec_pte1[512] PAGE_ALIGNED;
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
unsigned long end_addr;
@@ -107,12 +98,65 @@ out:
return result;
}
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.pud);
free_page((unsigned long)image->arch.pmd);
free_page((unsigned long)image->arch.pte);
}
/*
 * Map the virtual address of relocate_kernel to the page that follows
 * the first control-code page (physical address of control_code_page +
 * PAGE_SIZE) in the page table rooted at @pgd.
 *
 * Any missing pud/pmd/pte level is allocated as a zeroed page, recorded
 * in image->arch so it can be released later, and hooked in with
 * _KERNPG_TABLE. The final pte is set with PAGE_KERNEL_EXEC.
 *
 * Returns 0 on success. If an allocation fails, the levels recorded so
 * far are released via free_transition_pgtable() and -ENOMEM is
 * returned.
 */
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
	unsigned long vaddr, paddr;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	vaddr = (unsigned long)relocate_kernel;
	paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);

	/* Level 4 -> level 3 */
	pgd += pgd_index(vaddr);
	if (!pgd_present(*pgd)) {
		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
		if (!pud)
			goto fail;
		image->arch.pud = pud;
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}

	/* Level 3 -> level 2 */
	pud = pud_offset(pgd, vaddr);
	if (!pud_present(*pud)) {
		pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
		if (!pmd)
			goto fail;
		image->arch.pmd = pmd;
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}

	/* Level 2 -> level 1 */
	pmd = pmd_offset(pud, vaddr);
	if (!pmd_present(*pmd)) {
		pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
		if (!pte)
			goto fail;
		image->arch.pte = pte;
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
	}

	/* Finally install the executable mapping itself. */
	pte = pte_offset_kernel(pmd, vaddr);
	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
	return 0;

fail:
	free_transition_pgtable(image);
	return -ENOMEM;
}
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
pgd_t *level4p;
int result;
level4p = (pgd_t *)__va(start_pgtable);
return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
return init_transition_pgtable(image, level4p);
}
static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)
void machine_kexec_cleanup(struct kimage *image)
{
return;
free_transition_pgtable(image);
}
/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
memcpy(control_page, relocate_kernel, PAGE_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));

View File

@@ -51,7 +51,6 @@
#include <linux/ioport.h>
#include <asm/uaccess.h>
#include <linux/init.h>
#include <asm/arch_hooks.h>
static unsigned char which_scsi;
@@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void)
* adapter was responsible for the error.
*/
bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
mca_nmi_hook();
} /* mca_handle_nmi */
}

View File

@@ -87,9 +87,9 @@
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
}
static inline int
static inline int
update_match_revision(struct microcode_header_intel *mc_header, int rev)
{
return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
return ret;
}
ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
&get_ucode_fw);
ret = generic_load_microcode(cpu, (void *)firmware->data,
firmware->size, &get_ucode_fw);
release_firmware(firmware);
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
}
static void microcode_fini_cpu(int cpu)

View File

@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
{
vfree(module_region);
/* FIXME: If module_region == mod->init_region, trim exception
table entries. */
table entries. */
}
/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
*para = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".text", secstrings + s->sh_name))
text = s;
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s;
locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
}

View File

@@ -30,14 +30,14 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#define DEBUGP(fmt...)
#define DEBUGP(fmt...)
#ifndef CONFIG_UML
void module_free(struct module *mod, void *module_region)
{
vfree(module_region);
/* FIXME: If module_region == mod->init_region, trim exception
table entries. */
table entries. */
}
void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
Elf64_Sym *sym;
void *loc;
u64 val;
u64 val;
DEBUGP("Applying relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rel[i].r_info);
DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info),
sym->st_value, rel[i].r_addend, (u64)loc);
DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info),
sym->st_value, rel[i].r_addend, (u64)loc);
val = sym->st_value + rel[i].r_addend;
val = sym->st_value + rel[i].r_addend;
switch (ELF64_R_TYPE(rel[i].r_info)) {
case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
case R_X86_64_PC32:
case R_X86_64_PC32:
val -= (u64)loc;
*(u32 *)loc = val;
#if 0
if ((s64)val != *(s32 *)loc)
goto overflow;
goto overflow;
#endif
break;
default:
printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n",
printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return 0;
overflow:
printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
unsigned int relsec,
struct module *me)
{
printk("non add relocation not supported\n");
printk(KERN_ERR "non add relocation not supported\n");
return -ENOSYS;
}
}
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s;
locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
}

View File

@@ -3,7 +3,7 @@
* compliant MP-table parsing routines.
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
* (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
*/
@@ -29,12 +29,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
#include <mach_apicdef.h>
#include <mach_mpparse.h>
#endif
#include <asm/apic.h>
/*
* Checksum an MP configuration block.
*/
@@ -144,11 +139,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
if (bad_ioapic(m->apicaddr))
return;
mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
mp_ioapics[nr_ioapics].mp_type = m->type;
mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
mp_ioapics[nr_ioapics].mp_flags = m->flags;
mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
mp_ioapics[nr_ioapics].apicid = m->apicid;
mp_ioapics[nr_ioapics].type = m->type;
mp_ioapics[nr_ioapics].apicver = m->apicver;
mp_ioapics[nr_ioapics].flags = m->flags;
nr_ioapics++;
}
@@ -160,55 +155,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
m->srcbusirq, m->dstapic, m->dstirq);
}
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
{
apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
mp_irq->irqtype, mp_irq->irqflag & 3,
(mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
}
static void __init assign_to_mp_irq(struct mpc_intsrc *m,
struct mp_config_intsrc *mp_irq)
struct mpc_intsrc *mp_irq)
{
mp_irq->mp_dstapic = m->dstapic;
mp_irq->mp_type = m->type;
mp_irq->mp_irqtype = m->irqtype;
mp_irq->mp_irqflag = m->irqflag;
mp_irq->mp_srcbus = m->srcbus;
mp_irq->mp_srcbusirq = m->srcbusirq;
mp_irq->mp_dstirq = m->dstirq;
mp_irq->dstapic = m->dstapic;
mp_irq->type = m->type;
mp_irq->irqtype = m->irqtype;
mp_irq->irqflag = m->irqflag;
mp_irq->srcbus = m->srcbus;
mp_irq->srcbusirq = m->srcbusirq;
mp_irq->dstirq = m->dstirq;
}
static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
struct mpc_intsrc *m)
{
m->dstapic = mp_irq->mp_dstapic;
m->type = mp_irq->mp_type;
m->irqtype = mp_irq->mp_irqtype;
m->irqflag = mp_irq->mp_irqflag;
m->srcbus = mp_irq->mp_srcbus;
m->srcbusirq = mp_irq->mp_srcbusirq;
m->dstirq = mp_irq->mp_dstirq;
m->dstapic = mp_irq->dstapic;
m->type = mp_irq->type;
m->irqtype = mp_irq->irqtype;
m->irqflag = mp_irq->irqflag;
m->srcbus = mp_irq->srcbus;
m->srcbusirq = mp_irq->srcbusirq;
m->dstirq = mp_irq->dstirq;
}
static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
struct mpc_intsrc *m)
{
if (mp_irq->mp_dstapic != m->dstapic)
if (mp_irq->dstapic != m->dstapic)
return 1;
if (mp_irq->mp_type != m->type)
if (mp_irq->type != m->type)
return 2;
if (mp_irq->mp_irqtype != m->irqtype)
if (mp_irq->irqtype != m->irqtype)
return 3;
if (mp_irq->mp_irqflag != m->irqflag)
if (mp_irq->irqflag != m->irqflag)
return 4;
if (mp_irq->mp_srcbus != m->srcbus)
if (mp_irq->srcbus != m->srcbus)
return 5;
if (mp_irq->mp_srcbusirq != m->srcbusirq)
if (mp_irq->srcbusirq != m->srcbusirq)
return 6;
if (mp_irq->mp_dstirq != m->dstirq)
if (mp_irq->dstirq != m->dstirq)
return 7;
return 0;
@@ -292,16 +287,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
return 0;
#ifdef CONFIG_X86_32
/*
* need to make sure summit and es7000's mps_oem_check is safe to be
* called early via genericarch 's mps_oem_check
*/
if (early) {
#ifdef CONFIG_X86_NUMAQ
numaq_mps_oem_check(mpc, oem, str);
#endif
} else
mps_oem_check(mpc, oem, str);
generic_mps_oem_check(mpc, oem, str);
#endif
/* save the local APIC address, it might be non-default */
if (!acpi_lapic)
@@ -386,13 +372,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
(*x86_quirks->mpc_record)++;
}
#ifdef CONFIG_X86_GENERICARCH
generic_bigsmp_probe();
#ifdef CONFIG_X86_BIGSMP
generic_bigsmp_probe();
#endif
#ifdef CONFIG_X86_32
setup_apic_routing();
#endif
if (apic->setup_apic_routing)
apic->setup_apic_routing();
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
@@ -417,7 +403,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
intsrc.type = MP_INTSRC;
intsrc.irqflag = 0; /* conforming */
intsrc.srcbus = 0;
intsrc.dstapic = mp_ioapics[0].mp_apicid;
intsrc.dstapic = mp_ioapics[0].apicid;
intsrc.irqtype = mp_INT;
@@ -570,14 +556,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
}
}
static struct intel_mp_floating *mpf_found;
static struct mpf_intel *mpf_found;
/*
* Scan the memory blocks for an SMP configuration block.
*/
static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
struct mpf_intel *mpf = mpf_found;
if (!mpf)
return;
@@ -598,9 +584,9 @@ static void __init __get_smp_config(unsigned int early)
}
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
if (mpf->mpf_feature2 & (1 << 7)) {
if (mpf->feature2 & (1 << 7)) {
printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
@@ -611,7 +597,7 @@ static void __init __get_smp_config(unsigned int early)
/*
* Now see if we need to read further.
*/
if (mpf->mpf_feature1 != 0) {
if (mpf->feature1 != 0) {
if (early) {
/*
* local APIC has default address
@@ -621,16 +607,16 @@ static void __init __get_smp_config(unsigned int early)
}
printk(KERN_INFO "Default MP configuration #%d\n",
mpf->mpf_feature1);
construct_default_ISA_mptable(mpf->mpf_feature1);
mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->mpf_physptr) {
} else if (mpf->physptr) {
/*
* Read the physical hardware table. Anything here will
* override the defaults.
*/
if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
@@ -688,32 +674,32 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned reserve)
{
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
struct mpf_intel *mpf;
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
mpf = (struct intel_mp_floating *)bp;
mpf = (struct mpf_intel *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
(mpf->mpf_length == 1) &&
(mpf->length == 1) &&
!mpf_checksum((unsigned char *)bp, 16) &&
((mpf->mpf_specification == 1)
|| (mpf->mpf_specification == 4))) {
((mpf->specification == 1)
|| (mpf->specification == 4))) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
#endif
mpf_found = mpf;
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
mpf, virt_to_phys(mpf));
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
mpf, (u64)virt_to_phys(mpf));
if (!reserve)
return 1;
reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
BOOTMEM_DEFAULT);
if (mpf->mpf_physptr) {
if (mpf->physptr) {
unsigned long size = PAGE_SIZE;
#ifdef CONFIG_X86_32
/*
@@ -722,15 +708,24 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
* the bottom is mapped now.
* PC-9800's MPC table places on the very last
* of physical memory; so that simply reserving
* PAGE_SIZE from mpg->mpf_physptr yields BUG()
* PAGE_SIZE from mpf->physptr yields BUG()
* in reserve_bootmem.
* We also need to make sure physptr is below
* max_low_pfn:
* we don't need to reserve the area above max_low_pfn.
*/
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->mpf_physptr + size > end)
size = end - mpf->mpf_physptr;
#endif
reserve_bootmem_generic(mpf->mpf_physptr, size,
if (mpf->physptr < end) {
if (mpf->physptr + size > end)
size = end - mpf->physptr;
reserve_bootmem_generic(mpf->physptr, size,
BOOTMEM_DEFAULT);
}
#else
reserve_bootmem_generic(mpf->physptr, size,
BOOTMEM_DEFAULT);
#endif
}
return 1;
@@ -809,15 +804,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
/* not legacy */
for (i = 0; i < mp_irq_entries; i++) {
if (mp_irqs[i].mp_irqtype != mp_INT)
if (mp_irqs[i].irqtype != mp_INT)
continue;
if (mp_irqs[i].mp_irqflag != 0x0f)
if (mp_irqs[i].irqflag != 0x0f)
continue;
if (mp_irqs[i].mp_srcbus != m->srcbus)
if (mp_irqs[i].srcbus != m->srcbus)
continue;
if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
if (mp_irqs[i].srcbusirq != m->srcbusirq)
continue;
if (irq_used[i]) {
/* already claimed */
@@ -922,10 +917,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
if (irq_used[i])
continue;
if (mp_irqs[i].mp_irqtype != mp_INT)
if (mp_irqs[i].irqtype != mp_INT)
continue;
if (mp_irqs[i].mp_irqflag != 0x0f)
if (mp_irqs[i].irqflag != 0x0f)
continue;
if (nr_m_spare > 0) {
@@ -1001,7 +996,7 @@ static int __init update_mp_table(void)
{
char str[16];
char oem[10];
struct intel_mp_floating *mpf;
struct mpf_intel *mpf;
struct mpc_table *mpc, *mpc_new;
if (!enable_update_mptable)
@@ -1014,19 +1009,19 @@ static int __init update_mp_table(void)
/*
* Now see if we need to go further.
*/
if (mpf->mpf_feature1 != 0)
if (mpf->feature1 != 0)
return 0;
if (!mpf->mpf_physptr)
if (!mpf->physptr)
return 0;
mpc = phys_to_virt(mpf->mpf_physptr);
mpc = phys_to_virt(mpf->physptr);
if (!smp_check_mpc(mpc, oem, str))
return 0;
printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
printk(KERN_INFO "physptr: %x\n", mpf->physptr);
if (mpc_new_phys && mpc->length > mpc_new_length) {
mpc_new_phys = 0;
@@ -1047,23 +1042,23 @@ static int __init update_mp_table(void)
}
printk(KERN_INFO "use in-positon replacing\n");
} else {
mpf->mpf_physptr = mpc_new_phys;
mpf->physptr = mpc_new_phys;
mpc_new = phys_to_virt(mpc_new_phys);
memcpy(mpc_new, mpc, mpc->length);
mpc = mpc_new;
/* check if we can modify that */
if (mpc_new_phys - mpf->mpf_physptr) {
struct intel_mp_floating *mpf_new;
if (mpc_new_phys - mpf->physptr) {
struct mpf_intel *mpf_new;
/* steal 16 bytes from [0, 1k) */
printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
mpf_new = phys_to_virt(0x400 - 16);
memcpy(mpf_new, mpf, 16);
mpf = mpf_new;
mpf->mpf_physptr = mpc_new_phys;
mpf->physptr = mpc_new_phys;
}
mpf->mpf_checksum = 0;
mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
mpf->checksum = 0;
mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
}
/*

View File

@@ -35,10 +35,10 @@
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
static struct class *msr_class;

View File

@@ -1,293 +0,0 @@
/*
* Written by: Patricia Gaughen, IBM Corporation
*
* Copyright (C) 2002, IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <gone@us.ibm.com>
*/
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
#include <asm/genapic.h>
#include <asm/e820.h>
#include <asm/setup.h>
/* Convert a size given in megabytes (2^20 bytes) into a number of pages. */
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
/*
 * Function: smp_dump_qct()
 *
 * Description: reads the memory layout out of the quad config table.
 * node_online_map is cleared first; every quad flagged in
 * quads_present31_0 is then set online, its page-frame range is recorded
 * in node_start_pfn[]/node_end_pfn[], registered as active and present,
 * and its remap size is computed.
 */
static void __init smp_dump_qct(void)
{
	struct sys_cfg_data *scd =
		(struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
	struct eachquadmem *eq;
	int node;

	nodes_clear(node_online_map);
	for_each_node(node) {
		/* Quads not flagged as present are left offline */
		if (!(scd->quads_present31_0 & (1 << node)))
			continue;

		node_set_online(node);
		eq = &scd->eq[node];

		/* Convert to pages */
		node_start_pfn[node] = MB_TO_PAGES(
			eq->hi_shrd_mem_start - eq->priv_mem_size);
		node_end_pfn[node] = MB_TO_PAGES(
			eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);

		e820_register_active_regions(node, node_start_pfn[node],
					     node_end_pfn[node]);
		memory_present(node,
			       node_start_pfn[node], node_end_pfn[node]);
		node_remap_size[node] = node_memmap_size_bytes(node,
							node_start_pfn[node],
							node_end_pfn[node]);
	}
}
/*
 * Clear the TSC CPU capability, but only on a detected NUMA-Q system
 * (found_numaq) that has more than one node online.
 */
void __cpuinit numaq_tsc_disable(void)
{
	if (found_numaq && num_online_nodes() > 1) {
		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
		setup_clear_cpu_cap(X86_FEATURE_TSC);
	}
}
/*
 * Installed as numaq_x86_quirks.arch_pre_time_init: runs
 * numaq_tsc_disable() and always returns 0.
 */
static int __init numaq_pre_time_init(void)
{
	numaq_tsc_disable();
	return 0;
}
int found_numaq;
/*
 * Have to match translation table entries to main table entries by counter
 * hence the mpc_record variable .... can't see a less disgusting way of
 * doing this ....
 *
 * Layout of one translation record inside the OEM MPC table; the parser
 * casts the raw table bytes to this structure.
 */
struct mpc_config_translation {
	unsigned char mpc_type;		/* entry type tag (MP_TRANSLATION for these records) */
	unsigned char trans_len;
	unsigned char trans_type;	/* only printed by MP_translation_info() */
	unsigned char trans_quad;	/* quad (node) number the entry belongs to */
	unsigned char trans_global;	/* only printed by MP_translation_info() */
	unsigned char trans_local;	/* quad-local bus number */
	unsigned short trans_reserved;
};
/*
 * x86_quirks member: index of the table record currently being processed.
 * Its address is published through numaq_x86_quirks.mpc_record.
 */
static int mpc_record;
/* Translation records stashed by MP_translation_info(), indexed by mpc_record */
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
	__cpuinitdata;
/*
 * Build a logical APIC id: the quad number goes in the bits above the low
 * nibble, and the low part is the physical id shifted left by one, or 1
 * when the physical id is 0.
 */
static inline int generate_logical_apicid(int quad, int phys_apicid)
{
	int low_part = 1;

	if (phys_apicid)
		low_part = phys_apicid << 1;

	return (quad << 4) + low_part;
}
/* x86_quirks member */
static int mpc_apic_id(struct mpc_cpu *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int logical_apicid = generate_logical_apicid(quad, m->apicid);
printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
(m->cpufeature & CPU_MODEL_MASK) >> 4,
m->apicver, quad, logical_apicid);
return logical_apicid;
}
int mp_bus_id_to_node[MAX_MP_BUSSES];
int mp_bus_id_to_local[MAX_MP_BUSSES];
/* x86_quirks member */
static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
mp_bus_id_to_node[m->busid] = quad;
mp_bus_id_to_local[m->busid] = local;
printk(KERN_INFO "Bus #%d is %s (node %d)\n",
m->busid, name, quad);
}
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
/* x86_quirks member */
static void mpc_oem_pci_bus(struct mpc_bus *m)
{
int quad = translation_table[mpc_record]->trans_quad;
int local = translation_table[mpc_record]->trans_local;
quad_local_to_mp_bus_id[quad][local] = m->busid;
}
/*
 * Log one translation record, remember it in translation_table[] under the
 * current mpc_record index (unless the table is full) and make sure the
 * quad it names is marked online.
 */
static void __init MP_translation_info(struct mpc_config_translation *m)
{
	printk(KERN_INFO
	       "Translation: record %d, type %d, quad %d, global %d, local %d\n",
	       mpc_record, m->trans_type, m->trans_quad, m->trans_global,
	       m->trans_local);

	if (mpc_record < MAX_MPC_ENTRY)
		translation_table[mpc_record] = m; /* stash this for later */
	else
		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");

	/* Quad numbers outside the node range are ignored */
	if (m->trans_quad >= MAX_NUMNODES)
		return;

	if (!node_online(m->trans_quad))
		node_set_online(m->trans_quad);
}
/*
 * Add up @len bytes starting at @mp and return the low 8 bits of the sum.
 * A table whose checksum byte is correct therefore yields 0.
 */
static int __init mpf_checksum(unsigned char *mp, int len)
{
	int sum = 0;

	for (; len; len--)
		sum += *mp++;

	return sum & 0xFF;
}
/*
 * Read/parse the MPC oem tables
 *
 * Resets mpc_record, validates the table signature and checksum, then walks
 * the entries that follow the header.  Each MP_TRANSLATION entry is handed
 * to MP_translation_info(); any other entry type stops the parse with a
 * warning.  @oemsize is not used; the length comes from the table header.
 */
static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
				    unsigned short oemsize)
{
	int count = sizeof(*oemtable); /* the header size */
	unsigned char *oemptr = ((unsigned char *)oemtable) + count;

	mpc_record = 0;
	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
	       oemtable);

	if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
		printk(KERN_WARNING
		       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
		       oemtable->signature[0], oemtable->signature[1],
		       oemtable->signature[2], oemtable->signature[3]);
		return;
	}

	if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
		return;
	}

	while (count < oemtable->length) {
		struct mpc_config_translation *m;

		/* Only translation entries are understood */
		if (*oemptr != MP_TRANSLATION) {
			printk(KERN_WARNING
			       "Unrecognised OEM table entry type! - %d\n",
			       (int)*oemptr);
			return;
		}

		m = (struct mpc_config_translation *)oemptr;
		MP_translation_info(m);
		oemptr += sizeof(*m);
		count += sizeof(*m);
		++mpc_record;
	}
}
/*
 * Installed as numaq_x86_quirks.setup_ioapic_ids.  Always returns 1; per
 * the original note this lets the caller skip the generic setup
 * (NOTE(review): confirm against the caller of this hook).
 */
static int __init numaq_setup_ioapic_ids(void)
{
	/* so can skip it */
	return 1;
}
/*
 * Installed as numaq_x86_quirks.update_genapic: makes genapic wake
 * secondary CPUs through wakeup_secondary_cpu_via_nmi.  Always returns 0.
 */
static int __init numaq_update_genapic(void)
{
	genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
	return 0;
}
/*
 * NUMA-Q quirk table.  early_check_numaq() points x86_quirks at it once
 * found_numaq is set.  Hooks left NULL are not overridden by NUMA-Q.
 */
static struct x86_quirks numaq_x86_quirks __initdata = {
	.arch_pre_time_init = numaq_pre_time_init,
	.arch_time_init = NULL,
	.arch_pre_intr_init = NULL,
	.arch_memory_setup = NULL,
	.arch_intr_init = NULL,
	.arch_trap_init = NULL,
	.mach_get_smp_config = NULL,
	.mach_find_smp_config = NULL,
	/* MP table parsing hooks, all driven by the shared mpc_record counter */
	.mpc_record = &mpc_record,
	.mpc_apic_id = mpc_apic_id,
	.mpc_oem_bus_info = mpc_oem_bus_info,
	.mpc_oem_pci_bus = mpc_oem_pci_bus,
	.smp_read_mpc_oem = smp_read_mpc_oem,
	.setup_ioapic_ids = numaq_setup_ioapic_ids,
	.update_genapic = numaq_update_genapic,
};
/*
 * Flag the machine as NUMA-Q when the first 8 characters of @oem are
 * "IBM NUMA"; otherwise print a warning.  @mpc and @productid are unused.
 */
void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
{
	if (!strncmp(oem, "IBM NUMA", 8))
		found_numaq = 1;
	else
		printk("Warning! Not a NUMA-Q system!\n");
}
/*
 * Probe the boot-time SMP configuration early and, if that marked the
 * machine as NUMA-Q (found_numaq), install the NUMA-Q quirk table.
 */
static __init void early_check_numaq(void)
{
	/*
	 * Find possible boot-time SMP configuration:
	 */
	early_find_smp_config();
	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		early_get_smp_config();
	/* found_numaq is expected to be set while the configuration is read */
	if (found_numaq)
		x86_quirks = &numaq_x86_quirks;
}
/*
 * Detect NUMA-Q and, when present, load the memory layout from the quad
 * config table.  Returns 1 on a NUMA-Q system, 0 otherwise.
 */
int __init get_memcfg_numaq(void)
{
	early_check_numaq();

	if (found_numaq) {
		smp_dump_qct();
		return 1;
	}

	return 0;
}

View File

@@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = {
};
EXPORT_SYMBOL(pv_lock_ops);
/*
 * Point the spinlock entries of pv_lock_ops at the __byte_spin_*
 * implementations.  The body is empty when CONFIG_SMP is not set.
 */
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
	pv_lock_ops.spin_lock = __byte_spin_lock;
	pv_lock_ops.spin_trylock = __byte_spin_trylock;
	pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}

View File

@@ -28,7 +28,6 @@
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
@@ -44,6 +43,17 @@ void _paravirt_nop(void)
{
}
/*
 * identity function, which can be inlined
 *
 * Returns its 32-bit argument unchanged.
 */
u32 _paravirt_ident_32(u32 x)
{
	return x;
}
/* 64-bit identity function: returns its argument unchanged. */
u64 _paravirt_ident_64(u64 x)
{
	return x;
}
static void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +148,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == paravirt_nop)
else if (opfunc == _paravirt_nop)
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_32)
ret = paravirt_patch_ident_32(insnbuf, len);
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -318,10 +335,10 @@ struct pv_time_ops pv_time_ops = {
struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ,
.save_fl = native_save_fl,
.restore_fl = native_restore_fl,
.irq_disable = native_irq_disable,
.irq_enable = native_irq_enable,
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
#ifdef CONFIG_X86_64
@@ -399,6 +416,14 @@ struct pv_apic_ops pv_apic_ops = {
#endif
};
/*
 * PTE_IDENT: the identity helper, wrapped in __PV_IS_CALLEE_SAVE, whose
 * width matches a pagetable entry in this configuration.
 */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
#else
/* 64-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
struct pv_mmu_ops pv_mmu_ops = {
#ifndef CONFIG_X86_64
.pagetable_setup_start = native_pagetable_setup_start,
@@ -450,22 +475,23 @@ struct pv_mmu_ops pv_mmu_ops = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
.pmd_val = native_pmd_val,
.make_pmd = native_make_pmd,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
#if PAGETABLE_LEVELS == 4
.pud_val = native_pud_val,
.make_pud = native_make_pud,
.pud_val = PTE_IDENT,
.make_pud = PTE_IDENT,
.set_pgd = native_set_pgd,
#endif
#endif /* PAGETABLE_LEVELS >= 3 */
.pte_val = native_pte_val,
.pte_flags = native_pte_flags,
.pgd_val = native_pgd_val,
.pte_val = PTE_IDENT,
.pgd_val = PTE_IDENT,
.make_pte = native_make_pte,
.make_pgd = native_make_pgd,
.make_pte = PTE_IDENT,
.make_pgd = PTE_IDENT,
.dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop,

View File

@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
/*
 * Patch a 32-bit identity call site.  Nothing is written to @insnbuf and
 * the reported patch length is 0, because the value is already where the
 * caller expects it.
 */
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
	/* arg in %eax, return in %eax */
	unsigned emitted = 0;

	return emitted;
}
/*
 * Patch a 64-bit identity call site.  Nothing is written to @insnbuf and
 * the reported patch length is 0, because the value is already where the
 * caller expects it.
 */
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
{
	/* arg in %edx:%eax, return in %edx:%eax */
	unsigned emitted = 0;

	return emitted;
}
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{

View File

@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
/*
 * Native templates for the identity patchers: copy %edi / %rdi into
 * %eax / %rax.
 */
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
/*
 * Patch a 32-bit identity call site with the mov32 template.  Returns
 * whatever paravirt_patch_insns() returns.
 */
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
	return paravirt_patch_insns(insnbuf, len,
				    start__mov32, end__mov32);
}
/*
 * Patch a 64-bit identity call site with the mov64 template.  Returns
 * whatever paravirt_patch_insns() returns.
 */
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
{
	return paravirt_patch_insns(insnbuf, len,
				    start__mov64, end__mov64);
}
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{

View File

@@ -18,7 +18,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <setup_arch.h>
#include <asm/setup_arch.h>
static struct resource system_rom_resource = {
.name = "System ROM",

View File

@@ -1,8 +1,8 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/idle.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
@@ -11,6 +11,9 @@
#include <linux/ftrace.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -55,6 +58,192 @@ void arch_task_cache_init(void)
SLAB_PANIC, NULL);
}
/*
 * Free current thread data structures etc..
 *
 * If the current task owns an I/O permission bitmap, free it, clear
 * TIF_IO_BITMAP and reset the used part of this CPU's TSS bitmap to 0xff.
 * ds_exit_thread() is called in every case.
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
	if (me->thread.io_bitmap_ptr) {
		/* get_cpu()/put_cpu() bracket the access to this CPU's TSS */
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
	ds_exit_thread(current);
}
/*
 * Reset per-thread architecture state of the current task: resolve a
 * pending ABI switch (64-bit kernels only), clear the debug registers and
 * TIF_DEBUG, wipe the TLS array and drop the FPU state.
 */
void flush_thread(void)
{
	struct task_struct *tsk = current;
#ifdef CONFIG_X86_64
	/* A pending ABI change toggles TIF_IA32 */
	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
#endif
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}
/*
 * Set the TSD bit in CR4 on the current CPU (presumably so that user-mode
 * TSC reads fault, matching PR_TSC_SIGSEGV; see the Intel SDM).
 */
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
/*
 * Set TIF_NOTSC for the current thread and, if it was not already set,
 * update CR4 to match.  Runs with preemption disabled.
 */
void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}
/* Clear the TSD bit in CR4 on the current CPU. */
static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
/*
 * Clear TIF_NOTSC for the current thread and, if it was set, update CR4
 * to match.  Runs with preemption disabled.
 */
static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}
/*
 * Report the current thread's TSC mode to user space: PR_TSC_SIGSEGV when
 * TIF_NOTSC is set, PR_TSC_ENABLE otherwise.  The value is stored at user
 * address @adr and the result of put_user() is returned.
 */
int get_tsc_mode(unsigned long adr)
{
	unsigned int val = test_thread_flag(TIF_NOTSC) ? PR_TSC_SIGSEGV
						       : PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}
/*
 * Select the current thread's TSC mode.  @val must be PR_TSC_SIGSEGV or
 * PR_TSC_ENABLE; any other value returns -EINVAL.  Returns 0 on success.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		break;
	case PR_TSC_ENABLE:
		enable_TSC();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * Extra context-switch work from @prev_p to @next_p: debug store / debug
 * control MSR, hardware debug registers, the CR4 TSD bit and the I/O
 * permission bitmap in @tss.
 */
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	prev = &prev_p->thread;
	next = &next_p->thread;
	/* Either task using the DS area takes the ds_switch_to() path */
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
		/* no 4 and 5 */
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
	}
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}
	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}
/*
 * fork entry point: calls do_fork() with SIGCHLD and the stack pointer
 * saved in @regs, and returns its result.
 */
int sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 *
 * Calls do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD and the stack
 * pointer saved in @regs.
 */
int sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		       NULL, NULL);
}
/*
* Idle related variables and functions
*/
@@ -350,7 +539,7 @@ static void c1e_idle(void)
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_SMP
#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");

View File

@@ -11,6 +11,7 @@
#include <stdarg.h>
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -66,9 +67,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
/*
* Return saved PC of a blocked thread.
*/
@@ -94,6 +92,15 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. CPU0 already has it initialized but no harm in
* doing it again. This is a good place for updating it, as
* we won't ever return from this function (so the invalid
* canaries already on the stack won't ever trigger).
*/
boot_init_stack_canary();
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
@@ -108,7 +115,6 @@ void cpu_idle(void)
play_dead();
local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
/* Don't trace irqs off for idle */
stop_critical_timings();
pm_idle();
@@ -132,7 +138,7 @@ void __show_regs(struct pt_regs *regs, int all)
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
savesegment(gs, gs);
gs = get_user_gs(regs);
} else {
sp = (unsigned long) (&regs->sp);
savesegment(ss, ss);
@@ -213,6 +219,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.ds = __USER_DS;
regs.es = __USER_DS;
regs.fs = __KERNEL_PERCPU;
regs.gs = __KERNEL_STACK_CANARY;
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -223,55 +230,6 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
}
EXPORT_SYMBOL(kernel_thread);
/*
 * Free current thread data structures etc..
 *
 * 32-bit variant: if TIF_IO_BITMAP is set, frees the task's I/O bitmap and
 * resets this CPU's TSS bitmap state (contents, owner, max and base
 * offset).  ds_exit_thread() is called in every case.
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);
		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		put_cpu();
	}
	ds_exit_thread(current);
}
/*
 * Reset per-thread architecture state of the current task: zero the debug
 * registers and TLS array, clear TIF_DEBUG and drop the FPU state.
 */
void flush_thread(void)
{
	struct task_struct *tsk = current;
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
@@ -305,7 +263,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
p->thread.ip = (unsigned long) ret_from_fork;
savesegment(gs, p->thread.gs);
task_user_gs(p) = get_user_gs(regs);
tsk = current;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -343,7 +301,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
__asm__("movl %0, %%gs" : : "r"(0));
set_user_gs(regs, 0);
regs->fs = 0;
set_fs(USER_DS);
regs->ds = __USER_DS;
@@ -359,127 +317,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
/* Set the TSD bit in CR4 on the current CPU. */
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
/*
 * Set TIF_NOTSC for the current thread and, if it was not already set,
 * update CR4 to match.  Runs with preemption disabled.
 */
void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}
/* Clear the TSD bit in CR4 on the current CPU. */
static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
/*
 * Clear TIF_NOTSC for the current thread and, if it was set, update CR4
 * to match.  Runs with preemption disabled.
 */
static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}
/*
 * Report the current thread's TSC mode to user space: PR_TSC_SIGSEGV when
 * TIF_NOTSC is set, PR_TSC_ENABLE otherwise.  The value is stored at user
 * address @adr and the result of put_user() is returned.
 */
int get_tsc_mode(unsigned long adr)
{
	unsigned int val = test_thread_flag(TIF_NOTSC) ? PR_TSC_SIGSEGV
						       : PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}
/*
 * Select the current thread's TSC mode.  @val must be PR_TSC_SIGSEGV or
 * PR_TSC_ENABLE; any other value returns -EINVAL.  Returns 0 on success.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		break;
	case PR_TSC_ENABLE:
		enable_TSC();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * Extra context-switch work from @prev_p to @next_p (32-bit variant):
 * debug store / debug control MSR, hardware debug registers, the CR4 TSD
 * bit, and the I/O bitmap base offset in @tss.  The bitmap contents are
 * not copied here.
 */
static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	prev = &prev_p->thread;
	next = &next_p->thread;
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
		/* no 4 and 5 */
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
	}
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}
	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		return;
	}
	if (likely(next == tss->io_bitmap_owner)) {
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we don't have to do anything but
		 * to set a valid offset in the TSS:
		 */
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and we let the task get a GPF in case an I/O instruction
	 * is performed. The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, if true, does the
	 * real copy and restart the instruction. This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
}
/*
* switch_to(x,yn) should switch tasks from x to y.
@@ -540,7 +377,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* used %fs or %gs (it does not today), or if the kernel is
* running inside of a hypervisor layer.
*/
savesegment(gs, prev->gs);
lazy_save_gs(prev->gs);
/*
* Load the per-thread Thread-Local Storage descriptor.
@@ -586,64 +423,44 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
lazy_load_gs(next->gs);
x86_write_percpu(current_task, next_p);
percpu_write(current_task, next_p);
return prev_p;
}
asmlinkage int sys_fork(struct pt_regs regs)
{
return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}
asmlinkage int sys_clone(struct pt_regs regs)
int sys_clone(struct pt_regs *regs)
{
unsigned long clone_flags;
unsigned long newsp;
int __user *parent_tidptr, *child_tidptr;
clone_flags = regs.bx;
newsp = regs.cx;
parent_tidptr = (int __user *)regs.dx;
child_tidptr = (int __user *)regs.di;
clone_flags = regs->bx;
newsp = regs->cx;
parent_tidptr = (int __user *)regs->dx;
child_tidptr = (int __user *)regs->di;
if (!newsp)
newsp = regs.sp;
return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}
/*
* This is trivial, and on the face of it looks like it
* could equally well be done in user mode.
*
* Not so, for quite unobvious reasons - register pressure.
* In user mode vfork() cannot have a stack frame, and if
* done by calling the "clone()" system call directly, you
* do not have enough call-clobbered registers to hold all
* the information you need.
*/
asmlinkage int sys_vfork(struct pt_regs regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
}
/*
* sys_execve() executes a new program.
*/
asmlinkage int sys_execve(struct pt_regs regs)
int sys_execve(struct pt_regs *regs)
{
int error;
char *filename;
filename = getname((char __user *) regs.bx);
filename = getname((char __user *) regs->bx);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename,
(char __user * __user *) regs.cx,
(char __user * __user *) regs.dx,
&regs);
(char __user * __user *) regs->cx,
(char __user * __user *) regs->dx,
regs);
if (error == 0) {
/* Make sure we don't return using sysenter.. */
set_thread_flag(TIF_IRET);

View File

@@ -16,6 +16,7 @@
#include <stdarg.h>
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -47,7 +48,6 @@
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
@@ -58,6 +58,12 @@
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -76,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
void enter_idle(void)
{
write_pda(isidle, 1);
percpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
if (test_and_clear_bit_pda(0, isidle) == 0)
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
@@ -112,6 +118,16 @@ static inline void play_dead(void)
void cpu_idle(void)
{
current_thread_info()->status |= TS_POLLING;
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. CPU0 already has it initialized but no harm in
* doing it again. This is a good place for updating it, as
* we won't ever return from this function (so the invalid
* canaries already on the stack won't ever trigger).
*/
boot_init_stack_canary();
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick(1);
@@ -221,61 +237,6 @@ void show_regs(struct pt_regs *regs)
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
/*
 * Free current thread data structures etc..
 *
 * If the current task owns an I/O permission bitmap, free it, clear
 * TIF_IO_BITMAP and reset the used part of this CPU's TSS bitmap to 0xff.
 * ds_exit_thread() is called in every case.
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
	if (me->thread.io_bitmap_ptr) {
		/* get_cpu()/put_cpu() bracket the access to this CPU's TSS */
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
	ds_exit_thread(current);
}
/*
 * Reset per-thread architecture state of the current task: resolve a
 * pending ABI switch, clear the debug registers and TIF_DEBUG, wipe the
 * TLS array and drop the FPU state.
 */
void flush_thread(void)
{
	struct task_struct *tsk = current;
	/* A pending ABI change toggles TIF_IA32 */
	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
@@ -397,7 +358,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
write_pda(oldrsp, new_sp);
percpu_write(old_rsp, new_sp);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
regs->flags = 0x200;
@@ -409,118 +370,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
/* Set the TSD bit in CR4 on the current CPU. */
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
/*
 * Set TIF_NOTSC for the current thread and, if it was not already set,
 * update CR4 to match.  Runs with preemption disabled.
 */
void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}
/* Clear the TSD bit in CR4 on the current CPU. */
static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
/*
 * Clear TIF_NOTSC for the current thread and, if it was set, update CR4
 * to match.  Runs with preemption disabled.
 */
static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}
/*
 * Report the current thread's TSC mode to user space: PR_TSC_SIGSEGV when
 * TIF_NOTSC is set, PR_TSC_ENABLE otherwise.  The value is stored at user
 * address @adr and the result of put_user() is returned.
 */
int get_tsc_mode(unsigned long adr)
{
	unsigned int val = test_thread_flag(TIF_NOTSC) ? PR_TSC_SIGSEGV
						       : PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}
/*
 * Select the current thread's TSC mode.  @val must be PR_TSC_SIGSEGV or
 * PR_TSC_ENABLE; any other value returns -EINVAL.  Returns 0 on success.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		break;
	case PR_TSC_ENABLE:
		enable_TSC();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * This special macro can be used to load a debugging register
 *
 * Expands to set_debugreg(thread->debugreg<r>, r): loads debug register
 * number r from the matching field of the thread structure.
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
/*
 * Extra context-switch work from @prev_p to @next_p (64-bit variant):
 * debug store / debug control MSR, hardware debug registers, the CR4 TSD
 * bit and the I/O permission bitmap in @tss.
 */
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	/* Two plain assignments; the original joined them with a stray comma */
	prev = &prev_p->thread;
	next = &next_p->thread;

	/* Either task using the DS area takes the ds_switch_to() path */
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}
/*
* switch_to(x,y) should switch tasks from x to y.
*
@@ -618,21 +467,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
write_pda(pcurrent, next_p);
prev->usersp = percpu_read(old_rsp);
percpu_write(old_rsp, next->usersp);
percpu_write(current_task, next_p);
write_pda(kernelstack,
percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
write_pda(stack_canary, next_p->stack_canary);
/*
* Build time only check to make sure the stack_canary is at
* offset 40 in the pda; this is a gcc ABI requirement
*/
BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif
THREAD_SIZE - KERNEL_STACK_OFFSET);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
@@ -686,11 +527,6 @@ void set_personality_64bit(void)
current->personality &= ~READ_IMPLIES_EXEC;
}
/*
 * fork entry point: forwards to do_fork() with SIGCHLD as the flags
 * argument and the stack pointer saved in @regs.  No tid pointers are
 * passed (both are NULL).  Returns whatever do_fork() returns.
 */
asmlinkage long sys_fork(struct pt_regs *regs)
{
return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
@@ -700,22 +536,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 *
 * The call forwards to do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD
 * and the stack pointer saved in @regs; both tid pointers are NULL.
 * Returns whatever do_fork() returns.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
NULL, NULL);
}
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;

View File

@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
if (regno > FS)
--regno;
return &regs->bx + regno;
return &regs->bx + (regno >> 2);
}
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
if (offset != offsetof(struct user_regs_struct, gs))
retval = *pt_regs_access(task_pt_regs(task), offset);
else {
retval = task->thread.gs;
if (task == current)
savesegment(gs, retval);
retval = get_user_gs(task_pt_regs(task));
else
retval = task_user_gs(task);
}
return retval;
}
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
break;
case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
* The user-mode %gs is not affected by
* kernel entry, so we must update the CPU.
*/
loadsegment(gs, value);
set_user_gs(task_pt_regs(task), value);
else
task_user_gs(task) = value;
}
return 0;
@@ -273,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
return TASK_SIZE64 - 7;
return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */

View File

@@ -14,6 +14,7 @@
#include <asm/reboot.h>
#include <asm/pci_x86.h>
#include <asm/virtext.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@@ -23,8 +24,6 @@
# include <asm/iommu.h>
#endif
#include <mach_ipi.h>
/*
* Power off function, if any
*/
@@ -658,7 +657,7 @@ static int crash_nmi_callback(struct notifier_block *self,
static void smp_send_nmi_allbutself(void)
{
send_IPI_allbutself(NMI_VECTOR);
apic->send_IPI_allbutself(NMI_VECTOR);
}
static struct notifier_block crash_nmi_nb = {

View File

@@ -7,7 +7,7 @@
*/
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>

View File

@@ -7,10 +7,10 @@
*/
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
#include <asm/pgtable_types.h>
/*
* Must be relocatable PIC code callable as a C function
@@ -29,122 +29,6 @@ relocate_kernel:
* %rdx start address
*/
/* map the control page at its virtual address */
movq $0x0000ff8000000000, %r10 /* mask */
mov $(39 - 3), %cl /* bits to shift */
movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PGD)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PUD_0)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PUD_0)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PMD_0)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PMD_0)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PTE_0)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PTE_0)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
/* identity map the control page at its physical address */
movq $0x0000ff8000000000, %r10 /* mask */
mov $(39 - 3), %cl /* bits to shift */
movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PGD)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PUD_1)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PUD_1)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PMD_1)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PMD_1)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_PTE_1)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
shrq $9, %r10
sub $9, %cl
movq %r11, %r9
andq %r10, %r9
shrq %cl, %r9
movq PTR(VA_PTE_1)(%rsi), %r8
addq %r8, %r9
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
orq $PAGE_ATTR, %r8
movq %r8, (%r9)
relocate_new_kernel:
/* %rdi indirection_page
* %rsi page_list
* %rdx start address
*/
/* zero out flags, and disable interrupts */
pushq $0
popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
/* get physical address of page table now too */
movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
/* switch to new set of page tables */
movq PTR(PA_PGD)(%rsi), %r9
movq %r9, %cr3
/* Switch to the identity mapped page tables */
movq %rcx, %cr3
/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
jmp 1f
1:
/* Switch to the identity mapped page tables,
* and flush the TLB.
*/
/* Flush the TLB (needed?) */
movq %rcx, %cr3
/* Do the copies */

View File

@@ -74,14 +74,15 @@
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/efi.h>
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/sections.h>
#include <asm/dmi.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/vmi.h>
#include <setup_arch.h>
#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
@@ -89,7 +90,7 @@
#include <asm/system.h>
#include <asm/vsyscall.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/dma.h>
#include <asm/iommu.h>
@@ -97,7 +98,6 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <mach_apic.h>
#include <asm/paravirt.h>
#include <asm/hypervisor.h>
@@ -112,6 +112,20 @@
#define ARCH_SETUP
#endif
unsigned int boot_cpu_id __read_mostly;
#ifdef CONFIG_X86_64
/*
 * Out-of-line wrapper: returns the result of
 * __default_cpu_present_to_apicid() for @mps_cpu unchanged.
 */
int default_cpu_present_to_apicid(int mps_cpu)
{
return __default_cpu_present_to_apicid(mps_cpu);
}
/*
 * Out-of-line wrapper: returns the result of
 * __default_check_phys_apicid_present() for @boot_cpu_physical_apicid
 * unchanged.
 */
int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
}
#endif
#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
#else
@@ -586,20 +600,7 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
/*
 * Default update_genapic quirk hook.  On CONFIG_X86_SMP builds that also
 * have CONFIG_X86_GENERICARCH or CONFIG_X86_64, points
 * genapic->wakeup_cpu at wakeup_secondary_cpu_via_init; on all other
 * configurations it does nothing.  Always returns 0.
 */
static int __init default_update_genapic(void)
{
#ifdef CONFIG_X86_SMP
# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
# endif
#endif
return 0;
}
static struct x86_quirks default_x86_quirks __initdata = {
.update_genapic = default_update_genapic,
};
static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@@ -656,7 +657,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
pre_setup_arch_hook();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
@@ -824,8 +824,7 @@ void __init setup_arch(char **cmdline_p)
#else
num_physpages = max_pfn;
if (cpu_has_x2apic)
check_x2apic();
check_x2apic();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
@@ -865,9 +864,7 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
#ifdef CONFIG_X86_64
vsmp_init();
#endif
io_delay_init();
@@ -893,12 +890,11 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_FIND_SMP_CONFIG
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
reserve_crashkernel();
#ifdef CONFIG_X86_64
@@ -925,9 +921,7 @@ void __init setup_arch(char **cmdline_p)
map_vsyscall();
#endif
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
early_quirks();
@@ -978,4 +972,95 @@ void __init setup_arch(char **cmdline_p)
#endif
}
#ifdef CONFIG_X86_32
/**
 * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
 *
 * Description:
 *	Gives the registered platform quirk, if any, the first chance to do
 *	interrupt initialisation before the "ordinary" interrupt call gates
 *	are set up.  Unless that hook returns nonzero, the ISA interrupts
 *	are then initialised through init_ISA_irqs().
 **/
void __init x86_quirk_pre_intr_init(void)
{
	/* A nonzero return from the hook suppresses the ISA setup below. */
	if (x86_quirks->arch_pre_intr_init &&
	    x86_quirks->arch_pre_intr_init())
		return;

	init_ISA_irqs();
}
/**
 * x86_quirk_intr_init - post gate setup interrupt initialisation
 *
 * Description:
 *	Lets the platform quirk fill in any interrupts that the general
 *	init_IRQ() routine left out: interrupts that belong to the machine
 *	rather than to devices on the I/O bus (like APIC interrupts in
 *	intel MP systems).
 **/
void __init x86_quirk_intr_init(void)
{
	if (!x86_quirks->arch_intr_init)
		return;

	/* No generic work follows, so the hook's result changes nothing here. */
	x86_quirks->arch_intr_init();
}
/**
 * x86_quirk_trap_init - initialise system specific traps
 *
 * Description:
 *	Called as the final act of trap_init().  Used in VISWS to initialise
 *	the various board specific APIC traps.  Invokes the platform quirk
 *	hook when one is registered.
 **/
void __init x86_quirk_trap_init(void)
{
	if (!x86_quirks->arch_trap_init)
		return;

	/* No generic work follows, so the hook's result changes nothing here. */
	x86_quirks->arch_trap_init();
}
/*
 * irqaction for the "timer" interrupt, handled by timer_interrupt().
 * The cpu mask starts out empty; x86_quirk_time_init() sets it to cpu 0
 * before registering the action on IRQ 0.
 */
static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
.mask = CPU_MASK_NONE,
.name = "timer"
};
/**
 * x86_quirk_pre_time_init - do any specific initialisations before.
 *
 * Calls the platform quirk's arch_pre_time_init hook when one is
 * registered; otherwise does nothing.
 **/
void __init x86_quirk_pre_time_init(void)
{
	if (!x86_quirks->arch_pre_time_init)
		return;

	x86_quirks->arch_pre_time_init();
}
/**
 * x86_quirk_time_init - do any specific initialisations for the system timer.
 *
 * Description:
 *	Must plug the system timer interrupt source at HZ into the IRQ listed
 *	in irq_vectors.h:TIMER_IRQ.  The platform quirk hook runs first; the
 *	generic setup binds irq0 to cpu 0 and registers it on IRQ 0.
 **/
void __init x86_quirk_time_init(void)
{
	/*
	 * A nonzero return code does not mean failure, it means
	 * that the architecture quirk does not want any
	 * generic (timer) setup to be performed after this:
	 */
	if (x86_quirks->arch_time_init && x86_quirks->arch_time_init())
		return;

	irq0.mask = cpumask_of_cpu(0);
	setup_irq(0, &irq0);
}
#endif /* CONFIG_X86_32 */

View File

@@ -7,150 +7,356 @@
#include <linux/crash_dump.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/pfn.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/highmem.h>
#include <asm/proto.h>
#include <asm/cpumask.h>
#include <asm/cpu.h>
#include <asm/stackprotector.h>
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);
unsigned int max_physical_apicid;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif
/* map cpu index to physical APIC ID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1
/* map cpu index to node index */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
/* which logical CPUs are on which nodes */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);
/* setup node_to_cpumask_map */
static void __init setup_node_to_cpumask_map(void);
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
# define DBG(x...) printk(KERN_DEBUG x)
#else
static inline void setup_node_to_cpumask_map(void) { }
# define DBG(x...)
#endif
#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
 * Copy data used in early init routines from the initial arrays to the
 * per cpu data areas.  These arrays then become expendable and the
 * *_early_ptr's are zeroed indicating that the static arrays are gone.
 *
 * Copied per possible cpu: x86_cpu_to_apicid, x86_bios_cpu_apicid and,
 * when X86_64_NUMA is defined, x86_cpu_to_node_map.
 */
static void __init setup_per_cpu_maps(void)
{
int cpu;
for_each_possible_cpu(cpu) {
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#ifdef X86_64_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
}
/* indicate the early static arrays will soon be gone */
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}
#ifdef CONFIG_X86_32
/*
* Great future not-so-futuristic plan: make i386 and x86_64 do it
* the same way
*/
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static inline void setup_cpu_pda_map(void) { }
#elif !defined(CONFIG_SMP)
static inline void setup_cpu_pda_map(void) { }
#else /* CONFIG_SMP && CONFIG_X86_64 */
/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 *
 * One bootmem allocation holds the pointer table (nr_cpu_ids entries,
 * rounded up to a cache line) followed by the pda array.  The boot cpu
 * keeps its existing pda, so the array only has room for
 * nr_cpu_ids - 1 entries.  Finally _cpu_pda is switched to the new table.
 */
static void __init setup_cpu_pda_map(void)
{
char *pda;
struct x8664_pda **new_cpu_pda;
unsigned long size;
int cpu;
/* per-cpu pda stride: struct size rounded up to a cache line */
size = roundup(sizeof(struct x8664_pda), cache_line_size());
/* allocate cpu_pda array and pointer table */
{
unsigned long tsize = nr_cpu_ids * sizeof(void *);
unsigned long asize = size * (nr_cpu_ids - 1);
tsize = roundup(tsize, cache_line_size());
new_cpu_pda = alloc_bootmem(tsize + asize);
/* the pda array starts right after the pointer table */
pda = (char *)new_cpu_pda + tsize;
}
/* initialize pointer table to static pda's */
for_each_possible_cpu(cpu) {
if (cpu == 0) {
/* leave boot cpu pda in place */
new_cpu_pda[0] = cpu_pda(0);
continue;
}
new_cpu_pda[cpu] = (struct x8664_pda *)pda;
/* mark this pda as living in the bootmem allocation above */
new_cpu_pda[cpu]->in_bootmem = 1;
pda += size;
}
/* point to new pointer table */
_cpu_pda = new_cpu_pda;
}
#endif /* CONFIG_SMP && CONFIG_X86_64 */
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
#endif
/* correctly size the local cpu masks */
static void __init setup_cpu_local_masks(void)
DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);
/*
* On x86_64 symbols referenced from code should be reachable using
* 32bit relocations. Reserve space for static percpu variables in
* modules so that they are always served from the first chunk which
* is located at the percpu segment base. On x86_32, anything can
* address anywhere. No need to reserve space in the first chunk.
*/
#ifdef CONFIG_X86_64
#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE
#else
#define PERCPU_FIRST_CHUNK_RESERVE 0
#endif
/**
* pcpu_need_numa - determine percpu allocation needs to consider NUMA
*
* If NUMA is not configured or there is only one NUMA node available,
* there is no reason to consider NUMA. This function determines
* whether percpu allocation should consider NUMA or not.
*
* RETURNS:
* true if NUMA should be considered; otherwise, false.
*/
static bool __init pcpu_need_numa(void)
{
alloc_bootmem_cpumask_var(&cpu_initialized_mask);
alloc_bootmem_cpumask_var(&cpu_callin_mask);
alloc_bootmem_cpumask_var(&cpu_callout_mask);
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
#ifdef CONFIG_NEED_MULTIPLE_NODES
pg_data_t *last = NULL;
unsigned int cpu;
for_each_possible_cpu(cpu) {
int node = early_cpu_to_node(cpu);
if (node_online(node) && NODE_DATA(node) &&
last && last != NODE_DATA(node))
return true;
last = NODE_DATA(node);
}
#endif
return false;
}
#else /* CONFIG_X86_32 */
static inline void setup_cpu_local_masks(void)
/**
 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
 * @cpu: cpu to allocate for
 * @size: size allocation in bytes
 * @align: alignment
 *
 * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
 * does the right thing for NUMA regardless of the current
 * configuration.  The non-panicking bootmem variants are used, so the
 * caller must check the result.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
unsigned long align)
{
/* passed as the "goal" argument of every bootmem call below */
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
int node = early_cpu_to_node(cpu);
void *ptr;
if (!node_online(node) || !NODE_DATA(node)) {
/* no usable node for this cpu: fall back to a node-agnostic allocation */
ptr = __alloc_bootmem_nopanic(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
cpu, size, __pa(ptr));
} else {
/* allocate from the cpu's own node */
ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
size, align, goal);
pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
"%016lx\n", cpu, size, node, __pa(ptr));
}
return ptr;
#else
return __alloc_bootmem_nopanic(size, align, goal);
#endif
}
#endif /* CONFIG_X86_32 */
/*
* Remap allocator
*
* This allocator uses PMD page as unit. A PMD page is allocated for
* each cpu and each is remapped into vmalloc area using PMD mapping.
* As PMD page is quite large, only part of it is used for the first
* chunk. Unused part is returned to the bootmem allocator.
*
* So, the PMD pages are mapped twice - once to the physical mapping
* and to the vmalloc area for the first percpu chunk. The double
* mapping does add one more PMD TLB entry pressure but still is much
* better than only using 4k mappings while still being NUMA friendly.
*/
#ifdef CONFIG_NEED_MULTIPLE_NODES
static size_t pcpur_size __initdata;
static void **pcpur_ptrs __initdata;
/*
 * Page lookup callback of the remap allocator: returns the struct page
 * backing page number @pageno of @cpu's area in pcpur_ptrs[], or NULL
 * when that page lies at or beyond pcpur_size.
 */
static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
{
	const size_t offset = (size_t)pageno << PAGE_SHIFT;

	if (offset < pcpur_size)
		return virt_to_page(pcpur_ptrs[cpu] + offset);

	return NULL;
}
/**
 * setup_pcpu_remap - set up the first percpu chunk with the remap allocator
 * @static_size: size in bytes of the static percpu area
 *
 * Allocates one PMD_SIZE-sized and PMD_SIZE-aligned area per possible cpu,
 * returns the part beyond pcpur_size to bootmem, copies the static percpu
 * image into each area and maps the areas with large-page PMD entries
 * into a vm area registered through vm_area_register_early().
 *
 * RETURNS:
 * The result of pcpu_setup_first_chunk() once the areas are mapped,
 * -EINVAL if this allocator is not applicable (no PSE, NUMA does not
 * need to be considered, or the chunk does not fit in one large page),
 * -ENOMEM if a per-cpu area could not be allocated.
 */
static ssize_t __init setup_pcpu_remap(size_t static_size)
{
	static struct vm_struct vm;
	size_t ptrs_size, dyn_size;
	unsigned int cpu;
	ssize_t ret;

	/*
	 * If large page isn't supported, there's no benefit in doing
	 * this.  Also, on non-NUMA, embedding is better.
	 *
	 * pcpu_need_numa() performs the "more than one distinct node"
	 * scan, so bail out when it reports that NUMA need not be
	 * considered.
	 */
	if (!cpu_has_pse || !pcpu_need_numa())
		return -EINVAL;

	/*
	 * Currently supports only single page.  Supporting multiple
	 * pages won't be too difficult if it ever becomes necessary.
	 */
	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
			       PERCPU_DYNAMIC_RESERVE);
	if (pcpur_size > PMD_SIZE) {
		pr_warning("PERCPU: static data is larger than large page, "
			   "can't use large page\n");
		return -EINVAL;
	}
	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

	/* allocate pointer array and alloc large pages */
	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
	/* NOTE(review): the enomem path assumes alloc_bootmem() zero-fills - confirm */
	pcpur_ptrs = alloc_bootmem(ptrs_size);

	for_each_possible_cpu(cpu) {
		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
		if (!pcpur_ptrs[cpu])
			goto enomem;

		/*
		 * Only use pcpur_size bytes and give back the rest.
		 *
		 * Ingo: The 2MB up-rounding bootmem is needed to make
		 * sure the partial 2MB page is still fully RAM - it's
		 * not well-specified to have a PAT-incompatible area
		 * (unmapped RAM, device memory, etc.) in that hole.
		 */
		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
			     PMD_SIZE - pcpur_size);

		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
	}

	/* allocate address and map */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * PMD_SIZE;
	vm_area_register_early(&vm, PMD_SIZE);

	for_each_possible_cpu(cpu) {
		pmd_t *pmd;

		pmd = populate_extra_pmd((unsigned long)vm.addr
					 + cpu * PMD_SIZE);
		set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
				     PAGE_KERNEL_LARGE));
	}

	/* we're ready, commit */
	pr_info("PERCPU: Remapped at %p with large pages, static data "
		"%zu bytes\n", vm.addr, static_size);

	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
				     PMD_SIZE, vm.addr, NULL);
	goto out_free_ar;

enomem:
	/*
	 * Every area allocated so far already had its tail beyond
	 * pcpur_size returned to bootmem in the loop above, so only the
	 * first pcpur_size bytes are still ours to free.
	 */
	for_each_possible_cpu(cpu)
		if (pcpur_ptrs[cpu])
			free_bootmem(__pa(pcpur_ptrs[cpu]), pcpur_size);
	ret = -ENOMEM;
out_free_ar:
	/* the pointer array is only needed while the first chunk is set up */
	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
	return ret;
}
#else
/*
 * Stub used when CONFIG_NEED_MULTIPLE_NODES is not set: the remap
 * allocator is never applicable, so always return -EINVAL.
 */
static ssize_t __init setup_pcpu_remap(size_t static_size)
{
return -EINVAL;
}
#endif
/*
* Embedding allocator
*
* The first chunk is sized to just contain the static area plus
* module and dynamic reserves and embedded into linear physical
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
/*
 * Set up the first percpu chunk with the embedding allocator.
 *
 * @static_size: size in bytes of the static percpu area
 *
 * Returns -EINVAL when the allocator is not applicable, otherwise the
 * result of pcpu_embed_first_chunk().
 */
static ssize_t __init setup_pcpu_embed(size_t static_size)
{
	size_t dyn_size;

	/* If large page isn't supported, there's no benefit in doing this. */
	if (!cpu_has_pse)
		return -EINVAL;

	/* Embedding allocation doesn't play well with NUMA. */
	if (pcpu_need_numa())
		return -EINVAL;

	/* whatever part of the reserves is not covered by the first-chunk reserve */
	dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
	dyn_size -= PERCPU_FIRST_CHUNK_RESERVE;

	return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
				      dyn_size, -1);
}
/*
* 4k page allocator
*
* This is the basic allocator. Static percpu area is allocated
* page-by-page and most of initialization is done by the generic
* setup function.
*/
static struct page **pcpu4k_pages __initdata;
static int pcpu4k_nr_static_pages __initdata;
/*
 * Page lookup callback of the 4k allocator: returns the page recorded
 * for (@cpu, @pageno) in pcpu4k_pages[], or NULL when @pageno is not
 * below the number of static pages.
 */
static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
{
	if (pageno >= pcpu4k_nr_static_pages)
		return NULL;

	/* pages are stored cpu-major, pcpu4k_nr_static_pages entries per cpu */
	return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
}
/*
 * PTE-populate callback handed to pcpu_setup_first_chunk() by the 4k
 * allocator; simply forwards @addr to populate_extra_pte().
 */
static void __init pcpu4k_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
/*
 * Set up the first percpu chunk with the 4k page allocator.
 *
 * @static_size: size in bytes of the static percpu area
 *
 * Allocates PFN_UP(@static_size) individual pages per possible cpu,
 * copies the static percpu image into them page by page and hands the
 * page table to pcpu_setup_first_chunk().
 *
 * Returns the result of pcpu_setup_first_chunk(), or -ENOMEM when a
 * page allocation fails (all pages obtained so far are freed again).
 */
static ssize_t __init setup_pcpu_4k(size_t static_size)
{
size_t pages_size;
unsigned int cpu;
int i, j;
ssize_t ret;
pcpu4k_nr_static_pages = PFN_UP(static_size);
/* unaligned allocations can't be freed, round up to page size */
pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
* sizeof(pcpu4k_pages[0]));
pcpu4k_pages = alloc_bootmem(pages_size);
/* allocate and copy */
/* j counts the pages stored so far; it is also the unwind cursor in enomem */
j = 0;
for_each_possible_cpu(cpu)
for (i = 0; i < pcpu4k_nr_static_pages; i++) {
void *ptr;
ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
if (!ptr)
goto enomem;
memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
pcpu4k_pages[j++] = virt_to_page(ptr);
}
/* we're ready, commit */
pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
pcpu4k_nr_static_pages, static_size);
/* NOTE(review): the -1 / NULL arguments presumably select defaults in the generic code - confirm */
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, -1,
-1, NULL, pcpu4k_populate_pte);
goto out_free_ar;
enomem:
/* give back every page recorded before the failure, newest first */
while (--j >= 0)
free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
ret = -ENOMEM;
out_free_ar:
/* the page pointer table is released on both the success and the error path */
free_bootmem(__pa(pcpu4k_pages), pages_size);
return ret;
}
/*
 * On CONFIG_X86_32, write the GDT_ENTRY_PERCPU descriptor of @cpu's GDT
 * so that it is built from per_cpu_offset(@cpu).  On other
 * configurations this is a no-op.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
struct desc_struct gdt;
/* NOTE(review): arguments look like base, limit, type, flags - confirm against pack_descriptor() */
pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
gdt.s = 1;
write_gdt_entry(get_cpu_gdt_table(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
/*
* Great future plan:
@@ -159,50 +365,77 @@ static inline void setup_cpu_local_masks(void)
*/
void __init setup_per_cpu_areas(void)
{
ssize_t size, old_size;
char *ptr;
int cpu;
unsigned long align = 1;
/* Setup cpu_pda map */
setup_cpu_pda_map();
/* Copy section for each CPU (we discard the original) */
old_size = PERCPU_ENOUGH_ROOM;
align = max_t(unsigned long, PAGE_SIZE, align);
size = roundup(old_size, align);
size_t static_size = __per_cpu_end - __per_cpu_start;
unsigned int cpu;
unsigned long delta;
size_t pcpu_unit_size;
ssize_t ret;
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
/*
* Allocate percpu area. If PSE is supported, try to make use
* of large page mappings. Please read comments on top of
* each allocator for details.
*/
ret = setup_pcpu_remap(static_size);
if (ret < 0)
ret = setup_pcpu_embed(static_size);
if (ret < 0)
ret = setup_pcpu_4k(static_size);
if (ret < 0)
panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
static_size, ret);
pcpu_unit_size = ret;
/* alrighty, percpu areas up and running */
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = __alloc_bootmem(size, align,
__pa(MAX_DMA_ADDRESS));
#else
int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) {
ptr = __alloc_bootmem(size, align,
__pa(MAX_DMA_ADDRESS));
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d at %016lx\n",
cpu, __pa(ptr));
} else {
ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
__pa(MAX_DMA_ADDRESS));
pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
cpu, node, __pa(ptr));
}
per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/*
* Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These
* arrays then become expendable and the *_early_ptr's
* are zeroed indicating that the static arrays are
* gone.
*/
#ifdef CONFIG_X86_LOCAL_APIC
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
#endif
/*
* Up to this point, the boot CPU has been using .data.init
* area. Reload any changed state for the boot CPU.
*/
if (cpu == boot_cpu_id)
switch_to_new_gdt(cpu);
}
/* Setup percpu data maps */
setup_per_cpu_maps();
/* indicate the early static arrays will soon be gone */
#ifdef CONFIG_X86_LOCAL_APIC
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
@@ -210,199 +443,3 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
}
#endif
#ifdef X86_64_NUMA
/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * If nr_node_ids still equals MAX_NUMNODES it is first set to the
 * highest node id found in node_possible_map plus one.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
unsigned int node, num = 0;
cpumask_t *map;
/* setup nr_node_ids if not done yet */
if (nr_node_ids == MAX_NUMNODES) {
/* after the loop, num holds the last node id the iteration visited */
for_each_node_mask(node, node_possible_map)
num = node;
nr_node_ids = num + 1;
}
/* allocate the map */
map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
pr_debug("Node to cpumask map at %p for %d nodes\n",
map, nr_node_ids);
/* node_to_cpumask() will now work */
node_to_cpumask_map = map;
}
/*
 * Record @node as the node of @cpu.
 *
 * The pda's nodenumber is updated when @cpu has a pda and @node is not
 * NUMA_NO_NODE.  The cpu-to-node map is written through the early array
 * while it still exists, otherwise through the per-cpu variable when
 * @cpu has a per-cpu offset; failing both, only a debug message is
 * printed.
 */
void __cpuinit numa_set_node(int cpu, int node)
{
	int *early_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (early_map) {
		early_map[cpu] = node;
		return;
	}

	if (per_cpu_offset(cpu)) {
		per_cpu(x86_cpu_to_node_map, cpu) = node;
		return;
	}

	pr_debug("Setting node for non-present cpu %d\n", cpu);
}
/* Reset @cpu's node to NUMA_NO_NODE via numa_set_node(). */
void __cpuinit numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
/*
 * Set @cpu in the cpumask of its node; the node is looked up with
 * early_cpu_to_node().
 */
void __cpuinit numa_add_cpu(int cpu)
{
cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
/*
 * Clear @cpu from the cpumask of its node; the node is looked up with
 * cpu_to_node().
 */
void __cpuinit numa_remove_cpu(int cpu)
{
cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
}
#else /* CONFIG_DEBUG_PER_CPU_MAPS */
/*
* --------- debug versions of the numa functions ---------
*/
/*
 * Debug helper behind numa_add_cpu()/numa_remove_cpu(): sets (@enable
 * nonzero) or clears (@enable zero) @cpu in the cpumask of its node and
 * logs the resulting mask.  If node_to_cpumask_map has not been set up
 * yet, an error and a stack dump are printed and nothing is changed.
 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
int node = cpu_to_node(cpu);
cpumask_t *mask;
char buf[64];
if (node_to_cpumask_map == NULL) {
printk(KERN_ERR "node_to_cpumask_map NULL\n");
dump_stack();
return;
}
mask = &node_to_cpumask_map[node];
if (enable)
cpu_set(cpu, *mask);
else
cpu_clear(cpu, *mask);
/* format the updated mask into buf for the debug message below */
cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
}
/* Debug variant: add @cpu to its node's cpumask via numa_set_cpumask(). */
void __cpuinit numa_add_cpu(int cpu)
{
numa_set_cpumask(cpu, 1);
}
/* Debug variant: remove @cpu from its node's cpumask via numa_set_cpumask(). */
void __cpuinit numa_remove_cpu(int cpu)
{
numa_set_cpumask(cpu, 0);
}
/*
 * Debug version of cpu_to_node(): returns the node of @cpu from the
 * per-cpu map.  If the early array is still in place the call came too
 * early; a warning and a stack dump are printed and the value is taken
 * from the early array instead.
 */
int cpu_to_node(int cpu)
{
	if (!early_per_cpu_ptr(x86_cpu_to_node_map))
		return per_cpu(x86_cpu_to_node_map, cpu);

	printk(KERN_WARNING
		"cpu_to_node(%d): usage too early!\n", cpu);
	dump_stack();

	return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
}
EXPORT_SYMBOL(cpu_to_node);
/*
 * Counterpart of cpu_to_node() that may be called before the per_cpu
 * areas are set up: the early array is used while it exists, then the
 * per-cpu map.  A cpu without a per-cpu offset triggers a warning and a
 * stack dump and yields NUMA_NO_NODE.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (per_cpu_offset(cpu))
		return per_cpu(x86_cpu_to_node_map, cpu);

	printk(KERN_WARNING
		"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
	dump_stack();

	return NUMA_NO_NODE;
}
/* empty cpumask */
static const cpumask_t cpu_mask_none;
/*
 * Debug version: returns a pointer to the bitmask of CPUs on Node 'node'.
 *
 * Falls back to cpu_online_map when node_to_cpumask_map has not been
 * set up, and to the empty mask when @node is not below nr_node_ids;
 * both cases print a warning and a stack dump.
 */
const cpumask_t *cpumask_of_node(int node)
{
	const cpumask_t *result;

	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
			node);
		dump_stack();
		result = (const cpumask_t *)&cpu_online_map;
	} else if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		result = &cpu_mask_none;
	} else {
		result = &node_to_cpumask_map[node];
	}

	return result;
}
EXPORT_SYMBOL(cpumask_of_node);
/*
 * Debug version: returns a bitmask of CPUs on Node 'node', by value.
 *
 * Side note: the returned cpumask is created on the stack, so with a
 * high NR_CPUS count excessive stack space is used.  The
 * node_to_cpumask_ptr function should be used whenever possible.
 *
 * Falls back to cpu_online_map when node_to_cpumask_map has not been
 * set up, and to the empty mask when @node is not below nr_node_ids;
 * both cases print a warning and a stack dump.
 */
cpumask_t node_to_cpumask(int node)
{
	if (node_to_cpumask_map != NULL && node < nr_node_ids)
		return node_to_cpumask_map[node];

	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
		dump_stack();
		return cpu_online_map;
	}

	printk(KERN_WARNING
		"node_to_cpumask(%d): node > nr_node_ids(%d)\n",
		node, nr_node_ids);
	dump_stack();
	return cpu_mask_none;
}
EXPORT_SYMBOL(node_to_cpumask);
/*
* --------- end of debug versions of the numa functions ---------
*/
#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
#endif /* X86_64_NUMA */

View File

@@ -50,27 +50,23 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
} while (0)
#define COPY_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp; \
}
#define GET_SEG(seg) ({ \
unsigned short tmp; \
get_user_ex(tmp, &sc->seg); \
tmp; \
})
#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
#define COPY_SEG(seg) do { \
regs->seg = GET_SEG(seg); \
} while (0)
#define GET_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
loadsegment(seg, tmp); \
}
#define COPY_SEG_CPL3(seg) do { \
regs->seg = GET_SEG(seg) | 3; \
} while (0)
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
get_user_try {
#ifdef CONFIG_X86_32
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
set_user_gs(regs, GET_SEG(gs));
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
#endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
#ifdef CONFIG_X86_64
COPY(r8);
COPY(r9);
COPY(r10);
COPY(r11);
COPY(r12);
COPY(r13);
COPY(r14);
COPY(r15);
COPY(r8);
COPY(r9);
COPY(r10);
COPY(r11);
COPY(r12);
COPY(r13);
COPY(r14);
COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
get_user_ex(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= __get_user(*pax, &sc->ax);
return err;
}
@@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
{
int err = 0;
#ifdef CONFIG_X86_32
{
unsigned int tmp;
put_user_try {
savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
}
err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
#ifdef CONFIG_X86_32
put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
put_user_ex(regs->es, (unsigned int __user *)&sc->es);
put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
#endif /* CONFIG_X86_32 */
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
err |= __put_user(regs->sp, &sc->sp);
err |= __put_user(regs->bx, &sc->bx);
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
put_user_ex(regs->di, &sc->di);
put_user_ex(regs->si, &sc->si);
put_user_ex(regs->bp, &sc->bp);
put_user_ex(regs->sp, &sc->sp);
put_user_ex(regs->bx, &sc->bx);
put_user_ex(regs->dx, &sc->dx);
put_user_ex(regs->cx, &sc->cx);
put_user_ex(regs->ax, &sc->ax);
#ifdef CONFIG_X86_64
err |= __put_user(regs->r8, &sc->r8);
err |= __put_user(regs->r9, &sc->r9);
err |= __put_user(regs->r10, &sc->r10);
err |= __put_user(regs->r11, &sc->r11);
err |= __put_user(regs->r12, &sc->r12);
err |= __put_user(regs->r13, &sc->r13);
err |= __put_user(regs->r14, &sc->r14);
err |= __put_user(regs->r15, &sc->r15);
put_user_ex(regs->r8, &sc->r8);
put_user_ex(regs->r9, &sc->r9);
put_user_ex(regs->r10, &sc->r10);
put_user_ex(regs->r11, &sc->r11);
put_user_ex(regs->r12, &sc->r12);
put_user_ex(regs->r13, &sc->r13);
put_user_ex(regs->r14, &sc->r14);
put_user_ex(regs->r15, &sc->r15);
#endif /* CONFIG_X86_64 */
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
put_user_ex(current->thread.trap_no, &sc->trapno);
put_user_ex(current->thread.error_code, &sc->err);
put_user_ex(regs->ip, &sc->ip);
#ifdef CONFIG_X86_32
err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->sp, &sc->sp_at_signal);
put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
#else /* !CONFIG_X86_32 */
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->cs, &sc->cs);
err |= __put_user(0, &sc->gs);
err |= __put_user(0, &sc->fs);
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
#endif /* CONFIG_X86_32 */
err |= __put_user(fpstate, &sc->fpstate);
put_user_ex(fpstate, &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
err |= __put_user(current->thread.cr2, &sc->cr2);
/* non-iBCS2 extensions.. */
put_user_ex(mask, &sc->oldmask);
put_user_ex(current->thread.cr2, &sc->cr2);
} put_user_catch(err);
return err;
}
@@ -189,6 +187,71 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
/*
* Set up a signal frame.
*/
/*
* Determine which stack to use..
*/
static unsigned long align_sigframe(unsigned long sp)
{
#ifdef CONFIG_X86_32
	/*
	 * The i386 ABI wants ((sp + 4) & 15) == 0 at function entry:
	 * bias by 4, mask down to a 16-byte boundary, remove the bias.
	 */
	return ((sp + 4) & -16ul) - 4;
#else /* !CONFIG_X86_32 */
	/* Round down to a 16-byte boundary, then step 8 bytes below it. */
	return round_down(sp, 16) - 8;
#endif
}
/*
 * Pick the user stack address at which a signal frame of 'frame_size'
 * bytes will be built.
 *
 * @ka:         action for the signal being delivered (sa_flags and
 *              sa_restorer are consulted for stack switching)
 * @regs:       user register state; regs->sp is the starting point
 * @frame_size: size of the frame the caller is going to write
 * @fpstate:    written with the user address of the FP/xstate save area
 *              when used_math() is true; left untouched otherwise
 *
 * Returns the aligned frame address, or (void __user *)-1L when the frame
 * would overflow the alternate signal stack or saving the FP state fails.
 */
static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
	     void __user **fpstate)
{
	/* Default to using normal stack */
	unsigned long sp = regs->sp;
	int onsigstack;
	int math;

#ifdef CONFIG_X86_64
	/* redzone */
	sp -= 128;
#endif /* CONFIG_X86_64 */

	/* Remember whether we start out on the alternate signal stack. */
	onsigstack = on_sig_stack(sp);

	/* This is the X/Open sanctioned signal stack switching.  */
	if (ka->sa.sa_flags & SA_ONSTACK) {
		if (sas_ss_flags(sp) == 0)
			sp = current->sas_ss_sp + current->sas_ss_size;
	} else {
#ifdef CONFIG_X86_32
		/* This is the legacy signal stack switching. */
		if ((regs->ss & 0xffff) != __USER_DS &&
		    !(ka->sa.sa_flags & SA_RESTORER) &&
		    ka->sa.sa_restorer) {
			sp = (unsigned long) ka->sa.sa_restorer;
			/* No longer on the alternate stack: nothing to overflow. */
			onsigstack = 0;
		}
#endif /* CONFIG_X86_32 */
	}

	/* Reserve the FP/xstate area first; it is only written further down. */
	math = used_math();
	if (math) {
		sp -= sig_xstate_size;
#ifdef CONFIG_X86_64
		sp = round_down(sp, 64);
#endif /* CONFIG_X86_64 */
		*fpstate = (void __user *)sp;
	}

	sp = align_sigframe(sp - frame_size);

	/*
	 * If we are on the alternate signal stack and would overflow it, don't.
	 * Return an always-bogus address instead so we will die with SIGSEGV.
	 *
	 * The check uses the final frame address, so the xstate area and the
	 * alignment padding are accounted for, not just frame_size.
	 */
	if (onsigstack && !likely(on_sig_stack(sp)))
		return (void __user *)-1L;

	/* Only touch user memory once the frame position has been validated. */
	if (math && save_i387_xstate(*fpstate) < 0)
		return (void __user *)-1L;

	return (void __user *)sp;
}
#ifdef CONFIG_X86_32
static const struct {
u16 poplmovl;
@@ -212,54 +275,6 @@ static const struct {
0
};
/*
* Determine which stack to use..
*/
static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
	     void **fpstate)
{
	unsigned long sp;
	int onsigstack;
	int math;

	/* Default to using normal stack */
	sp = regs->sp;

	/* Remember whether we start out on the alternate signal stack. */
	onsigstack = on_sig_stack(sp);

	/* This is the X/Open sanctioned signal stack switching.  */
	if (ka->sa.sa_flags & SA_ONSTACK) {
		if (sas_ss_flags(sp) == 0)
			sp = current->sas_ss_sp + current->sas_ss_size;
	} else {
		/* This is the legacy signal stack switching. */
		if ((regs->ss & 0xffff) != __USER_DS &&
		    !(ka->sa.sa_flags & SA_RESTORER) &&
		    ka->sa.sa_restorer) {
			sp = (unsigned long) ka->sa.sa_restorer;
			/* No longer on the alternate stack: nothing to overflow. */
			onsigstack = 0;
		}
	}

	/* Reserve the FP/xstate area first; it is only written further down. */
	math = used_math();
	if (math) {
		sp = sp - sig_xstate_size;
		*fpstate = (struct _fpstate *) sp;
	}

	sp -= frame_size;
	/*
	 * Align the stack pointer according to the i386 ABI,
	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
	 */
	sp = ((sp + 4) & -16ul) - 4;

	/*
	 * If we are on the alternate signal stack and would overflow it, don't.
	 * Return an always-bogus address instead so we will die with SIGSEGV.
	 *
	 * The check uses the final frame address, so the xstate area and the
	 * alignment padding are accounted for, not just frame_size.
	 */
	if (onsigstack && !likely(on_sig_stack(sp)))
		return (void __user *) -1L;

	/* Only touch user memory once the frame position has been validated. */
	if (math && save_i387_xstate(*fpstate) < 0)
		return (void __user *)-1L;

	return (void __user *) sp;
}
static int
__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
struct pt_regs *regs)
@@ -336,43 +351,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
err |= __put_user(sig, &frame->sig);
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
return -EFAULT;
put_user_try {
put_user_ex(sig, &frame->sig);
put_user_ex(&frame->info, &frame->pinfo);
put_user_ex(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
/* Create the ucontext. */
if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
err |= __put_user(restorer, &frame->pretcode);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
put_user_ex(restorer, &frame->pretcode);
/*
* This is movl $__NR_rt_sigreturn, %ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
/*
* This is movl $__NR_rt_sigreturn, %ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);
if (err)
return -EFAULT;
@@ -392,24 +405,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return 0;
}
#else /* !CONFIG_X86_32 */
/*
* Determine which stack to use..
*/
static void __user *
get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
{
	unsigned long base;

	/* Start from the normal stack, skipping the 128-byte redzone. */
	base = sp - 128;

	/*
	 * X/Open sanctioned switch: with SA_ONSTACK requested and
	 * sas_ss_flags() reporting 0, restart from the end of the
	 * sas_ss_sp/sas_ss_size area.
	 */
	if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(base) == 0)
		base = current->sas_ss_sp + current->sas_ss_size;

	/* Carve out 'size' bytes and round down to a 64-byte boundary. */
	return (void __user *)round_down(base - size, 64);
}
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
@@ -418,15 +413,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
int err = 0;
struct task_struct *me = current;
if (used_math()) {
fp = get_stack(ka, regs->sp, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
return -EFAULT;
} else
frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -436,28 +423,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return -EFAULT;
}
/* Create the ucontext. */
if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
put_user_try {
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
if (ka->sa.sa_flags & SA_RESTORER) {
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
return -EFAULT;
}
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
if (ka->sa.sa_flags & SA_RESTORER) {
put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
err |= -EFAULT;
}
} put_user_catch(err);
if (err)
return -EFAULT;
@@ -509,31 +498,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
int ret;
int ret = 0;
if (act) {
old_sigset_t mask;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
if (!access_ok(VERIFY_READ, act, sizeof(*act)))
return -EFAULT;
__get_user(new_ka.sa.sa_flags, &act->sa_flags);
__get_user(mask, &act->sa_mask);
get_user_try {
get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
get_user_ex(mask, &act->sa_mask);
get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
} get_user_catch(ret);
if (ret)
return -EFAULT;
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
return -EFAULT;
__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
put_user_try {
put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
} put_user_catch(ret);
if (ret)
return -EFAULT;
}
return ret;
@@ -541,14 +540,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_32
asmlinkage int sys_sigaltstack(unsigned long bx)
int sys_sigaltstack(struct pt_regs *regs)
{
/*
* This is needed to make gcc realize it doesn't own the
* "struct pt_regs"
*/
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
const stack_t __user *uss = (const stack_t __user *)regs->bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
return do_sigaltstack(uss, uoss, regs->sp);
@@ -566,14 +560,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
unsigned long sys_sigreturn(struct pt_regs *regs)
{
struct sigframe __user *frame;
struct pt_regs *regs;
unsigned long ax;
sigset_t set;
regs = (struct pt_regs *) &__unused;
frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -600,7 +592,7 @@ badframe:
}
#endif /* CONFIG_X86_32 */
static long do_rt_sigreturn(struct pt_regs *regs)
long sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
unsigned long ax;
@@ -631,25 +623,6 @@ badframe:
return 0;
}
#ifdef CONFIG_X86_32
/*
* Note: do not pass in pt_regs directly as with tail-call optimization
* GCC will incorrectly stomp on the caller's frame and corrupt user-space
* register state:
*/
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
#endif /* CONFIG_X86_32 */
/*
* OK, we're invoking a handler:
*/

View File

@@ -2,7 +2,7 @@
* Intel SMP support routines.
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
* (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* (c) 2002,2003 Andi Kleen, SuSE Labs.
*
* i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
@@ -26,8 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <mach_ipi.h>
#include <mach_apic.h>
#include <asm/apic.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
@@ -118,12 +117,12 @@ static void native_smp_send_reschedule(int cpu)
WARN_ON(1);
return;
}
send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
void native_send_call_func_ipi(const struct cpumask *mask)
@@ -131,7 +130,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
cpumask_var_t allbutself;
if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
return;
}
@@ -140,9 +139,9 @@ void native_send_call_func_ipi(const struct cpumask *mask)
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
free_cpumask_var(allbutself);
}

View File

@@ -2,7 +2,7 @@
* x86 SMP booting functions
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
* (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
* Copyright 2001 Andi Kleen, SuSE Labs.
*
* Much of the core SMP work is based on previous work by Thomas Radke, to
@@ -53,7 +53,6 @@
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
#include <asm/numa.h>
@@ -61,13 +60,12 @@
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
#include <asm/smpboot_hooks.h>
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
@@ -114,7 +112,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
static atomic_t init_deasserted;
atomic_t init_deasserted;
/* Set if we find a B stepping CPU */
@@ -163,7 +161,7 @@ static void map_cpu_to_logical_apicid(void)
{
int cpu = smp_processor_id();
int apicid = logical_smp_processor_id();
int node = apicid_to_node(apicid);
int node = apic->apicid_to_node(apicid);
if (!node_online(node))
node = first_online_node;
@@ -196,7 +194,8 @@ static void __cpuinit smp_callin(void)
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
*/
wait_for_init_deassert(&init_deasserted);
if (apic->wait_for_init_deassert)
apic->wait_for_init_deassert(&init_deasserted);
/*
* (This works even if the APIC is not enabled.)
@@ -243,7 +242,8 @@ static void __cpuinit smp_callin(void)
*/
pr_debug("CALLIN, before setup_local_APIC().\n");
smp_callin_clear_local_apic();
if (apic->smp_callin_clear_local_apic)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
map_cpu_to_logical_apicid();
@@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
/* Target chip */
/* Boot on the stack */
/* Kick the second */
apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -614,12 +614,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
send_status = uv_wakeup_secondary(phys_apicid, start_eip);
atomic_set(&init_deasserted, 1);
return send_status;
}
maxlvt = lapic_get_maxlvt();
/*
@@ -745,78 +739,23 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
#ifdef CONFIG_X86_64
/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
{
	/* Once after_bootmem is set the old PDA is simply left alone. */
	if (after_bootmem)
		return;

	free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
}
/*
* Allocate node local memory for the AP pda.
*
* Must be called after the _cpu_pda pointer table is initialized.
*/
int __cpuinit get_local_pda(int cpu)
{
	unsigned long pda_bytes = sizeof(struct x8664_pda);
	int node = cpu_to_node(cpu);
	struct x8664_pda *prev = cpu_pda(cpu);
	struct x8664_pda *fresh;

	/* An existing PDA that is not flagged in_bootmem needs no replacement. */
	if (prev && !prev->in_bootmem)
		return 0;

	fresh = kmalloc_node(pda_bytes, GFP_ATOMIC, node);
	if (!fresh) {
		printk(KERN_ERR "Could not allocate node local PDA "
			"for CPU %d on node %d\n", cpu, node);
		/* An old PDA is still usable; without one this is an error. */
		return prev ? 0 : -1;
	}

	/* Carry the old contents over, then release the old copy. */
	if (prev) {
		memcpy(fresh, prev, pda_bytes);
		free_bootmem_pda(prev);
	}

	fresh->in_bootmem = 0;
	cpu_pda(cpu) = fresh;
	return 0;
}
#endif /* CONFIG_X86_64 */
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
* Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
* Returns zero if CPU booted OK, else error code from
* ->wakeup_secondary_cpu.
*/
static int __cpuinit do_boot_cpu(int apicid, int cpu)
{
unsigned long boot_error = 0;
int timeout;
unsigned long start_ip;
unsigned short nmi_high = 0, nmi_low = 0;
int timeout;
struct create_idle c_idle = {
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
INIT_WORK(&c_idle.work, do_fork_idle);
#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
if (cpu > 0) {
boot_error = get_local_pda(cpu);
if (boot_error)
goto restore_state;
/* if can't get pda memory, can't start cpu */
}
#endif
INIT_WORK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -847,14 +786,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
set_idle_for_cpu(cpu, c_idle.idle);
do_rest:
#ifdef CONFIG_X86_32
per_cpu(current_task, cpu) = c_idle.idle;
init_gdt(cpu);
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(c_idle.idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
@@ -878,8 +819,6 @@ do_rest:
pr_debug("Setting warm reset code and vector.\n");
store_NMI_vector(&nmi_high, &nmi_low);
smpboot_setup_warm_reset_vector(start_ip);
/*
* Be paranoid about clearing APIC errors.
@@ -891,9 +830,13 @@ do_rest:
}
/*
* Starting actual IPI sequence...
* Kick the secondary CPU. Use the method in the APIC driver
* if it's defined - or use an INIT boot APIC message otherwise:
*/
boot_error = wakeup_secondary_cpu(apicid, start_ip);
if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
if (!boot_error) {
/*
@@ -927,13 +870,11 @@ do_rest:
else
/* trampoline code not run */
printk(KERN_ERR "Not responding.\n");
if (get_uv_system_type() != UV_NON_UNIQUE_APIC)
inquire_remote_apic(apicid);
if (apic->inquire_remote_apic)
apic->inquire_remote_apic(apicid);
}
}
#ifdef CONFIG_X86_64
restore_state:
#endif
if (boot_error) {
/* Try to put things back the way they were before ... */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -961,7 +902,7 @@ restore_state:
int __cpuinit native_cpu_up(unsigned int cpu)
{
int apicid = cpu_present_to_apicid(cpu);
int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
int err;
@@ -1054,14 +995,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
{
preempt_disable();
#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
if (def_to_bigsmp && nr_cpu_ids > 8) {
unsigned int cpu;
unsigned nr;
printk(KERN_WARNING
"More than 8 CPUs detected - skipping them.\n"
"Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
"Use CONFIG_X86_BIGSMP.\n");
nr = 0;
for_each_present_cpu(cpu) {
@@ -1107,7 +1048,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
*/
if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
printk(KERN_NOTICE
"weird, boot CPU (#%d) not listed by the BIOS.\n",
boot_cpu_physical_apicid);
@@ -1125,6 +1066,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
printk(KERN_ERR "... forcing use of dummy APIC emulation."
"(tell your hw vendor)\n");
smpboot_clear_io_apic();
arch_disable_smp_support();
return -1;
}
@@ -1181,9 +1123,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
#ifdef CONFIG_X86_64
enable_IR_x2apic();
setup_apic_routing();
#ifdef CONFIG_X86_64
default_setup_apic_routing();
#endif
if (smp_sanity_check(max_cpus) < 0) {
@@ -1207,18 +1149,18 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
*/
setup_local_APIC();
#ifdef CONFIG_X86_64
/*
* Enable IO APIC before setting up error vector
*/
if (!skip_ioapic_setup && nr_ioapics)
enable_IO_APIC();
#endif
end_local_APIC_setup();
map_cpu_to_logical_apicid();
setup_portio_remap();
if (apic->setup_portio_remap)
apic->setup_portio_remap();
smpboot_setup_io_apic();
/*
@@ -1240,10 +1182,7 @@ out:
void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
#ifdef CONFIG_X86_32
init_gdt(me);
#endif
switch_to_new_gdt();
switch_to_new_gdt(me);
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
per_cpu(cpu_state, me) = CPU_ONLINE;

View File

@@ -1,30 +0,0 @@
/*
* SMP stuff which is common to all sub-architectures.
*/
#include <linux/module.h>
#include <asm/smp.h>
#ifdef CONFIG_X86_32
DEFINE_PER_CPU(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
/*
* Initialize the CPU's GDT. This is either the boot CPU doing itself
* (still using the master per-cpu area), or a CPU doing it for a
* secondary which will soon come up.
*/
__cpuinit void init_gdt(int cpu)
{
	/* assumes __per_cpu_offset[] holds unsigned long, as this_cpu_off does - TODO confirm */
	unsigned long offset = __per_cpu_offset[cpu];
	struct desc_struct gdt;

	/* Build a descriptor whose base is this CPU's per-cpu offset. */
	pack_descriptor(&gdt, offset, 0xFFFFF, 0x2 | DESCTYPE_S, 0x8);
	gdt.s = 1;

	/* Install it in the GDT_ENTRY_PERCPU slot of the target CPU's GDT. */
	write_gdt_entry(get_cpu_gdt_table(cpu),
			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);

	/* Record the offset and the CPU number in that CPU's per-cpu data. */
	per_cpu(this_cpu_off, cpu) = offset;
	per_cpu(cpu_number, cpu) = cpu;
}
#endif

View File

@@ -1,7 +1,7 @@
/*
* Stack trace management functions
*
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>

View File

@@ -1,188 +0,0 @@
/*
* IBM Summit-Specific Code
*
* Written By: Matthew Dobson, IBM Corporation
*
* Copyright (c) 2003 IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <colpatch@us.ibm.com>
*
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/bios_ebda.h>
#include <asm/summit/mpparse.h>
static struct rio_table_hdr *rio_table_hdr __initdata;
static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
#ifndef CONFIG_X86_NUMAQ
static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
#endif
/*
 * Assign the PCI buses controlled by one Winnipeg to the node that owns it.
 *
 * @wpeg_num: index into rio_devs[] of the Winnipeg being processed
 * @last_bus: first bus number not yet assigned to a node
 *
 * The owner is resolved in two steps: the Rio device whose node_id equals
 * the Winnipeg's owner_id, then the scalability device whose node_id equals
 * that Rio device's owner_id.  The number of buses depends on the Winnipeg
 * type.
 *
 * Returns the next unassigned bus number, or last_bus unchanged if the owner
 * cannot be resolved or the Winnipeg type is not supported.  The range is
 * truncated (with a warning) so mp_bus_id_to_node[] is never written past
 * MAX_MP_BUSSES entries.
 */
static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
{
	int twister = 0, node = 0;
	int i, bus, num_buses, end_bus;

	for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
		if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
			twister = rio_devs[i]->owner_id;
			break;
		}
	}
	if (i == rio_table_hdr->num_rio_dev) {
		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
		return last_bus;
	}

	for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
		if (scal_devs[i]->node_id == twister) {
			node = scal_devs[i]->node_id;
			break;
		}
	}
	if (i == rio_table_hdr->num_scal_dev) {
		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
		return last_bus;
	}

	switch (rio_devs[wpeg_num]->type) {
	case CompatWPEG:
		/*
		 * The Compatibility Winnipeg controls the 2 legacy buses,
		 * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
		 * a PCI-PCI bridge card is used in either slot: total 5 buses.
		 */
		num_buses = 5;
		break;
	case AltWPEG:
		/*
		 * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
		 * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
		 * the "extra" buses for each of those slots: total 7 buses.
		 */
		num_buses = 7;
		break;
	case LookOutAWPEG:
	case LookOutBWPEG:
		/*
		 * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
		 * & the "extra" buses for each of those slots: total 9 buses.
		 */
		num_buses = 9;
		break;
	default:
		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
		return last_bus;
	}

	/* The table comes from firmware: never write past the bus->node map. */
	end_bus = last_bus + num_buses;
	if (end_bus > MAX_MP_BUSSES) {
		printk(KERN_WARNING "%s: bus range %d-%d exceeds MAX_MP_BUSSES (%d), truncating!\n",
		       __func__, last_bus, end_bus - 1, MAX_MP_BUSSES);
		end_bus = MAX_MP_BUSSES;
	}

	for (bus = last_bus; bus < end_bus; bus++)
		mp_bus_id_to_node[bus] = node;
	return bus;
}
/*
 * Fill scal_devs[] and rio_devs[] with pointers to the per-device detail
 * records that follow the Rio Grande table header.
 *
 * The record sizes depend on the table version (2 or 3).  The scalability
 * records come first, immediately followed by the Rio records.
 *
 * Returns 1 on success, 0 if the table describes more devices than the
 * arrays can hold or has an unknown version.
 */
static int __init build_detail_arrays(void)
{
	unsigned long ptr;
	int i, scal_detail_size, rio_detail_size;

	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
		printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
		return 0;
	}
	/* rio_devs[] is declared with MAX_NUMNODES*4 entries; do not overrun it. */
	if (rio_table_hdr->num_rio_dev > MAX_NUMNODES * 4) {
		printk(KERN_WARNING "%s: Too many Rio devices! Room for %d, but system has %d.\n", __func__, MAX_NUMNODES * 4, rio_table_hdr->num_rio_dev);
		return 0;
	}

	switch (rio_table_hdr->version) {
	default:
		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
		return 0;
	case 2:
		scal_detail_size = 11;
		rio_detail_size = 13;
		break;
	case 3:
		scal_detail_size = 12;
		rio_detail_size = 15;
		break;
	}

	/* The detail records start 3 bytes after the start of the header. */
	ptr = (unsigned long)rio_table_hdr + 3;
	for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
		scal_devs[i] = (struct scal_detail *)ptr;

	for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
		rio_devs[i] = (struct rio_detail *)ptr;

	return 1;
}
/*
 * Locate the Rio Grande table in the EBDA, build the device detail arrays
 * and map every PCI bus behind each Winnipeg to its owning node.
 */
void __init setup_summit(void)
{
	unsigned long ebda;
	unsigned short block;
	int idx, wpeg, bus = 0;

	/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
	ebda = (unsigned long)phys_to_virt(get_bios_ebda());

	/*
	 * Walk the chain of EBDA blocks starting at offset 0x180.  Each block
	 * keeps the offset of the next block in its 1st word (0 terminates
	 * the chain) and its block id in the 2nd word.
	 */
	rio_table_hdr = NULL;
	for (block = 0x180; block != 0;
	     block = *((unsigned short *)(ebda + block))) {
		if (*((unsigned short *)(ebda + block + 2)) != 0x4752)
			continue;
		/* Skip the next-offset and block-id words. */
		rio_table_hdr = (struct rio_table_hdr *)(ebda + block + 4);
		break;
	}

	if (rio_table_hdr == NULL) {
		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
		return;
	}

	if (build_detail_arrays() == 0)
		return;

	/*
	 * Handle the Winnipegs in WP_index order, starting at index 0.  Once
	 * no Rio device carries the index we are looking for, every Winnipeg
	 * (and therefore every PCI bus) has been processed.
	 */
	for (wpeg = 0; ; wpeg++) {
		for (idx = 0; idx < rio_table_hdr->num_rio_dev; idx++) {
			if (is_WPEG(rio_devs[idx]) &&
			    rio_devs[idx]->WP_index == wpeg)
				break;
		}
		if (idx == rio_table_hdr->num_rio_dev)
			break;

		/* It's the Winnipeg we're looking for! */
		bus = setup_pci_node_map_for_wpeg(idx, bus);
	}
}

View File

@@ -1,7 +1,7 @@
ENTRY(sys_call_table)
.long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
.long sys_exit
.long sys_fork
.long ptregs_fork
.long sys_read
.long sys_write
.long sys_open /* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
.long sys_creat
.long sys_link
.long sys_unlink /* 10 */
.long sys_execve
.long ptregs_execve
.long sys_chdir
.long sys_time
.long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
.long sys_newlstat
.long sys_newfstat
.long sys_uname
.long sys_iopl /* 110 */
.long ptregs_iopl /* 110 */
.long sys_vhangup
.long sys_ni_syscall /* old "idle" system call */
.long sys_vm86old
.long ptregs_vm86old
.long sys_wait4
.long sys_swapoff /* 115 */
.long sys_sysinfo
.long sys_ipc
.long sys_fsync
.long sys_sigreturn
.long sys_clone /* 120 */
.long ptregs_sigreturn
.long ptregs_clone /* 120 */
.long sys_setdomainname
.long sys_newuname
.long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
.long sys_mremap
.long sys_setresuid16
.long sys_getresuid16 /* 165 */
.long sys_vm86
.long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
.long sys_nfsservctl
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
.long sys_rt_sigreturn
.long ptregs_rt_sigreturn
.long sys_rt_sigaction
.long sys_rt_sigprocmask /* 175 */
.long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
.long sys_getcwd
.long sys_capget
.long sys_capset /* 185 */
.long sys_sigaltstack
.long ptregs_sigaltstack
.long sys_sendfile
.long sys_ni_syscall /* reserved for streams1 */
.long sys_ni_syscall /* reserved for streams2 */
.long sys_vfork /* 190 */
.long ptregs_vfork /* 190 */
.long sys_getrlimit
.long sys_mmap2
.long sys_truncate64

View File

@@ -33,12 +33,12 @@
#include <linux/time.h>
#include <linux/mca.h>
#include <asm/arch_hooks.h>
#include <asm/setup.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/timer.h>
#include "do_timer.h"
#include <asm/do_timer.h>
int timer_ack;
@@ -118,7 +118,7 @@ void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
time_init_hook();
x86_quirk_time_init();
}
/*
@@ -131,7 +131,7 @@ void __init hpet_time_init(void)
*/
void __init time_init(void)
{
pre_time_init_hook();
x86_quirk_pre_time_init();
tsc_init();
late_time_init = choose_time_init();
}

View File

@@ -1,256 +0,0 @@
#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <asm/tlbflush.h>
DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
____cacheline_aligned = { &init_mm, 0, };
/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*/
static cpumask_t flush_cpumask;
static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
void leave_mm(int cpu)
{
	/* Only a CPU that is not in TLBSTATE_OK may drop its mm. */
	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
		BUG();

	/* Remove this CPU from the mm's mask, then switch to the kernel page tables. */
	cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
/*
*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignores flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu_tlbstate to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu_tlbstate[].active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a5) change cr3.
* 1b) thread switch without mm change
* cpu_tlbstate[].active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu_tlbstate to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu_tlbstate is local to each cpu, no
* write/read ordering problems.
*/
/*
* TLB flush IPI:
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
void smp_invalidate_interrupt(struct pt_regs *regs)
{
unsigned long cpu;
cpu = get_cpu();
/* Not one of the CPUs the sender is waiting for: nothing to flush. */
if (!cpu_isset(cpu, flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs
* it's staying as a return
*
* BUG();
*/
/* Requests for an mm this CPU is not using are ignored. */
if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
/* flush_va selects a full flush or a single-address flush. */
if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
} else
/* Not in TLBSTATE_OK: drop the mm instead of flushing. */
leave_mm(cpu);
}
ack_APIC_irq();
/*
* Clearing our bit is the completion signal: native_flush_tlb_others()
* spins until flush_cpumask is empty. The barriers surround the clear.
*/
smp_mb__before_clear_bit();
cpu_clear(cpu, flush_cpumask);
smp_mb__after_clear_bit();
out:
put_cpu_no_resched();
inc_irq_stat(irq_tlb_count);
}
/*
* Ask the CPUs in *cpumaskp to flush @va (or everything, when @va is
* TLB_FLUSH_ALL) for @mm, and wait until all of them have done so.
*
* The request is published through the file-scope flush_mm, flush_va and
* flush_cpumask variables, which tlbstate_lock protects for the duration
* of the request.
*/
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
unsigned long va)
{
/* Work on a private copy; the hotplug path below may trim it. */
cpumask_t cpumask = *cpumaskp;
/*
* A couple of (to be removed) sanity checks:
*
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
#ifdef CONFIG_HOTPLUG_CPU
/* If a CPU which we ran on has gone down, OK. */
cpus_and(cpumask, cpumask, cpu_online_map);
if (unlikely(cpus_empty(cpumask)))
return;
#endif
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
* AK: x86-64 has a faster method that could be ported.
*/
spin_lock(&tlbstate_lock);
flush_mm = mm;
flush_va = va;
cpus_or(flush_cpumask, cpumask, flush_cpumask);
/*
* Make the above memory operations globally visible before
* sending the IPI.
*/
smp_mb();
/*
* We have to send the IPI only to
* CPUs affected.
*/
send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
/* Each target clears its own bit in smp_invalidate_interrupt(). */
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
cpu_relax();
/* Reset the shared request state before releasing the lock. */
flush_mm = NULL;
flush_va = 0;
spin_unlock(&tlbstate_lock);
}
/*
 * Flush the whole TLB for the current task's mm: locally, and on every
 * other CPU listed in the mm's cpu_vm_mask.
 */
void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	local_flush_tlb();

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, TLB_FLUSH_ALL);
	}

	preempt_enable();
}
/*
 * Flush all TLB entries belonging to @mm, on this CPU (if it is using
 * @mm) and on every other CPU listed in the mm's cpu_vm_mask.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	if (current->active_mm == mm) {
		/* No mm of our own: drop the borrowed mm rather than flush. */
		if (!current->mm)
			leave_mm(smp_processor_id());
		else
			local_flush_tlb();
	}

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, TLB_FLUSH_ALL);
	}

	preempt_enable();
}
/*
 * Flush the TLB entry for address @va in the mm that @vma belongs to, on
 * this CPU (if it is using that mm) and on every other CPU listed in the
 * mm's cpu_vm_mask.
 */
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
	struct mm_struct *mm = vma->vm_mm;
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	if (current->active_mm == mm) {
		/* No mm of our own: drop the borrowed mm rather than flush. */
		if (!current->mm)
			leave_mm(smp_processor_id());
		else
			__flush_tlb_one(va);
	}

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, va);
	}

	preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);
/*
 * Per-CPU worker for flush_tlb_all(): flush everything and, if this CPU
 * is in lazy TLB mode, leave its mm as well.  @info is unused.
 */
static void do_flush_tlb_all(void *info)
{
	unsigned long this_cpu = smp_processor_id();

	__flush_tlb_all();

	if (x86_read_percpu(cpu_tlbstate.state) != TLBSTATE_LAZY)
		return;

	leave_mm(this_cpu);
}
/* Run do_flush_tlb_all() on every CPU via on_each_cpu(). */
void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
void reset_lazy_tlbstate(void)
{
int cpu = raw_smp_processor_id();
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}

View File

@@ -1,284 +0,0 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
#include <asm/idle.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*
* More scalable flush, from Andi Kleen
*
* To avoid global state use 8 different call vectors.
* Each CPU uses a specific vector to trigger flushes on other
* CPUs. Depending on the received vector the target CPUs look into
* the right per cpu variable for the flush data.
*
* With more than 8 CPUs they are hashed to the 8 available
* vectors. The limited global vector space forces us to this right now.
* In future when interrupts are split into per CPU domains this could be
* fixed, at the cost of triggering multiple IPIs in some cases.
*/
/* One TLB flush request slot; see native_flush_tlb_others(). */
union smp_flush_state {
struct {
/* CPUs that have not yet handled the current request */
cpumask_t flush_cpumask;
/* mm whose translations are to be flushed */
struct mm_struct *flush_mm;
/* address to flush, or TLB_FLUSH_ALL */
unsigned long flush_va;
/* held by the sender for the duration of a request */
spinlock_t tlbstate_lock;
};
/* pads the union to at least SMP_CACHE_BYTES */
char pad[SMP_CACHE_BYTES];
} ____cacheline_aligned;
/* State is put into the per CPU data section, but padded
to a full cache line because other CPUs can access it and we don't
want false sharing in the per cpu data segment. */
static DEFINE_PER_CPU(union smp_flush_state, flush_state);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*/
void leave_mm(int cpu)
{
	/* Only a CPU that is not in TLBSTATE_OK may drop its mm. */
	BUG_ON(read_pda(mmu_state) == TLBSTATE_OK);

	/* Remove this CPU from the mm's mask, then switch to the kernel page tables. */
	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
/*
*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignores flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu mmu_state to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a5) change cr3.
* 1b) thread switch without mm change
* cpu active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu mmu_state to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu mmu_state is local to each cpu, no
* write/read ordering problems.
*/
/*
* TLB flush IPI:
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*
* Interrupts are disabled.
*/
asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
{
int cpu;
int sender;
union smp_flush_state *f;
cpu = smp_processor_id();
/*
* orig_ax contains the negated interrupt vector.
* Use that to determine where the sender put the data.
*/
sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
f = &per_cpu(flush_state, sender);
/* Not one of the CPUs this request is addressed to. */
if (!cpu_isset(cpu, f->flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs
* it's staying as a return
*
* BUG();
*/
/* Requests for an mm this CPU is not using are ignored. */
if (f->flush_mm == read_pda(active_mm)) {
if (read_pda(mmu_state) == TLBSTATE_OK) {
/* flush_va selects a full flush or a single-address flush. */
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(f->flush_va);
} else
/* Not in TLBSTATE_OK: drop the mm instead of flushing. */
leave_mm(cpu);
}
out:
ack_APIC_irq();
/*
* Clearing our bit is the completion signal: native_flush_tlb_others()
* spins until the slot's flush_cpumask is empty. This also runs on the
* early-exit path, where the bit was already clear.
*/
cpu_clear(cpu, f->flush_cpumask);
inc_irq_stat(irq_tlb_count);
}
/*
* Ask the CPUs in *cpumaskp to flush @va (or everything, when @va is
* TLB_FLUSH_ALL) for @mm, and wait until all of them have done so.
*
* The request is published in one of the per-cpu flush_state slots; the
* slot index also selects the IPI vector, which is how the receiver
* finds the slot again.
*/
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
unsigned long va)
{
int sender;
union smp_flush_state *f;
cpumask_t cpumask = *cpumaskp;
/* On UV systems, a non-zero return from the UV path means we are done. */
if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
return;
/* Caller has disabled preemption */
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
f = &per_cpu(flush_state, sender);
/*
* Could avoid this lock when
* num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
* probably not worth checking this for a cache-hot lock.
*/
spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
/*
* Make the above memory operations globally visible before
* sending the IPI.
*/
smp_mb();
/*
* We have to send the IPI only to
* CPUs affected.
*/
send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
/* Each target clears its own bit in smp_invalidate_interrupt(). */
while (!cpus_empty(f->flush_cpumask))
cpu_relax();
/* Reset the slot before releasing it to the next sender. */
f->flush_mm = NULL;
f->flush_va = 0;
spin_unlock(&f->tlbstate_lock);
}
/*
 * Initialise the tlbstate_lock in every possible CPU's flush_state slot.
 * Always returns 0.
 */
static int __cpuinit init_smp_flush(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		union smp_flush_state *slot = &per_cpu(flush_state, cpu);

		spin_lock_init(&slot->tlbstate_lock);
	}

	return 0;
}
core_initcall(init_smp_flush);
/*
 * Flush the whole TLB for the current task's mm: locally, and on every
 * other CPU listed in the mm's cpu_vm_mask.
 */
void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	local_flush_tlb();

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, TLB_FLUSH_ALL);
	}

	preempt_enable();
}
/*
 * Flush all TLB entries belonging to @mm, on this CPU (if it is using
 * @mm) and on every other CPU listed in the mm's cpu_vm_mask.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	if (current->active_mm == mm) {
		/* No mm of our own: drop the borrowed mm rather than flush. */
		if (!current->mm)
			leave_mm(smp_processor_id());
		else
			local_flush_tlb();
	}

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, TLB_FLUSH_ALL);
	}

	preempt_enable();
}
/*
 * Flush the TLB entry for address @va in the mm that @vma belongs to, on
 * this CPU (if it is using that mm) and on every other CPU listed in the
 * mm's cpu_vm_mask.
 */
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
	struct mm_struct *mm = vma->vm_mm;
	cpumask_t remote_cpus;

	preempt_disable();

	/* Everyone using this mm except ourselves. */
	remote_cpus = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), remote_cpus);

	if (current->active_mm == mm) {
		/* No mm of our own: drop the borrowed mm rather than flush. */
		if (!current->mm)
			leave_mm(smp_processor_id());
		else
			__flush_tlb_one(va);
	}

	if (!cpus_empty(remote_cpus)) {
		flush_tlb_others(remote_cpus, mm, va);
	}

	preempt_enable();
}
/*
 * Per-CPU worker for flush_tlb_all(): flush everything and, if this CPU
 * is in lazy TLB mode, leave its mm as well.  @info is unused.
 */
static void do_flush_tlb_all(void *info)
{
	unsigned long this_cpu = smp_processor_id();

	__flush_tlb_all();

	if (read_pda(mmu_state) != TLBSTATE_LAZY)
		return;

	leave_mm(this_cpu);
}
/* Run do_flush_tlb_all() on every CPU via on_each_cpu(). */
void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, NULL, 1);
}

View File

@@ -11,16 +11,15 @@
#include <linux/kernel.h>
#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/genapic.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <mach_apic.h>
static struct bau_control **uv_bau_table_bases __read_mostly;
static int uv_bau_retry_limit __read_mostly;
@@ -210,14 +209,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
*
* Send a broadcast and wait for a broadcast message to complete.
*
* The cpumaskp mask contains the cpus the broadcast was sent to.
* The flush_mask contains the cpus the broadcast was sent to.
*
* Returns 1 if all remote flushing was done. The mask is zeroed.
* Returns 0 if some remote flushing remains to be done. The mask is left
* unchanged.
* Returns NULL if all remote flushing was done. The mask is zeroed.
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
cpumask_t *cpumaskp)
const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
@@ -257,66 +257,74 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
* the cpu's, all of which are still in the mask.
*/
__get_cpu_var(ptcstats).ptc_i++;
return 0;
return flush_mask;
}
/*
* Success, so clear the remote cpu's from the mask so we don't
* use the IPI method of shootdown on them.
*/
for_each_cpu_mask(bit, *cpumaskp) {
for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
if (blade == this_blade)
continue;
cpu_clear(bit, *cpumaskp);
cpumask_clear_cpu(bit, flush_mask);
}
if (!cpus_empty(*cpumaskp))
return 0;
return 1;
if (!cpumask_empty(flush_mask))
return flush_mask;
return NULL;
}
/**
* uv_flush_tlb_others - globally purge translation cache of a virtual
* address or all TLB's
* @cpumaskp: mask of all cpu's in which the address is to be removed
* @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
* @cpu: the current cpu
*
* This is the entry point for initiating any UV global TLB shootdown.
*
* Purges the translation caches of all specified processors of the given
* virtual address, or purges all TLB's on specified processors.
*
* The caller has derived the cpumaskp from the mm_struct and has subtracted
* the local cpu from the mask. This function is called only if there
* are bits set in the mask. (e.g. flush_tlb_page())
* The caller has derived the cpumask from the mm_struct. This function
* is called only if there are bits set in the mask. (e.g. flush_tlb_page())
*
* The cpumaskp is converted into a nodemask of the nodes containing
* The cpumask is converted into a nodemask of the nodes containing
* the cpus.
*
* Returns 1 if all remote flushing was done.
* Returns 0 if some remote flushing remains to be done.
* Note that this function should be called with preemption disabled.
*
* Returns NULL if all remote flushing was done.
* Returns pointer to cpumask if some remote flushing remains to be
* done. The returned pointer is valid till preemption is re-enabled.
*/
int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
unsigned long va)
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
int i;
int bit;
int blade;
int cpu;
int uv_cpu;
int this_blade;
int locals = 0;
struct bau_desc *bau_desc;
cpu = uv_blade_processor_id();
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
this_blade = uv_numa_blade_id();
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
i = 0;
for_each_cpu_mask(bit, *cpumaskp) {
for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
if (blade == this_blade) {
@@ -331,17 +339,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
* no off_node flushing; return status for local node
*/
if (locals)
return 0;
return flush_mask;
else
return 1;
return NULL;
}
__get_cpu_var(ptcstats).requestor++;
__get_cpu_var(ptcstats).ntargeted += i;
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = smp_processor_id();
bau_desc->payload.sending_cpu = cpu;
return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
}
/*

View File

@@ -29,7 +29,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/page_types.h>
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU

View File

@@ -25,10 +25,11 @@
*/
#include <linux/linkage.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
.section .rodata, "a", @progbits
@@ -37,7 +38,7 @@
ENTRY(trampoline_data)
r_base = .
cli # We should be safe anyway
wbinvd
wbinvd
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
mov %ax, %es
@@ -73,9 +74,8 @@ r_base = .
lidtl tidt - r_base # load idt with 0, 0
lgdtl tgdt - r_base # load gdt with whatever is appropriate
xor %ax, %ax
inc %ax # protected mode (PE) bit
lmsw %ax # into protected mode
mov $X86_CR0_PE, %ax # protected mode (PE) bit
lmsw %ax # into protected mode
# flush prefetch and jump to startup_32
ljmpl *(startup_32_vector - r_base)
@@ -86,9 +86,8 @@ startup_32:
movl $__KERNEL_DS, %eax # Initialize the %ds segment register
movl %eax, %ds
xorl %eax, %eax
btsl $5, %eax # Enable PAE mode
movl %eax, %cr4
movl $X86_CR4_PAE, %eax
movl %eax, %cr4 # Enable PAE mode
# Setup trampoline 4 level pagetables
leal (trampoline_level4_pgt - r_base)(%esi), %eax
@@ -99,9 +98,9 @@ startup_32:
xorl %edx, %edx
wrmsr
xorl %eax, %eax
btsl $31, %eax # Enable paging and in turn activate Long Mode
btsl $0, %eax # Enable protected mode
# Enable paging and in turn activate Long Mode
# Enable protected mode
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
/*

View File

@@ -54,15 +54,14 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <mach_traps.h>
#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
#else
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include "cpu/mcheck/mce.h"
@@ -119,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
if (!user_mode_vm(regs))
die(str, regs, err);
}
/*
* Perform the lazy TSS's I/O bitmap copy. If the TSS has an
* invalid offset set (the LAZY one) and the faulting thread has
* a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
* we set the offset field correctly and return 1.
*/
static int lazy_iobitmap_copy(void)
{
	struct thread_struct *thread;
	struct tss_struct *tss;
	int copied = 0;
	int cpu;

	cpu = get_cpu();
	tss = &per_cpu(init_tss, cpu);
	thread = &current->thread;

	/* Nothing to do unless the TSS is marked lazy and the thread has a bitmap. */
	if (tss->x86_tss.io_bitmap_base != INVALID_IO_BITMAP_OFFSET_LAZY ||
	    !thread->io_bitmap_ptr)
		goto out;

	memcpy(tss->io_bitmap, thread->io_bitmap_ptr, thread->io_bitmap_max);

	/*
	 * If the previously set map was extending to higher ports
	 * than the current one, pad extra space with 0xff (no access).
	 */
	if (thread->io_bitmap_max < tss->io_bitmap_max)
		memset((char *) tss->io_bitmap + thread->io_bitmap_max, 0xff,
		       tss->io_bitmap_max - thread->io_bitmap_max);

	tss->io_bitmap_max = thread->io_bitmap_max;
	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
	tss->io_bitmap_owner = thread;
	copied = 1;

out:
	put_cpu();
	return copied;
}
#endif
static void __kprobes
@@ -310,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
conditional_sti(regs);
#ifdef CONFIG_X86_32
if (lazy_iobitmap_copy()) {
/* restart the faulting instruction */
return;
}
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
#endif
@@ -914,19 +867,20 @@ void math_emulate(struct math_emu_info *info)
}
#endif /* CONFIG_MATH_EMULATION */
dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
conditional_sti(&regs);
conditional_sti(regs);
info.regs = &regs;
info.regs = regs;
math_emulate(&info);
} else {
math_state_restore(); /* interrupts still off */
conditional_sti(&regs);
conditional_sti(regs);
}
#else
math_state_restore();
@@ -942,7 +896,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_BADSTK;
info.si_addr = 0;
info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception",
regs, error_code, 32, SIGILL) == NOTIFY_STOP)
return;
@@ -1026,6 +980,6 @@ void __init trap_init(void)
cpu_init();
#ifdef CONFIG_X86_32
trap_init_hook();
x86_quirk_trap_init();
#endif
}

View File

@@ -791,7 +791,7 @@ __cpuinit int unsynchronized_tsc(void)
if (!cpu_has_tsc || tsc_unstable)
return 1;
#ifdef CONFIG_X86_SMP
#ifdef CONFIG_SMP
if (apic_is_clustered_box())
return 1;
#endif

Some files were not shown because too many files have changed in this diff Show More