Merge commit 'perf/core' into perf/hw-breakpoint

Conflicts:
	kernel/Makefile
	kernel/trace/Makefile
	kernel/trace/trace.h
	samples/Makefile

Merge reason: We need to be up to date with the perf events development
branch because we plan to rewrite the breakpoints API on top of
perf events.
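
For context on that plan: the sketch below shows, in rough form, how a kernel hardware watchpoint could be requested through the perf events layer once the rewrite is done. It is only an illustration of the intended direction and is not code contained in this merge; the hw_breakpoint_init()/register_wide_hw_breakpoint() helpers, the bp_addr/bp_len/bp_type attribute fields, the handler signature and the watched symbol are all assumptions about the eventual interface.

#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event * __percpu *sample_hbp;

/* Hypothetical callback, run when the watched address is written. */
static void sample_hbp_handler(struct perf_event *bp, int nmi,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	printk(KERN_INFO "write to watched kernel symbol detected\n");
	dump_stack();
}

static int __init hbp_example_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);		/* assumed to set type = PERF_TYPE_BREAKPOINT */
	attr.bp_addr = kallsyms_lookup_name("pid_max");	/* symbol chosen only for illustration */
	attr.bp_len  = HW_BREAKPOINT_LEN_4;	/* watch 4 bytes */
	attr.bp_type = HW_BREAKPOINT_W;		/* fire on writes */

	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
	if (IS_ERR((void __force *)sample_hbp))
		return PTR_ERR((void __force *)sample_hbp);
	return 0;
}

static void __exit hbp_example_exit(void)
{
	unregister_wide_hw_breakpoint(sample_hbp);
}

module_init(hbp_example_init);
module_exit(hbp_example_exit);
MODULE_LICENSE("GPL");

Built as a module, this would arm the watchpoint on every online CPU and dump a stack trace whenever the symbol is written, with the breakpoint slot scheduling handled by the perf layer rather than a dedicated breakpoints API.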
Frederic Weisbecker
2009-10-18 01:09:09 +02:00
7303 changed files with 755773 additions and 372702 deletions


@@ -31,8 +31,8 @@ GCOV_PROFILE_paravirt.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit.o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -52,9 +52,11 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
obj-$(CONFIG_INTEL_TXT) += tboot.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
obj-$(CONFIG_SFI) += sfi.o
obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
@@ -104,6 +106,7 @@ obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
obj-$(CONFIG_OLPC) += olpc.o
obj-$(CONFIG_X86_MRST) += mrst.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o


@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
* P4, Core and beyond CPUs
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
(c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
flags->bm_control = 0;
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);


@@ -56,6 +56,6 @@ SECTIONS
/DISCARD/ : {
*(.note*)
}
. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
}
ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");


@@ -14,7 +14,7 @@
* Mikael Pettersson : PM converted to driver model.
*/
#include <linux/perf_counter.h>
#include <linux/perf_event.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
@@ -35,7 +35,8 @@
#include <linux/smp.h>
#include <linux/mm.h>
#include <asm/perf_counter.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/atomic.h>
#include <asm/mpspec.h>
@@ -61,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U;
/*
* The highest APIC ID seen during enumeration.
*
* This determines the messaging protocol we can use: if all APIC IDs
* On AMD, this determines the messaging protocol we can use: if all APIC IDs
* are in the 0 ... 7 range, then we can use logical addressing which
* has some performance advantages (better broadcasting).
*
@@ -978,7 +979,7 @@ void lapic_shutdown(void)
{
unsigned long flags;
if (!cpu_has_apic)
if (!cpu_has_apic && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1188,7 +1189,7 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
perf_counters_lapic_init();
perf_events_lapic_init();
preempt_disable();
@@ -1196,8 +1197,7 @@ void __cpuinit setup_local_APIC(void)
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
if (!apic->apic_id_registered())
BUG();
BUG_ON(!apic->apic_id_registered());
/*
* Intel recommends to set DFR, LDR and TPR before enabling
@@ -1709,7 +1709,7 @@ int __init APIC_init_uniprocessor(void)
localise_nmi_watchdog();
#endif
setup_boot_clock();
x86_init.timers.setup_percpu_clockev();
#ifdef CONFIG_X86_64
check_nmi_watchdog();
#endif
@@ -1916,24 +1916,14 @@ void __cpuinit generic_processor_info(int apicid, int version)
max_physical_apicid = apicid;
#ifdef CONFIG_X86_32
/*
* Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
* but we need to work other dependencies like SMP_SUSPEND etc
* before this can be done without some confusion.
* if (CPU_HOTPLUG_ENABLED || num_processors > 8)
* - Ashok Raj <ashok.raj@intel.com>
*/
if (max_physical_apicid >= 8) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!APIC_XAPIC(version)) {
def_to_bigsmp = 0;
break;
}
/* If P4 and above fall through */
case X86_VENDOR_AMD:
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (num_processors > 8)
def_to_bigsmp = 1;
break;
case X86_VENDOR_AMD:
if (max_physical_apicid >= 8)
def_to_bigsmp = 1;
}
}
#endif


@@ -112,7 +112,7 @@ static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
return physids_promote(0xFFL);
}
static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
static int bigsmp_check_phys_apicid_present(int phys_apicid)
{
return 1;
}


@@ -96,6 +96,11 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
/* Number of legacy interrupts */
static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
@@ -173,6 +178,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
[15] = { .vector = IRQ15_VECTOR, },
};
void __init io_apic_disable_legacy(void)
{
nr_legacy_irqs = 0;
nr_irqs_gsi = 0;
}
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -190,7 +201,7 @@ int __init arch_early_irq_init(void)
desc->chip_data = &cfg[i];
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
if (i < NR_IRQS_LEGACY)
if (i < nr_legacy_irqs)
cpumask_setall(cfg[i].domain);
}
@@ -216,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
cfg = NULL;
} else if (!alloc_cpumask_var_node(&cfg->old_domain,
} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
cfg = NULL;
} else {
cpumask_clear(cfg->domain);
cpumask_clear(cfg->old_domain);
}
}
@@ -867,7 +875,7 @@ static int __init find_isa_irq_apic(int irq, int type)
*/
static int EISA_ELCR(unsigned int irq)
{
if (irq < NR_IRQS_LEGACY) {
if (irq < nr_legacy_irqs) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -1464,7 +1472,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
}
ioapic_register_intr(irq, desc, trigger);
if (irq < NR_IRQS_LEGACY)
if (irq < nr_legacy_irqs)
disable_8259A_irq(irq);
ioapic_write_entry(apic_id, pin, entry);
@@ -1831,7 +1839,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
if (apic_verbosity == APIC_QUIET)
if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1863,7 +1871,7 @@ __apicdebuginit(int) print_all_ICs(void)
print_PIC();
/* don't print out if apic is not there */
if (!cpu_has_apic || disable_apic)
if (!cpu_has_apic && !apic_from_smp_config())
return 0;
print_all_local_APICs();
@@ -1894,6 +1902,10 @@ void __init enable_IO_APIC(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
}
if (!nr_legacy_irqs)
return;
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
@@ -1948,6 +1960,9 @@ void disable_IO_APIC(void)
*/
clear_IO_APIC();
if (!nr_legacy_irqs)
return;
/*
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
@@ -1981,7 +1996,7 @@ void disable_IO_APIC(void)
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
if (cpu_has_apic)
if (cpu_has_apic || apic_from_smp_config())
disconnect_bsp_APIC(!intr_remapping_enabled &&
ioapic_i8259.pin != -1);
}
@@ -1994,7 +2009,7 @@ void disable_IO_APIC(void)
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
static void __init setup_ioapic_ids_from_mpc(void)
void __init setup_ioapic_ids_from_mpc(void)
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
@@ -2003,9 +2018,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
if (acpi_ioapic)
return;
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
* no meaning without the serial APIC bus.
@@ -2179,7 +2193,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
struct irq_cfg *cfg;
spin_lock_irqsave(&ioapic_lock, flags);
if (irq < NR_IRQS_LEGACY) {
if (irq < nr_legacy_irqs) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
@@ -2657,7 +2671,7 @@ static inline void init_IO_APIC_traps(void)
* so default to an old-fashioned 8259
* interrupt if we can..
*/
if (irq < NR_IRQS_LEGACY)
if (irq < nr_legacy_irqs)
make_8259A_irq(irq);
else
/* Strange. Oh, well.. */
@@ -2993,7 +3007,7 @@ out:
* the I/O APIC in all cases now. No actual device should request
* it anyway. --macro
*/
#define PIC_IRQS (1 << PIC_CASCADE_IR)
#define PIC_IRQS (1UL << PIC_CASCADE_IR)
void __init setup_IO_APIC(void)
{
@@ -3001,21 +3015,19 @@ void __init setup_IO_APIC(void)
/*
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
io_apic_irqs = ~PIC_IRQS;
io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
/*
* Set up IO-APIC IRQ routing.
*/
#ifdef CONFIG_X86_32
if (!acpi_ioapic)
setup_ioapic_ids_from_mpc();
#endif
x86_init.mpparse.setup_ioapic_ids();
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();
if (nr_legacy_irqs)
check_timer();
}
/*
@@ -3116,7 +3128,6 @@ static int __init ioapic_init_sysfs(void)
device_initcall(ioapic_init_sysfs);
static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
@@ -3856,7 +3867,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
if (irq >= NR_IRQS_LEGACY) {
if (irq >= nr_legacy_irqs) {
cfg = desc->chip_data;
if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
printk(KERN_INFO "can not add pin %d for irq %d\n",


@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
static inline int mce_in_progress(void)
{
#if defined(CONFIG_X86_NEW_MCE)
#if defined(CONFIG_X86_MCE)
return atomic_read(&mce_entry) > 0;
#endif
return 0;
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
/*
* proc handler for /proc/sys/kernel/nmi
*/
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int old_state;
nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
old_state = nmi_watchdog_enabled;
proc_dointvec(table, write, file, buffer, length, ppos);
proc_dointvec(table, write, buffer, length, ppos);
if (!!old_state == !!nmi_watchdog_enabled)
return 0;


@@ -66,7 +66,6 @@ struct mpc_trans {
unsigned short trans_reserved;
};
/* x86_quirks member */
static int mpc_record;
static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
@@ -130,10 +129,9 @@ void __cpuinit numaq_tsc_disable(void)
}
}
static int __init numaq_pre_time_init(void)
static void __init numaq_tsc_init(void)
{
numaq_tsc_disable();
return 0;
}
static inline int generate_logical_apicid(int quad, int phys_apicid)
@@ -177,6 +175,19 @@ static void mpc_oem_pci_bus(struct mpc_bus *m)
quad_local_to_mp_bus_id[quad][local] = m->busid;
}
/*
* Called from mpparse code.
* mode = 0: prescan
* mode = 1: one mpc entry scanned
*/
static void numaq_mpc_record(unsigned int mode)
{
if (!mode)
mpc_record = 0;
else
mpc_record++;
}
static void __init MP_translation_info(struct mpc_trans *m)
{
printk(KERN_INFO
@@ -206,9 +217,9 @@ static int __init mpf_checksum(unsigned char *mp, int len)
/*
* Read/parse the MPC oem tables
*/
static void __init
smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize)
static void __init smp_read_mpc_oem(struct mpc_table *mpc)
{
struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
int count = sizeof(*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable) + count;
@@ -250,29 +261,6 @@ static void __init
}
}
static int __init numaq_setup_ioapic_ids(void)
{
/* so can skip it */
return 1;
}
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
.arch_pre_intr_init = NULL,
.arch_memory_setup = NULL,
.arch_intr_init = NULL,
.arch_trap_init = NULL,
.mach_get_smp_config = NULL,
.mach_find_smp_config = NULL,
.mpc_record = &mpc_record,
.mpc_apic_id = mpc_apic_id,
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
};
static __init void early_check_numaq(void)
{
/*
@@ -286,8 +274,15 @@ static __init void early_check_numaq(void)
if (smp_found_config)
early_get_smp_config();
if (found_numaq)
x86_quirks = &numaq_x86_quirks;
if (found_numaq) {
x86_init.mpparse.mpc_record = numaq_mpc_record;
x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
x86_init.mpparse.mpc_apic_id = mpc_apic_id;
x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
x86_init.timers.tsc_pre_init = numaq_tsc_init;
}
}
int __init get_memcfg_numaq(void)
@@ -418,7 +413,7 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
static inline int numaq_check_phys_apicid_present(int phys_apicid)
{
return 1;
}


@@ -64,16 +64,23 @@ void __init default_setup_apic_routing(void)
apic = &apic_x2apic_phys;
else
apic = &apic_x2apic_cluster;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
#endif
if (apic == &apic_flat) {
if (max_physical_apicid >= 8)
apic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (num_processors > 8)
apic = &apic_physflat;
break;
case X86_VENDOR_AMD:
if (max_physical_apicid >= 8)
apic = &apic_physflat;
}
}
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
if (is_vsmp_box()) {
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;


@@ -272,7 +272,7 @@ static physid_mask_t summit_apicid_to_cpu_present(int apicid)
return physid_mask_of_physid(0);
}
static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
static int summit_check_phys_apicid_present(int physical_apicid)
{
return 1;
}


@@ -389,6 +389,16 @@ static __init void map_gru_high(int max_pnode)
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
}
static __init void map_mmr_high(int max_pnode)
{
union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
if (mmr.s.enable)
map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
}
static __init void map_mmioh_high(int max_pnode)
{
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -643,6 +653,7 @@ void __init uv_system_init(void)
}
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_mmioh_high(max_pnode);
uv_cpu_init();


@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o
obj-y += vmware.o hypervisor.o sched.o
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/


@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
* approved Athlon
*/
WARN_ONCE(1, "WARNING: This combination of AMD"
"processors is not suitable for SMP.\n");
" processors is not suitable for SMP.\n");
if (!test_taint(TAINT_UNSAFE_SMP))
add_taint(TAINT_UNSAFE_SMP);
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
#endif
}
int amd_get_nb_id(int cpu)
{
int id = 0;
#ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
#endif
return id;
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)


@@ -13,7 +13,7 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
#include <asm/perf_counter.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -34,7 +34,6 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <linux/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -870,7 +869,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
init_hw_perf_counters();
init_hw_perf_events();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)


@@ -30,8 +30,8 @@
#include <asm/apic.h>
#include <asm/desc.h>
static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
static DEFINE_PER_CPU(int, cpu_priv_count);
static DEFINE_MUTEX(cpu_debug_lock);


@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <trace/power.h>
#include <trace/events/power.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -60,7 +60,6 @@ enum {
};
#define INTEL_MSR_RANGE (0xffff)
#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
@@ -71,13 +70,7 @@ struct acpi_cpufreq_data {
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
struct acpi_msr_data {
u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
DEFINE_TRACE(power_mark);
static DEFINE_PER_CPU(struct aperfmperf, old_perf);
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -244,23 +237,12 @@ static u32 get_cur_val(const struct cpumask *mask)
return cmd.val;
}
struct perf_pair {
union {
struct {
u32 lo;
u32 hi;
} split;
u64 whole;
} aperf, mperf;
};
/* Called via smp_call_function_single(), on the target CPU */
static void read_measured_perf_ctrs(void *_cur)
{
struct perf_pair *cur = _cur;
struct aperfmperf *am = _cur;
rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
get_aperfmperf(am);
}
/*
@@ -279,63 +261,17 @@ static void read_measured_perf_ctrs(void *_cur)
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int cpu)
{
struct perf_pair readin, cur;
unsigned int perf_percent;
struct aperfmperf perf;
unsigned long ratio;
unsigned int retval;
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
return 0;
cur.aperf.whole = readin.aperf.whole -
per_cpu(msr_data, cpu).saved_aperf;
cur.mperf.whole = readin.mperf.whole -
per_cpu(msr_data, cpu).saved_mperf;
per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
per_cpu(old_perf, cpu) = perf;
#ifdef __i386__
/*
* We dont want to do 64 bit divide with 32 bit kernel
* Get an approximate value. Return failure in case we cannot get
* an approximate value.
*/
if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
int shift_count;
u32 h;
h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
shift_count = fls(h);
cur.aperf.whole >>= shift_count;
cur.mperf.whole >>= shift_count;
}
if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
cur.aperf.split.lo >>= shift_count;
cur.mperf.split.lo >>= shift_count;
}
if (cur.aperf.split.lo && cur.mperf.split.lo)
perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
perf_percent = 0;
#else
if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
int shift_count = 7;
cur.aperf.whole >>= shift_count;
cur.mperf.whole >>= shift_count;
}
if (cur.aperf.whole && cur.mperf.whole)
perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
else
perf_percent = 0;
#endif
retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
return retval;
}
@@ -394,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@@ -426,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
trace_power_mark(&it, POWER_PSTATE, next_perf_state);
trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
@@ -588,6 +523,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
},
{ }
};
static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
/* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
* AL30: A Machine Check Exception (MCE) Occurring during an
* Enhanced Intel SpeedStep Technology Ratio Change May Cause
* Both Processor Cores to Lock Up when HT is enabled*/
if (c->x86_vendor == X86_VENDOR_INTEL) {
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
(c->x86_mask == 8) && smt_capable())
return -ENODEV;
}
return 0;
}
#endif
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -602,6 +552,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
dprintk("acpi_cpufreq_cpu_init\n");
#ifdef CONFIG_SMP
result = acpi_cpufreq_blacklist(c);
if (result)
return result;
#endif
data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -731,12 +687,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
acpi_processor_notify_smm(THIS_MODULE);
/* Check for APERF/MPERF support in hardware */
if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
unsigned int ecx;
ecx = cpuid_ecx(6);
if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
acpi_cpufreq_driver.getavg = get_measured_perf;
}
if (cpu_has(c, X86_FEATURE_APERFMPERF))
acpi_cpufreq_driver.getavg = get_measured_perf;
dprintk("CPU%u - ACPI performance management activated.\n", cpu);
for (i = 0; i < perf->state_count; i++)


@@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
return 0;
}
static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
unsigned int entry)
{
data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
}
static void print_basics(struct powernow_k8_data *data)
@@ -854,6 +855,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
goto err_out;
}
/* fill in data */
data->numps = data->acpi_data.state_count;
powernow_k8_acpi_pst_values(data, 0);
if (cpu_family == CPU_HW_PSTATE)
ret_val = fill_powernow_table_pstate(data, powernow_table);
else
@@ -866,11 +871,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
powernow_table[data->acpi_data.state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
data->numps = data->acpi_data.state_count;
if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
@@ -914,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
"bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS "
"manufacturer\n");
invalidate_entry(data, i);
invalidate_entry(powernow_table, i);
continue;
}
rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
if (!(hi & HW_PSTATE_VALID_MASK)) {
dprintk("invalid pstate %d, ignoring\n", index);
invalidate_entry(data, i);
invalidate_entry(powernow_table, i);
continue;
}
@@ -941,7 +943,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
struct cpufreq_frequency_table *powernow_table)
{
int i;
int cntlofreq = 0;
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 fid;
@@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
/* verify frequency is OK */
if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
dprintk("invalid freq %u kHz, ignoring\n", freq);
invalidate_entry(data, i);
invalidate_entry(powernow_table, i);
continue;
}
@@ -978,38 +979,17 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
* BIOSs are using "off" to indicate invalid */
if (vid == VID_OFF) {
dprintk("invalid vid %u, ignoring\n", vid);
invalidate_entry(data, i);
invalidate_entry(powernow_table, i);
continue;
}
/* verify only 1 entry from the lo frequency table */
if (fid < HI_FID_TABLE_BOTTOM) {
if (cntlofreq) {
/* if both entries are the same,
* ignore this one ... */
if ((freq != powernow_table[cntlofreq].frequency) ||
(index != powernow_table[cntlofreq].index)) {
printk(KERN_ERR PFX
"Too many lo freq table "
"entries\n");
return 1;
}
dprintk("double low frequency table entry, "
"ignoring it.\n");
invalidate_entry(data, i);
continue;
} else
cntlofreq = i;
}
if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries "
"%u kHz vs. %u kHz\n", freq,
(unsigned int)
(data->acpi_data.states[i].core_frequency
* 1000));
invalidate_entry(data, i);
invalidate_entry(powernow_table, i);
continue;
}
}


@@ -34,13 +34,6 @@ detect_hypervisor_vendor(struct cpuinfo_x86 *c)
c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
}
unsigned long get_hypervisor_tsc_freq(void)
{
if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
return vmware_get_tsc_khz();
return 0;
}
static inline void __cpuinit
hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
{
@@ -55,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
detect_hypervisor_vendor(c);
hypervisor_set_feature_bits(c);
}
void __init init_hypervisor_platform(void)
{
init_hypervisor(&boot_cpu_data);
if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
vmware_platform_setup();
}


@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
if (c->cpuid_level > 6) {
unsigned ecx = cpuid_ecx(6);
if (ecx & 0x01)
set_cpu_cap(c, X86_FEATURE_APERFMPERF);
}
if (cpu_has_xmm2)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (cpu_has_ds) {


@@ -1,11 +1,8 @@
obj-y = mce.o
obj-y = mce.o mce-severity.o
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o


@@ -1,116 +0,0 @@
/*
* Athlon specific Machine Check Exception Reporting
* (C) Copyright 2002 Dave Jones <davej@redhat.com>
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* Machine Check Handler For AMD Athlon/Duron: */
static void k7_machine_check(struct pt_regs *regs, long error_code)
{
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
for (i = 1; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
char misc[20];
char addr[24];
misc[0] = '\0';
addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
high &= ~(1<<31);
if (high & (1<<27)) {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
smp_processor_id(), i, high, low, misc, addr);
/* Clear it: */
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
/* Serialize: */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
if (recover & 2)
panic("CPU context corrupt");
if (recover & 1)
panic("Unable to continue");
printk(KERN_EMERG "Attempting to continue.\n");
mcgstl &= ~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
/* AMD K7 machine check is Intel like: */
void amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
if (!cpu_has(c, X86_FEATURE_MCE))
return;
machine_check_vector = k7_machine_check;
/* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
/*
* Clear status for MC index 0 separately, we don't touch CTL,
* as some K7 Athlons cause spurious MCEs when its enabled:
*/
if (boot_cpu_data.x86 == 6) {
wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
i = 1;
} else
i = 0;
for (; i < nr_mce_banks; i++) {
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}


@@ -18,7 +18,12 @@
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/notifier.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <asm/mce.h>
#include <asm/apic.h>
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -39,44 +44,142 @@ static void inject_mce(struct mce *m)
i->finished = 1;
}
struct delayed_mce {
struct timer_list timer;
struct mce m;
};
/* Inject mce on current CPU */
static void raise_mce(unsigned long data)
static void raise_poll(struct mce *m)
{
struct delayed_mce *dm = (struct delayed_mce *)data;
struct mce *m = &dm->m;
int cpu = m->extcpu;
unsigned long flags;
mce_banks_t b;
inject_mce(m);
if (m->status & MCI_STATUS_UC) {
struct pt_regs regs;
memset(&b, 0xff, sizeof(mce_banks_t));
local_irq_save(flags);
machine_check_poll(0, &b);
local_irq_restore(flags);
m->finished = 0;
}
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
struct pt_regs regs;
unsigned long flags;
if (!pregs) {
memset(&regs, 0, sizeof(struct pt_regs));
regs.ip = m->ip;
regs.cs = m->cs;
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
do_machine_check(&regs, 0);
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
} else {
mce_banks_t b;
memset(&b, 0xff, sizeof(mce_banks_t));
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
machine_check_poll(0, &b);
mce_notify_irq();
printk(KERN_INFO "Finished machine check poll on CPU %d\n",
cpu);
pregs = &regs;
}
kfree(dm);
/* in mcheck exeception handler, irq will be disabled */
local_irq_save(flags);
do_machine_check(pregs, 0);
local_irq_restore(flags);
m->finished = 0;
}
static cpumask_t mce_inject_cpumask;
static int mce_raise_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
raise_exception(m, args->regs);
else if (m->status)
raise_poll(m);
return NOTIFY_STOP;
}
static struct notifier_block mce_raise_nb = {
.notifier_call = mce_raise_notify,
.priority = 1000,
};
/* Inject mce on current CPU */
static int raise_local(void)
{
struct mce *m = &__get_cpu_var(injectm);
int context = MCJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;
if (m->inject_flags & MCJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
case MCJ_CTX_IRQ:
/*
* Could do more to fake interrupts like
* calling irq_enter, but the necessary
* machinery isn't exported currently.
*/
/*FALL THROUGH*/
case MCJ_CTX_PROCESS:
raise_exception(m, NULL);
break;
default:
printk(KERN_INFO "Invalid MCE context\n");
ret = -EINVAL;
}
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
} else if (m->status) {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
} else
m->finished = 0;
return ret;
}
static void raise_mce(struct mce *m)
{
int context = MCJ_CTX(m->inject_flags);
inject_mce(m);
if (context == MCJ_CTX_RANDOM)
return;
#ifdef CONFIG_X86_LOCAL_APIC
if (m->inject_flags & MCJ_NMI_BROADCAST) {
unsigned long start;
int cpu;
get_online_cpus();
mce_inject_cpumask = cpu_online_map;
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
if (!mcpu->finished ||
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
if (!cpus_empty(mce_inject_cpumask))
apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
start = jiffies;
while (!cpus_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
printk(KERN_ERR
"Timeout waiting for mce inject NMI %lx\n",
*cpus_addr(mce_inject_cpumask));
break;
}
cpu_relax();
}
raise_local();
put_cpu();
put_online_cpus();
} else
#endif
raise_local();
}
/* Error injection interface */
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
size_t usize, loff_t *off)
{
struct delayed_mce *dm;
struct mce m;
if (!capable(CAP_SYS_ADMIN))
@@ -96,19 +199,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
return -EINVAL;
dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
if (!dm)
return -ENOMEM;
/*
* Need to give user space some time to set everything up,
* so do it a jiffie or two later everywhere.
* Should we use a hrtimer here for better synchronization?
*/
memcpy(&dm->m, &m, sizeof(struct mce));
setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
dm->timer.expires = jiffies + 2;
add_timer_on(&dm->timer, m.extcpu);
schedule_timeout(2);
raise_mce(&m);
return usize;
}
@@ -116,6 +212,7 @@ static int inject_init(void)
{
printk(KERN_INFO "Machine check injector initialized\n");
mce_chrdev_ops.write = mce_write;
register_die_notifier(&mce_raise_nb);
return 0;
}


@@ -1,3 +1,4 @@
#include <linux/sysdev.h>
#include <asm/mce.h>
enum severity_level {
@@ -10,6 +11,20 @@ enum severity_level {
MCE_PANIC_SEVERITY,
};
#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
struct mce_bank {
u64 ctl; /* subevents to enable */
unsigned char init; /* initialise bank? */
struct sysdev_attribute attr; /* sysdev attribute */
char attrname[ATTR_LEN]; /* attribute name */
};
int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern int mce_ser;
extern struct mce_bank *mce_banks;


@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
}
}
#ifdef CONFIG_DEBUG_FS
static void *s_start(struct seq_file *f, loff_t *pos)
{
if (*pos >= ARRAY_SIZE(severities))
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
{
struct dentry *dmce = NULL, *fseverities_coverage = NULL;
dmce = debugfs_create_dir("mce", NULL);
dmce = mce_get_debugfs_dir();
if (dmce == NULL)
goto err_out;
fseverities_coverage = debugfs_create_file("severities-coverage",
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
return 0;
err_out:
if (fseverities_coverage)
debugfs_remove(fseverities_coverage);
if (dmce)
debugfs_remove(dmce);
return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif


@@ -34,6 +34,7 @@
#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <asm/processor.h>
#include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
#include "mce-internal.h"
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
smp_processor_id());
}
/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
int mce_disabled __read_mostly;
#ifdef CONFIG_X86_NEW_MCE
#define MISC_MCELOG_MINOR 227
#define SPINUNIT 100 /* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
*/
static int tolerant __read_mostly = 1;
static int banks __read_mostly;
static u64 *bank __read_mostly;
static int rip_msr __read_mostly;
static int mce_bootlog __read_mostly = -1;
static int monarch_timeout __read_mostly = -1;
@@ -87,28 +74,35 @@ int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
struct mce_bank *mce_banks __read_mostly;
/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
static unsigned long dont_init_banks;
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
static void default_decode_mce(struct mce *m)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
}
/*
* CPU/chipset specific EDAC code can register a callback here to print
* MCE errors in a human-readable form:
*/
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
static inline int skip_bank_init(int i)
{
return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
}
static DEFINE_PER_CPU(struct work_struct, mce_work);
/* Do initial initialization of a struct mce */
@@ -183,59 +177,60 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
void __weak decode_mce(struct mce *m)
{
return;
}
static void print_mce(struct mce *m)
{
printk(KERN_EMERG
"CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status);
if (m->ip) {
printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip);
pr_emerg("RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
printk(KERN_CONT "\n");
pr_cont("\n");
}
printk(KERN_EMERG "TSC %llx ", m->tsc);
if (m->addr)
printk(KERN_CONT "ADDR %llx ", m->addr);
if (m->misc)
printk(KERN_CONT "MISC %llx ", m->misc);
printk(KERN_CONT "\n");
printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid,
m->apicid);
decode_mce(m);
pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
pr_cont("ADDR %llx ", m->addr);
if (m->misc)
pr_cont("MISC %llx ", m->misc);
pr_cont("\n");
pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that):
*/
x86_mce_decode_callback(m);
}
static void print_mce_head(void)
{
printk(KERN_EMERG "\nHARDWARE ERROR\n");
pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
printk(KERN_EMERG "This is not a software problem!\n"
#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
"Run through mcelog --ascii to decode and contact your hardware vendor\n"
#endif
);
pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
static atomic_t mce_paniced;
static int fake_panic;
static atomic_t mce_fake_paniced;
/* Panic in progress. Enable interrupts and wait for final IPI */
static void wait_for_panic(void)
{
long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
preempt_disable();
local_irq_enable();
while (timeout-- > 0)
@@ -249,15 +244,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
{
int i;
/*
* Make sure only one CPU runs in machine check panic
*/
if (atomic_add_return(1, &mce_paniced) > 1)
wait_for_panic();
barrier();
if (!fake_panic) {
/*
* Make sure only one CPU runs in machine check panic
*/
if (atomic_inc_return(&mce_paniced) > 1)
wait_for_panic();
barrier();
bust_spinlocks(1);
console_verbose();
bust_spinlocks(1);
console_verbose();
} else {
/* Don't log too much for fake panic */
if (atomic_inc_return(&mce_fake_paniced) > 1)
return;
}
print_mce_head();
/* First print corrected ones that are still unlogged */
for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -284,9 +285,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
print_mce_tail();
if (exp)
printk(KERN_EMERG "Machine check: %s\n", exp);
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic(msg);
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mce_panic_timeout;
panic(msg);
} else
printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
}
/* Support code for software error injection */
@@ -294,13 +298,14 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
unsigned bank = __get_cpu_var(injectm.bank);
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MC0_STATUS + bank*4)
if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
if (msr == MSR_IA32_MC0_ADDR + bank*4)
if (msr == MSR_IA32_MCx_ADDR(bank))
return offsetof(struct mce, addr);
if (msr == MSR_IA32_MC0_MISC + bank*4)
if (msr == MSR_IA32_MCx_MISC(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -311,13 +316,25 @@ static int msr_to_offset(u32 msr)
static u64 mce_rdmsrl(u32 msr)
{
u64 v;
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
if (offset < 0)
return 0;
return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
}
rdmsrl(msr, v);
if (rdmsrl_safe(msr, &v)) {
WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
* something weird, or if the code is buggy.
*/
v = 0;
}
return v;
}
@@ -325,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
return;
@@ -421,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}
#ifdef CONFIG_X86_LOCAL_APIC
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Called after interrupts have been reenabled again
* when a MCE happened during an interrupts off region
@@ -505,7 +523,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
for (i = 0; i < banks; i++) {
if (!bank[i] || !test_bit(i, *b))
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
m.misc = 0;
@@ -514,7 +532,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -529,9 +547,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
continue;
if (m.status & MCI_STATUS_MISCV)
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
if (m.status & MCI_STATUS_ADDRV)
m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -547,7 +565,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
/*
@@ -568,7 +586,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
int i;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
return 1;
}
@@ -628,7 +646,7 @@ out:
* This way we prevent any potential data corruption in a unrecoverable case
* and also makes sure always all CPU's errors are examined.
*
* Also this detects the case of an machine check event coming from outer
* Also this detects the case of a machine check event coming from outer
* space (not detected by any CPUs) In this case some external agent wants
* us to shut down, so panic too.
*
@@ -681,7 +699,7 @@ static void mce_reign(void)
* No machine check event found. Must be some external
* source or one CPU is hung. Panic.
*/
if (!m && tolerant < 3)
if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
mce_panic("Machine check from unknown source", NULL, NULL);
/*
@@ -715,7 +733,7 @@ static int mce_start(int *no_way_out)
* global_nwo should be updated before mce_callin
*/
smp_wmb();
order = atomic_add_return(1, &mce_callin);
order = atomic_inc_return(&mce_callin);
/*
* Wait for everyone.
@@ -852,7 +870,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
}
@@ -905,11 +923,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_setup(&m);
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
no_way_out = mce_no_way_out(&m, &msg);
final = &__get_cpu_var(mces_seen);
*final = m;
no_way_out = mce_no_way_out(&m, &msg);
barrier();
/*
@@ -926,14 +944,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
if (!bank[i])
if (!mce_banks[i].ctl)
continue;
m.misc = 0;
m.addr = 0;
m.bank = i;
m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -974,9 +992,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
if (m.status & MCI_STATUS_MISCV)
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
if (m.status & MCI_STATUS_ADDRV)
m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
/*
* Action optional error. Queue address for later processing.
@@ -1101,7 +1119,7 @@ void mce_log_therm_throt_event(__u64 status)
*/
static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
@@ -1120,7 +1138,7 @@ static void mcheck_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
n = &__get_cpu_var(next_interval);
n = &__get_cpu_var(mce_next_interval);
if (mce_notify_irq())
*n = max(*n/2, HZ/100);
else
@@ -1169,10 +1187,26 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
static int mce_banks_init(void)
{
int i;
mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL;
b->init = 1;
}
return 0;
}
/*
* Initialize Machine Checks for a CPU.
*/
static int mce_cap_init(void)
static int __cpuinit mce_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1192,11 +1226,11 @@ static int mce_cap_init(void)
/* Don't support asymmetric configurations today */
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!bank) {
bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
if (!bank)
return -ENOMEM;
memset(bank, 0xff, banks * sizeof(u64));
if (!mce_banks) {
int err = mce_banks_init();
if (err)
return err;
}
/* Use accurate RIP reporting if available. */
@@ -1228,15 +1262,17 @@ static void mce_init(void)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
if (skip_bank_init(i))
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
}
/* Add per CPU specific workarounds here */
static int mce_cpu_quirks(struct cpuinfo_x86 *c)
static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1251,7 +1287,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
* trips off incorrectly with the IOMMU & 3ware
* & Cerberus:
*/
clear_bit(10, (unsigned long *)&bank[4]);
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && mce_bootlog < 0) {
/*
@@ -1265,7 +1301,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
* by default.
*/
if (c->x86 == 6 && banks > 0)
bank[0] = 0;
mce_banks[0].ctl = 0;
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1278,8 +1314,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
if (c->x86 == 6 && c->x86_model < 0x1A)
__set_bit(0, &dont_init_banks);
if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
mce_banks[0].init = 0;
/*
* All newer Intel systems support MCE broadcasting. Enable
@@ -1335,7 +1371,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
static void mce_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(next_interval);
int *n = &__get_cpu_var(mce_next_interval);
if (mce_ignore_ce)
return;
@@ -1348,6 +1384,17 @@ static void mce_init_timer(void)
add_timer_on(t, smp_processor_id());
}
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
smp_processor_id());
}
/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@@ -1561,8 +1608,10 @@ static struct miscdevice mce_log_device = {
*/
static int __init mcheck_enable(char *str)
{
if (*str == 0)
if (*str == 0) {
enable_p5_mce();
return 1;
}
if (*str == '=')
str++;
if (!strcmp(str, "off"))
@@ -1603,8 +1652,10 @@ static int mce_disable(void)
int i;
for (i = 0; i < banks; i++) {
if (!skip_bank_init(i))
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
return 0;
}
@@ -1679,14 +1730,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
static struct sysdev_attribute *bank_attrs;
static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
{
return container_of(attr, struct mce_bank, attr);
}
static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
char *buf)
{
u64 b = bank[attr - bank_attrs];
return sprintf(buf, "%llx\n", b);
return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
}
static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1697,7 +1749,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
bank[attr - bank_attrs] = new;
attr_to_bank(attr)->ctl = new;
mce_restart();
return size;
@@ -1839,7 +1891,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
}
for (j = 0; j < banks; j++) {
err = sysdev_create_file(&per_cpu(mce_dev, cpu),
&bank_attrs[j]);
&mce_banks[j].attr);
if (err)
goto error2;
}
@@ -1848,10 +1900,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
error:
while (--i >= 0)
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
sysdev_unregister(&per_cpu(mce_dev, cpu));
@@ -1869,7 +1921,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
for (i = 0; i < banks; i++)
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
sysdev_unregister(&per_cpu(mce_dev, cpu));
cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1886,8 +1938,10 @@ static void mce_disable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
if (!skip_bank_init(i))
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
}
@@ -1902,8 +1956,10 @@ static void mce_reenable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
for (i = 0; i < banks; i++) {
if (!skip_bank_init(i))
wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
struct mce_bank *b = &mce_banks[i];
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
}
@@ -1935,7 +1991,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
t->expires = round_jiffies(jiffies +
__get_cpu_var(next_interval));
__get_cpu_var(mce_next_interval));
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
@@ -1951,35 +2007,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
.notifier_call = mce_cpu_callback,
};
static __init int mce_init_banks(void)
static __init void mce_init_banks(void)
{
int i;
bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
GFP_KERNEL);
if (!bank_attrs)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct sysdev_attribute *a = &bank_attrs[i];
struct mce_bank *b = &mce_banks[i];
struct sysdev_attribute *a = &b->attr;
a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
if (!a->attr.name)
goto nomem;
a->attr.name = b->attrname;
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
a->attr.mode = 0644;
a->show = show_bank;
a->store = set_bank;
}
return 0;
nomem:
while (--i >= 0)
kfree(bank_attrs[i].attr.name);
kfree(bank_attrs);
bank_attrs = NULL;
return -ENOMEM;
}
static __init int mce_init_device(void)
@@ -1992,9 +2034,7 @@ static __init int mce_init_device(void)
zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
err = mce_init_banks();
if (err)
return err;
mce_init_banks();
err = sysdev_class_register(&mce_sysclass);
if (err)
@@ -2014,51 +2054,6 @@ static __init int mce_init_device(void)
device_initcall(mce_init_device);
#else /* CONFIG_X86_OLD_MCE: */
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
amd_mcheck_init(c);
break;
case X86_VENDOR_INTEL:
if (c->x86 == 5)
intel_p5_mcheck_init(c);
if (c->x86 == 6)
intel_p6_mcheck_init(c);
if (c->x86 == 15)
intel_p4_mcheck_init(c);
break;
case X86_VENDOR_CENTAUR:
if (c->x86 == 5)
winchip_mcheck_init(c);
break;
default:
break;
}
printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
}
static int __init mcheck_enable(char *str)
{
mce_p5_enabled = 1;
return 1;
}
__setup("mce", mcheck_enable);
#endif /* CONFIG_X86_OLD_MCE */
/*
* Old style boot options parsing. Only for compatibility.
*/
@@ -2068,3 +2063,56 @@ static int __init mcheck_disable(char *str)
return 1;
}
__setup("nomce", mcheck_disable);
#ifdef CONFIG_DEBUG_FS
struct dentry *mce_get_debugfs_dir(void)
{
static struct dentry *dmce;
if (!dmce)
dmce = debugfs_create_dir("mce", NULL);
return dmce;
}
static void mce_reset(void)
{
cpu_missing = 0;
atomic_set(&mce_fake_paniced, 0);
atomic_set(&mce_executing, 0);
atomic_set(&mce_callin, 0);
atomic_set(&global_nwo, 0);
}
static int fake_panic_get(void *data, u64 *val)
{
*val = fake_panic;
return 0;
}
static int fake_panic_set(void *data, u64 val)
{
mce_reset();
fake_panic = val;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
static int __init mce_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
dmce = mce_get_debugfs_dir();
if (!dmce)
return -ENOMEM;
ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
&fake_panic_fops);
if (!ffake_panic)
return -ENOMEM;
return 0;
}
late_initcall(mce_debugfs_init);
#endif
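
The fake_panic knob above is a stock use of the debugfs simple-attribute helpers. A minimal, self-contained sketch of that pattern (hypothetical knob name, not code from this tree):

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>

static u64 knob_value;                  /* hypothetical state toggled from user space */

static int knob_get(void *data, u64 *val)
{
        *val = knob_value;
        return 0;
}

static int knob_set(void *data, u64 val)
{
        knob_value = val;               /* any side effects would go here */
        return 0;
}
/* Generates knob_fops with read/write wired to the get/set helpers above. */
DEFINE_SIMPLE_ATTRIBUTE(knob_fops, knob_get, knob_set, "%llu\n");

static int __init knob_init(void)
{
        struct dentry *dir = debugfs_create_dir("example", NULL);

        if (!dir)
                return -ENOMEM;
        if (!debugfs_create_file("knob", 0644, dir, NULL, &knob_fops))
                return -ENOMEM;
        return 0;
}
late_initcall(knob_init);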

View File

@@ -69,7 +69,7 @@ struct threshold_bank {
struct threshold_block *blocks;
cpumask_var_t cpus;
};
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
#ifdef CONFIG_SMP
static unsigned char shared_bank[NR_BANKS] = {
@@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
#ifdef CONFIG_SMP
struct cpuinfo_x86 *c = &cpu_data(cpu);
#endif
sprintf(name, "threshold_bank%i", bank);

View File

@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -90,7 +91,7 @@ static void cmci_discover(int banks, int boot)
if (test_bit(i, owned))
continue;
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
if (val & CMCI_EN) {
@@ -101,8 +102,8 @@ static void cmci_discover(int banks, int boot)
}
val |= CMCI_EN | CMCI_THRESHOLD;
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & CMCI_EN) {
@@ -152,9 +153,9 @@ void cmci_clear(void)
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
spin_unlock_irqrestore(&cmci_discover_lock, flags);
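
The switch from open-coded MSR_IA32_MC0_CTL2 + i arithmetic to MSR_IA32_MCx_CTL2(i) relies on simple per-bank indexing macros; roughly (illustrative values only, the real definitions live in <asm/msr-index.h>):

/* Illustrative sketch; do not rely on these literals. */
#define MSR_IA32_MC0_CTL        0x00000400
#define MSR_IA32_MC0_CTL2       0x00000280

/* CTL/STATUS/ADDR/MISC come in blocks of four MSRs per bank ... */
#define MSR_IA32_MCx_CTL(x)     (MSR_IA32_MC0_CTL + 4*(x))
/* ... while the CMCI CTL2 registers are contiguous, one per bank. */
#define MSR_IA32_MCx_CTL2(x)    (MSR_IA32_MC0_CTL2 + (x))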

View File

@@ -1,94 +0,0 @@
/*
* Non Fatal Machine Check Exception Reporting
*
* (C) Copyright 2002 Dave Jones. <davej@redhat.com>
*
* This file contains routines to check for non-fatal MCEs every 15s
*
*/
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
static int firstbank;
#define MCE_RATE (15*HZ) /* timer rate is 15s */
static void mce_checkregs(void *info)
{
u32 low, high;
int i;
for (i = firstbank; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (!(high & (1<<31)))
continue;
printk(KERN_INFO "MCE: The hardware reports a non fatal, "
"correctable incident occurred on CPU %d.\n",
smp_processor_id());
printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
/*
* Scrub the error so we don't pick it up in MCE_RATE
* seconds time:
*/
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
/* Serialize: */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
static void mce_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
on_each_cpu(mce_checkregs, NULL, 1);
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
static int __init init_nonfatal_mce_checker(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
/* Check for MCE support */
if (!cpu_has(c, X86_FEATURE_MCE))
return -ENODEV;
/* Check for PPro style MCA */
if (!cpu_has(c, X86_FEATURE_MCA))
return -ENODEV;
/* Some Athlons misbehave when we frob bank 0 */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 == 6)
firstbank = 1;
else
firstbank = 0;
/*
* Check for non-fatal errors every MCE_RATE s
*/
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
printk(KERN_INFO "Machine check exception polling timer started.\n");
return 0;
}
module_init(init_nonfatal_mce_checker);
MODULE_LICENSE("GPL");

View File

@@ -1,163 +0,0 @@
/*
* P4 specific Machine Check Exception Reporting
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* as supported by the P4/Xeon family */
struct intel_mce_extended_msrs {
u32 eax;
u32 ebx;
u32 ecx;
u32 edx;
u32 esi;
u32 edi;
u32 ebp;
u32 esp;
u32 eflags;
u32 eip;
/* u32 *reserved[]; */
};
static int mce_num_extended_msrs;
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
u32 h;
rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
}
static void intel_machine_check(struct pt_regs *regs, long error_code)
{
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
if (mce_num_extended_msrs > 0) {
struct intel_mce_extended_msrs dbg;
intel_get_extended_msrs(&dbg);
printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
smp_processor_id(), dbg.eip, dbg.eflags,
dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
}
for (i = 0; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
char misc[20];
char addr[24];
misc[0] = addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
high &= ~(1<<31);
if (high & (1<<27)) {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
smp_processor_id(), i, high, low, misc, addr);
}
}
if (recover & 2)
panic("CPU context corrupt");
if (recover & 1)
panic("Unable to continue");
printk(KERN_EMERG "Attempting to continue.\n");
/*
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable. This will allow BIOS to look at the MSRs
* for errors if the OS could not log the error.
*/
for (i = 0; i < nr_mce_banks; i++) {
u32 msr;
msr = MSR_IA32_MC0_STATUS+i*4;
rdmsr(msr, low, high);
if (high&(1<<31)) {
/* Clear it */
wrmsr(msr, 0UL, 0UL);
/* Serialize */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
mcgstl &= ~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
machine_check_vector = intel_machine_check;
wmb();
printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
for (i = 0; i < nr_mce_banks; i++) {
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
/* Check for P4/Xeon extended MCE MSRs */
rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<9)) {/* MCG_EXT_P */
mce_num_extended_msrs = (l >> 16) & 0xff;
printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
" available\n",
smp_processor_id(), mce_num_extended_msrs);
#ifdef CONFIG_X86_MCE_P4THERMAL
/* Check for P4/Xeon Thermal monitor */
intel_init_thermal(c);
#endif
}
}

View File

@@ -1,127 +0,0 @@
/*
* P6 specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* Machine Check Handler For PII/PIII */
static void intel_machine_check(struct pt_regs *regs, long error_code)
{
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
for (i = 0; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
char misc[20];
char addr[24];
misc[0] = '\0';
addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
high &= ~(1<<31);
if (high & (1<<27)) {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
}
if (high & (1<<26)) {
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
smp_processor_id(), i, high, low, misc, addr);
}
}
if (recover & 2)
panic("CPU context corrupt");
if (recover & 1)
panic("Unable to continue");
printk(KERN_EMERG "Attempting to continue.\n");
/*
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable. This will allow BIOS to look at the MSRs
* for errors if the OS could not log the error:
*/
for (i = 0; i < nr_mce_banks; i++) {
unsigned int msr;
msr = MSR_IA32_MC0_STATUS+i*4;
rdmsr(msr, low, high);
if (high & (1<<31)) {
/* Clear it: */
wrmsr(msr, 0UL, 0UL);
/* Serialize: */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
mcgstl &= ~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
/* Check for MCE support */
if (!cpu_has(c, X86_FEATURE_MCE))
return;
/* Check for PPro style MCA */
if (!cpu_has(c, X86_FEATURE_MCA))
return;
/* Ok machine check is available */
machine_check_vector = intel_machine_check;
/* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
/*
* Following the example in IA-32 SDM Vol 3:
* - MC0_CTL should not be written
* - Status registers on all banks should be cleared on reset
*/
for (i = 1; i < nr_mce_banks; i++)
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
for (i = 0; i < nr_mce_banks; i++)
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}

View File

@@ -34,20 +34,31 @@
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
static DEFINE_PER_CPU(bool, thermal_throttle_active);
/*
* Current thermal throttling state:
*/
struct thermal_state {
bool is_throttled;
static atomic_t therm_throt_en = ATOMIC_INIT(0);
u64 next_check;
unsigned long throttle_count;
unsigned long last_throttle_count;
};
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
#define define_therm_throt_sysdev_show_func(name) \
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, \
char *buf) \
\
static ssize_t therm_throt_sysdev_show_##name( \
struct sys_device *dev, \
struct sysdev_attribute *attr, \
char *buf) \
{ \
unsigned int cpu = dev->id; \
ssize_t ret; \
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
preempt_disable(); /* CPU hotplug */ \
if (cpu_online(cpu)) \
ret = sprintf(buf, "%lu\n", \
per_cpu(thermal_throttle_##name, cpu)); \
per_cpu(thermal_state, cpu).name); \
else \
ret = 0; \
preempt_enable(); \
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
return ret; \
}
define_therm_throt_sysdev_show_func(count);
define_therm_throt_sysdev_one_ro(count);
define_therm_throt_sysdev_show_func(throttle_count);
define_therm_throt_sysdev_one_ro(throttle_count);
static struct attribute *thermal_throttle_attrs[] = {
&attr_count.attr,
&attr_throttle_count.attr,
NULL
};
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
static int therm_throt_process(int curr)
static int therm_throt_process(bool is_throttled)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
bool was_throttled = __get_cpu_var(thermal_throttle_active);
bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
struct thermal_state *state;
unsigned int this_cpu;
bool was_throttled;
u64 now;
this_cpu = smp_processor_id();
now = get_jiffies_64();
state = &per_cpu(thermal_state, this_cpu);
was_throttled = state->is_throttled;
state->is_throttled = is_throttled;
if (is_throttled)
__get_cpu_var(thermal_throttle_count)++;
state->throttle_count++;
if (!(was_throttled ^ is_throttled) &&
time_before64(tmp_jiffs, __get_cpu_var(next_check)))
if (time_before64(now, state->next_check) &&
state->throttle_count != state->last_throttle_count)
return 0;
__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
state->next_check = now + CHECK_INTERVAL;
state->last_throttle_count = state->throttle_count;
/* if we just entered the thermal event */
if (is_throttled) {
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
"cpu clock throttled (total events = %lu)\n",
cpu, __get_cpu_var(thermal_throttle_count));
printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
add_taint(TAINT_MACHINE_CHECK);
return 1;
}
if (was_throttled) {
printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
return 1;
}
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
__u64 msr_val;
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
mce_log_therm_throt_event(msr_val);
}
@@ -260,9 +277,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
tm2 = 1;
/* Check whether a vector already exists */
if (h & APIC_VECTOR_MASK) {
printk(KERN_DEBUG
@@ -271,6 +285,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
/* early Pentium M models use different method for enabling TM2 */
if (cpu_has(c, X86_FEATURE_TM2)) {
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
rdmsr(MSR_THERM2_CTL, l, h);
if (l & MSR_THERM2_CTL_TM_SELECT)
tm2 = 1;
} else if (l & MSR_IA32_MISC_ENABLE_TM2)
tm2 = 1;
}
/* We'll mask the thermal vector in the lapic till we're ready: */
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
apic_write(APIC_LVTTHMR, h);
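
The reworked therm_throt_process() keeps a per-CPU struct thermal_state and rate-limits its messages to roughly one per CHECK_INTERVAL while still counting every event. The time-window half of that pattern, reduced to a standalone sketch (names hypothetical; the real code additionally compares throttle_count against last_throttle_count):

#include <linux/jiffies.h>
#include <linux/types.h>

#define CHECK_INTERVAL (300 * HZ)

struct event_state {
        u64 next_check;                 /* jiffies64 deadline for the next report */
};

/* Return true at most once per CHECK_INTERVAL for this event source. */
static bool event_should_report(struct event_state *s)
{
        u64 now = get_jiffies_64();

        if (time_before64(now, s->next_check))
                return false;           /* still inside the quiet window */

        s->next_check = now + CHECK_INTERVAL;
        return true;
}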

View File

@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
unsigned long long base, size;
char *ptr;
char line[LINE_SIZE];
int length;
size_t linelen;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!len)
return -EINVAL;
memset(line, 0, LINE_SIZE);
if (len > LINE_SIZE)
len = LINE_SIZE;
if (copy_from_user(line, buf, len - 1))
length = len;
length--;
if (length > LINE_SIZE - 1)
length = LINE_SIZE - 1;
if (length < 0)
return -EINVAL;
if (copy_from_user(line, buf, length))
return -EFAULT;
linelen = strlen(line);
@@ -126,8 +133,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EINVAL;
base = simple_strtoull(line + 5, &ptr, 0);
for (; isspace(*ptr); ++ptr)
;
while (isspace(*ptr))
ptr++;
if (strncmp(ptr, "size=", 5))
return -EINVAL;
@@ -135,14 +142,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
for (; isspace(*ptr); ++ptr)
;
while (isspace(*ptr))
ptr++;
if (strncmp(ptr, "type=", 5))
return -EINVAL;
ptr += 5;
for (; isspace(*ptr); ++ptr)
;
while (isspace(*ptr))
ptr++;
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
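
The rewritten length handling still parses the usual /proc/mtrr write syntax ("base=" then "size=" then "type=", 4K-aligned values, one request per write). A hedged user-space example of that interface, assuming the format documented in Documentation/mtrr.txt:

/* Hypothetical user-space example; base and size must be page aligned. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        const char *req = "base=0xd8000000 size=0x8000000 type=write-combining\n";
        int fd = open("/proc/mtrr", O_WRONLY);

        if (fd < 0) {
                perror("open /proc/mtrr");
                return 1;
        }
        if (write(fd, req, strlen(req)) != (ssize_t)strlen(req))
                perror("write /proc/mtrr");
        close(fd);
        return 0;
}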

View File

@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;
static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
@@ -163,7 +164,10 @@ static void ipi_handler(void *info)
if (data->smp_reg != ~0U) {
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
} else {
} else if (mtrr_aps_delayed_init) {
/*
* Initialize the MTRRs in addition to the synchronisation.
*/
mtrr_if->set_all();
}
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
*/
if (reg != ~0U)
mtrr_if->set(reg, base, size, type);
else if (!mtrr_aps_delayed_init)
mtrr_if->set_all();
/* Wait for the others */
while (atomic_read(&data.count))
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void)
void mtrr_ap_init(void)
{
unsigned long flags;
if (!mtrr_if || !use_intel())
if (!use_intel() || mtrr_aps_delayed_init)
return;
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
@@ -738,11 +742,7 @@ void mtrr_ap_init(void)
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
* lock to prevent mtrr entry changes
*/
local_irq_save(flags);
mtrr_if->set_all();
local_irq_restore(flags);
set_mtrr(~0U, 0, 0, 0);
}
/**
@@ -753,6 +753,34 @@ void mtrr_save_state(void)
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
}
void set_mtrr_aps_delayed_init(void)
{
if (!use_intel())
return;
mtrr_aps_delayed_init = true;
}
/*
* MTRR initialization for all AP's
*/
void mtrr_aps_init(void)
{
if (!use_intel())
return;
set_mtrr(~0U, 0, 0, 0);
mtrr_aps_delayed_init = false;
}
void mtrr_bp_restore(void)
{
if (!use_intel())
return;
mtrr_if->set_all();
}
static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)

View File

@@ -20,7 +20,7 @@
#include <linux/kprobes.h>
#include <asm/apic.h>
#include <asm/perf_counter.h>
#include <asm/perf_event.h>
struct nmi_watchdog_ctlblk {
unsigned int cccr_msr;

View File

@@ -0,0 +1,55 @@
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/irqflags.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
static unsigned long scale_aperfmperf(void)
{
struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
unsigned long ratio, flags;
local_irq_save(flags);
get_aperfmperf(&val);
local_irq_restore(flags);
ratio = calc_aperfmperf_ratio(old, &val);
*old = val;
return ratio;
}
unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
{
/*
* do aperf/mperf on the cpu level because it includes things
* like turbo mode, which are relevant to full cores.
*/
if (boot_cpu_has(X86_FEATURE_APERFMPERF))
return scale_aperfmperf();
/*
* maybe have something cpufreq here
*/
return default_scale_freq_power(sd, cpu);
}
unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
{
/*
* aperf/mperf already includes the smt gain
*/
if (boot_cpu_has(X86_FEATURE_APERFMPERF))
return SCHED_LOAD_SCALE;
return default_scale_smt_power(sd, cpu);
}
#endif
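
arch_scale_freq_power() above scales by the ratio of APERF to MPERF deltas between two samples. A rough standalone sketch of that calculation (struct and helper names hypothetical, multiply-overflow handling omitted):

#include <linux/math64.h>
#include <linux/types.h>

/* Hypothetical sample type; the in-tree code reads MSR_IA32_APERF/MPERF. */
struct aperfmperf_sample {
        u64 aperf;                      /* counts at the actual core clock */
        u64 mperf;                      /* counts at the maximum (TSC) clock */
};

#define FREQ_SCALE 1024                 /* assumed fixed-point unit (SCHED_LOAD_SCALE) */

static unsigned long aperfmperf_ratio(const struct aperfmperf_sample *prev,
                                      const struct aperfmperf_sample *cur)
{
        u64 daperf = cur->aperf - prev->aperf;
        u64 dmperf = cur->mperf - prev->mperf;

        if (!dmperf)
                return FREQ_SCALE;      /* no reference ticks; report nominal speed */

        /* ratio ~ running clock / max clock, in FREQ_SCALE fixed point */
        return div64_u64(daperf * FREQ_SCALE, dmperf);
}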

View File

@@ -24,6 +24,7 @@
#include <linux/dmi.h>
#include <asm/div64.h>
#include <asm/vmware.h>
#include <asm/x86_init.h>
#define CPUID_VMWARE_INFO_LEAF 0x40000000
#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
@@ -47,21 +48,35 @@ static inline int __vmware_platform(void)
return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}
static unsigned long __vmware_get_tsc_khz(void)
static unsigned long vmware_get_tsc_khz(void)
{
uint64_t tsc_hz;
uint32_t eax, ebx, ecx, edx;
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
if (ebx == UINT_MAX)
return 0;
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
(unsigned long) tsc_hz / 1000,
(unsigned long) tsc_hz % 1000);
return tsc_hz;
}
void __init vmware_platform_setup(void)
{
uint32_t eax, ebx, ecx, edx;
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
if (ebx != UINT_MAX)
x86_platform.calibrate_tsc = vmware_get_tsc_khz;
else
printk(KERN_WARNING
"Failed to get TSC freq from the hypervisor\n");
}
/*
* While checking the dmi string information, just checking the product
* serial key should be enough, as this will always have a VMware
@@ -87,12 +102,6 @@ int vmware_platform(void)
return 0;
}
unsigned long vmware_get_tsc_khz(void)
{
BUG_ON(!vmware_platform());
return __vmware_get_tsc_khz();
}
/*
* VMware hypervisor takes care of exporting a reliable TSC to the guest.
* Still, due to timing difference when running on virtual cpus, the TSC can

View File

@@ -182,7 +182,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
.notifier_call = cpuid_class_cpu_callback,
};
static char *cpuid_nodename(struct device *dev)
static char *cpuid_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
}
@@ -203,7 +203,7 @@ static int __init cpuid_init(void)
err = PTR_ERR(cpuid_class);
goto out_chrdev;
}
cpuid_class->nodename = cpuid_nodename;
cpuid_class->devnode = cpuid_devnode;
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)

View File

@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>

View File

@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>

View File

@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
struct resource *res;
u64 end;
res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
e820_res = res;
for (i = 0; i < e820.nr_map; i++) {
end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
if (mb < 16)
return 1024*1024;
/* To 32MB for anything above that */
return 32*1024*1024;
/* To 64MB for anything above that */
return 64*1024*1024;
}
#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
@@ -1455,28 +1455,11 @@ char *__init default_machine_specific_memory_setup(void)
return who;
}
char *__init __attribute__((weak)) machine_specific_memory_setup(void)
{
if (x86_quirks->arch_memory_setup) {
char *who = x86_quirks->arch_memory_setup();
if (who)
return who;
}
return default_machine_specific_memory_setup();
}
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
char * __init __attribute__((weak)) memory_setup(void)
{
return machine_specific_memory_setup();
}
void __init setup_memory_map(void)
{
char *who;
who = memory_setup();
who = x86_init.resources.memory_setup();
memcpy(&e820_saved, &e820, sizeof(struct e820map));
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map(who);

View File

@@ -160,721 +160,6 @@ static struct console early_serial_console = {
.index = -1,
};
#ifdef CONFIG_EARLY_PRINTK_DBGP
static struct ehci_caps __iomem *ehci_caps;
static struct ehci_regs __iomem *ehci_regs;
static struct ehci_dbg_port __iomem *ehci_debug;
static unsigned int dbgp_endpoint_out;
struct ehci_dev {
u32 bus;
u32 slot;
u32 func;
};
static struct ehci_dev ehci_dev;
#define USB_DEBUG_DEVNUM 127
#define DBGP_DATA_TOGGLE 0x8800
static inline u32 dbgp_pid_update(u32 x, u32 tok)
{
return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
}
static inline u32 dbgp_len_update(u32 x, u32 len)
{
return (x & ~0x0f) | (len & 0x0f);
}
/*
* USB Packet IDs (PIDs)
*/
/* token */
#define USB_PID_OUT 0xe1
#define USB_PID_IN 0x69
#define USB_PID_SOF 0xa5
#define USB_PID_SETUP 0x2d
/* handshake */
#define USB_PID_ACK 0xd2
#define USB_PID_NAK 0x5a
#define USB_PID_STALL 0x1e
#define USB_PID_NYET 0x96
/* data */
#define USB_PID_DATA0 0xc3
#define USB_PID_DATA1 0x4b
#define USB_PID_DATA2 0x87
#define USB_PID_MDATA 0x0f
/* Special */
#define USB_PID_PREAMBLE 0x3c
#define USB_PID_ERR 0x3c
#define USB_PID_SPLIT 0x78
#define USB_PID_PING 0xb4
#define USB_PID_UNDEF_0 0xf0
#define USB_PID_DATA_TOGGLE 0x88
#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
#define PCI_CAP_ID_EHCI_DEBUG 0xa
#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
#define HUB_SHORT_RESET_TIME 10
#define HUB_LONG_RESET_TIME 200
#define HUB_RESET_TIMEOUT 500
#define DBGP_MAX_PACKET 8
static int dbgp_wait_until_complete(void)
{
u32 ctrl;
int loop = 0x100000;
do {
ctrl = readl(&ehci_debug->control);
/* Stop when the transaction is finished */
if (ctrl & DBGP_DONE)
break;
} while (--loop > 0);
if (!loop)
return -1;
/*
* Now that we have observed the completed transaction,
* clear the done bit.
*/
writel(ctrl | DBGP_DONE, &ehci_debug->control);
return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
}
static void __init dbgp_mdelay(int ms)
{
int i;
while (ms--) {
for (i = 0; i < 1000; i++)
outb(0x1, 0x80);
}
}
static void dbgp_breath(void)
{
/* Sleep to give the debug port a chance to breathe */
}
static int dbgp_wait_until_done(unsigned ctrl)
{
u32 pids, lpid;
int ret;
int loop = 3;
retry:
writel(ctrl | DBGP_GO, &ehci_debug->control);
ret = dbgp_wait_until_complete();
pids = readl(&ehci_debug->pids);
lpid = DBGP_PID_GET(pids);
if (ret < 0)
return ret;
/*
* If the port is getting full or it has dropped data
* start pacing ourselves, not necessary but it's friendly.
*/
if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
dbgp_breath();
/* If I get a NACK reissue the transmission */
if (lpid == USB_PID_NAK) {
if (--loop > 0)
goto retry;
}
return ret;
}
static void dbgp_set_data(const void *buf, int size)
{
const unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = hi = 0;
for (i = 0; i < 4 && i < size; i++)
lo |= bytes[i] << (8*i);
for (; i < 8 && i < size; i++)
hi |= bytes[i] << (8*(i - 4));
writel(lo, &ehci_debug->data03);
writel(hi, &ehci_debug->data47);
}
static void __init dbgp_get_data(void *buf, int size)
{
unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = readl(&ehci_debug->data03);
hi = readl(&ehci_debug->data47);
for (i = 0; i < 4 && i < size; i++)
bytes[i] = (lo >> (8*i)) & 0xff;
for (; i < 8 && i < size; i++)
bytes[i] = (hi >> (8*(i - 4))) & 0xff;
}
static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
const char *bytes, int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_OUT);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
dbgp_set_data(bytes, size);
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
return ret;
}
static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_IN);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl &= ~DBGP_OUT;
ctrl |= DBGP_GO;
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
if (size > ret)
size = ret;
dbgp_get_data(data, size);
return ret;
}
static int __init dbgp_control_msg(unsigned devnum, int requesttype,
int request, int value, int index, void *data, int size)
{
u32 pids, addr, ctrl;
struct usb_ctrlrequest req;
int read;
int ret;
read = (requesttype & USB_DIR_IN) != 0;
if (size > (read ? DBGP_MAX_PACKET:0))
return -1;
/* Compute the control message */
req.bRequestType = requesttype;
req.bRequest = request;
req.wValue = cpu_to_le16(value);
req.wIndex = cpu_to_le16(index);
req.wLength = cpu_to_le16(size);
pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
addr = DBGP_EPADDR(devnum, 0);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, sizeof(req));
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
/* Send the setup message */
dbgp_set_data(&req, sizeof(req));
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
/* Read the result */
return dbgp_bulk_read(devnum, 0, data, size);
}
/* Find a PCI capability */
static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
{
u8 pos;
int bytes;
if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
PCI_STATUS_CAP_LIST))
return 0;
pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
u8 id;
pos &= ~3;
id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
if (id == 0xff)
break;
if (id == cap)
return pos;
pos = read_pci_config_byte(num, slot, func,
pos+PCI_CAP_LIST_NEXT);
}
return 0;
}
static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
{
u32 class;
class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
return 0;
return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
}
static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
{
u32 bus, slot, func;
for (bus = 0; bus < 256; bus++) {
for (slot = 0; slot < 32; slot++) {
for (func = 0; func < 8; func++) {
unsigned cap;
cap = __find_dbgp(bus, slot, func);
if (!cap)
continue;
if (ehci_num-- != 0)
continue;
*rbus = bus;
*rslot = slot;
*rfunc = func;
return cap;
}
}
}
return 0;
}
static int __init ehci_reset_port(int port)
{
u32 portsc;
u32 delay_time, delay;
int loop;
/* Reset the usb debug port */
portsc = readl(&ehci_regs->port_status[port - 1]);
portsc &= ~PORT_PE;
portsc |= PORT_RESET;
writel(portsc, &ehci_regs->port_status[port - 1]);
delay = HUB_ROOT_RESET_TIME;
for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
delay_time += delay) {
dbgp_mdelay(delay);
portsc = readl(&ehci_regs->port_status[port - 1]);
if (portsc & PORT_RESET) {
/* force reset to complete */
loop = 2;
writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
&ehci_regs->port_status[port - 1]);
do {
portsc = readl(&ehci_regs->port_status[port-1]);
} while ((portsc & PORT_RESET) && (--loop > 0));
}
/* Device went away? */
if (!(portsc & PORT_CONNECT))
return -ENOTCONN;
/* bomb out completely if something weird happened */
if ((portsc & PORT_CSC))
return -EINVAL;
/* If we've finished resetting, then break out of the loop */
if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
return 0;
}
return -EBUSY;
}
static int __init ehci_wait_for_port(int port)
{
u32 status;
int ret, reps;
for (reps = 0; reps < 3; reps++) {
dbgp_mdelay(100);
status = readl(&ehci_regs->status);
if (status & STS_PCD) {
ret = ehci_reset_port(port);
if (ret == 0)
return 0;
}
}
return -ENOTCONN;
}
#ifdef DBGP_DEBUG
# define dbgp_printk early_printk
#else
static inline void dbgp_printk(const char *fmt, ...) { }
#endif
typedef void (*set_debug_port_t)(int port);
static void __init default_set_debug_port(int port)
{
}
static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
static void __init nvidia_set_debug_port(int port)
{
u32 dword;
dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x74);
dword &= ~(0x0f<<12);
dword |= ((port & 0x0f)<<12);
write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
dword);
dbgp_printk("set debug port to %d\n", port);
}
static void __init detect_set_debug_port(void)
{
u32 vendorid;
vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x00);
if ((vendorid & 0xffff) == 0x10de) {
dbgp_printk("using nvidia set_debug_port\n");
set_debug_port = nvidia_set_debug_port;
}
}
static int __init ehci_setup(void)
{
struct usb_debug_descriptor dbgp_desc;
u32 cmd, ctrl, status, portsc, hcs_params;
u32 debug_port, new_debug_port = 0, n_ports;
u32 devnum;
int ret, i;
int loop;
int port_map_tried;
int playtimes = 3;
try_next_time:
port_map_tried = 0;
try_next_port:
hcs_params = readl(&ehci_caps->hcs_params);
debug_port = HCS_DEBUG_PORT(hcs_params);
n_ports = HCS_N_PORTS(hcs_params);
dbgp_printk("debug_port: %d\n", debug_port);
dbgp_printk("n_ports: %d\n", n_ports);
for (i = 1; i <= n_ports; i++) {
portsc = readl(&ehci_regs->port_status[i-1]);
dbgp_printk("portstatus%d: %08x\n", i, portsc);
}
if (port_map_tried && (new_debug_port != debug_port)) {
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
loop = 10;
/* Reset the EHCI controller */
cmd = readl(&ehci_regs->command);
cmd |= CMD_RESET;
writel(cmd, &ehci_regs->command);
do {
cmd = readl(&ehci_regs->command);
} while ((cmd & CMD_RESET) && (--loop > 0));
if (!loop) {
dbgp_printk("can not reset ehci\n");
return -1;
}
dbgp_printk("ehci reset done\n");
/* Claim ownership, but do not enable yet */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_OWNER;
ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
writel(ctrl, &ehci_debug->control);
/* Start the ehci running */
cmd = readl(&ehci_regs->command);
cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
cmd |= CMD_RUN;
writel(cmd, &ehci_regs->command);
/* Ensure everything is routed to the EHCI */
writel(FLAG_CF, &ehci_regs->configured_flag);
/* Wait until the controller is no longer halted */
loop = 10;
do {
status = readl(&ehci_regs->status);
} while ((status & STS_HALT) && (--loop > 0));
if (!loop) {
dbgp_printk("ehci can not be started\n");
return -1;
}
dbgp_printk("ehci started\n");
/* Wait for a device to show up in the debug port */
ret = ehci_wait_for_port(debug_port);
if (ret < 0) {
dbgp_printk("No device found in debug port\n");
goto next_debug_port;
}
dbgp_printk("ehci wait for port done\n");
/* Enable the debug port */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_CLAIM;
writel(ctrl, &ehci_debug->control);
ctrl = readl(&ehci_debug->control);
if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
dbgp_printk("No device in debug port\n");
writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
goto err;
}
dbgp_printk("debug port enabled\n");
/* Completely transfer the debug device to the debug controller */
portsc = readl(&ehci_regs->port_status[debug_port - 1]);
portsc &= ~PORT_PE;
writel(portsc, &ehci_regs->port_status[debug_port - 1]);
dbgp_mdelay(100);
/* Find the debug device and make it device number 127 */
for (devnum = 0; devnum <= 127; devnum++) {
ret = dbgp_control_msg(devnum,
USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
&dbgp_desc, sizeof(dbgp_desc));
if (ret > 0)
break;
}
if (devnum > 127) {
dbgp_printk("Could not find attached debug device\n");
goto err;
}
if (ret < 0) {
dbgp_printk("Attached device is not a debug device\n");
goto err;
}
dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
/* Move the device to 127 if it isn't already there */
if (devnum != USB_DEBUG_DEVNUM) {
ret = dbgp_control_msg(devnum,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
if (ret < 0) {
dbgp_printk("Could not move attached device to %d\n",
USB_DEBUG_DEVNUM);
goto err;
}
devnum = USB_DEBUG_DEVNUM;
dbgp_printk("debug device renamed to 127\n");
}
/* Enable the debug interface */
ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
if (ret < 0) {
dbgp_printk(" Could not enable the debug device\n");
goto err;
}
dbgp_printk("debug interface enabled\n");
/* Perform a small write to get the even/odd data state in sync
*/
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
if (ret < 0) {
dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
goto err;
}
dbgp_printk("small write done\n");
return 0;
err:
/* Things didn't work so remove my claim */
ctrl = readl(&ehci_debug->control);
ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
writel(ctrl, &ehci_debug->control);
return -1;
next_debug_port:
port_map_tried |= (1<<(debug_port - 1));
new_debug_port = ((debug_port-1+1)%n_ports) + 1;
if (port_map_tried != ((1<<n_ports) - 1)) {
set_debug_port(new_debug_port);
goto try_next_port;
}
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
static int __init early_dbgp_init(char *s)
{
u32 debug_port, bar, offset;
u32 bus, slot, func, cap;
void __iomem *ehci_bar;
u32 dbgp_num;
u32 bar_val;
char *e;
int ret;
u8 byte;
if (!early_pci_allowed())
return -1;
dbgp_num = 0;
if (*s)
dbgp_num = simple_strtoul(s, &e, 10);
dbgp_printk("dbgp_num: %d\n", dbgp_num);
cap = find_dbgp(dbgp_num, &bus, &slot, &func);
if (!cap)
return -1;
dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
func);
debug_port = read_pci_config(bus, slot, func, cap);
bar = (debug_port >> 29) & 0x7;
bar = (bar * 4) + 0xc;
offset = (debug_port >> 16) & 0xfff;
dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
if (bar != PCI_BASE_ADDRESS_0) {
dbgp_printk("only debug ports on bar 1 handled.\n");
return -1;
}
bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
dbgp_printk("only simple 32bit mmio bars supported\n");
return -1;
}
/* double check if the mem space is enabled */
byte = read_pci_config_byte(bus, slot, func, 0x04);
if (!(byte & 0x2)) {
byte |= 0x02;
write_pci_config_byte(bus, slot, func, 0x04, byte);
dbgp_printk("mmio for ehci enabled\n");
}
/*
* FIXME I don't have the bar size so just guess PAGE_SIZE is more
* than enough. 1K is the biggest I have seen.
*/
set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
ehci_bar += bar_val & ~PAGE_MASK;
dbgp_printk("ehci_bar: %p\n", ehci_bar);
ehci_caps = ehci_bar;
ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
ehci_debug = ehci_bar + offset;
ehci_dev.bus = bus;
ehci_dev.slot = slot;
ehci_dev.func = func;
detect_set_debug_port();
ret = ehci_setup();
if (ret < 0) {
dbgp_printk("ehci_setup failed\n");
ehci_debug = NULL;
return -1;
}
return 0;
}
static void early_dbgp_write(struct console *con, const char *str, u32 n)
{
int chunk, ret;
if (!ehci_debug)
return;
while (n > 0) {
chunk = n;
if (chunk > DBGP_MAX_PACKET)
chunk = DBGP_MAX_PACKET;
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
dbgp_endpoint_out, str, chunk);
str += chunk;
n -= chunk;
}
}
static struct console early_dbgp_console = {
.name = "earlydbg",
.write = early_dbgp_write,
.flags = CON_PRINTBUFFER,
.index = -1,
};
#endif
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
static int __initdata early_console_initialized;
@@ -891,10 +176,24 @@ asmlinkage void early_printk(const char *fmt, ...)
va_end(ap);
}
static inline void early_console_register(struct console *con, int keep_early)
{
if (early_console->index != -1) {
printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
con->name);
return;
}
early_console = con;
if (keep_early)
early_console->flags &= ~CON_BOOT;
else
early_console->flags |= CON_BOOT;
register_console(early_console);
}
static int __init setup_early_printk(char *buf)
{
int keep_early;
int keep;
if (!buf)
return 0;
@@ -903,42 +202,37 @@ static int __init setup_early_printk(char *buf)
return 0;
early_console_initialized = 1;
keep_early = (strstr(buf, "keep") != NULL);
keep = (strstr(buf, "keep") != NULL);
if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
early_console = &early_serial_console;
} else if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf);
early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)
&& boot_params.screen_info.orig_video_isVGA == 1) {
max_xpos = boot_params.screen_info.orig_video_cols;
max_ypos = boot_params.screen_info.orig_video_lines;
current_ypos = boot_params.screen_info.orig_y;
early_console = &early_vga_console;
while (*buf != '\0') {
if (!strncmp(buf, "serial", 6)) {
buf += 6;
early_serial_init(buf);
early_console_register(&early_serial_console, keep);
if (!strncmp(buf, ",ttyS", 5))
buf += 5;
}
if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf + 4);
early_console_register(&early_serial_console, keep);
}
if (!strncmp(buf, "vga", 3) &&
boot_params.screen_info.orig_video_isVGA == 1) {
max_xpos = boot_params.screen_info.orig_video_cols;
max_ypos = boot_params.screen_info.orig_video_lines;
current_ypos = boot_params.screen_info.orig_y;
early_console_register(&early_vga_console, keep);
}
#ifdef CONFIG_EARLY_PRINTK_DBGP
} else if (!strncmp(buf, "dbgp", 4)) {
if (early_dbgp_init(buf+4) < 0)
return 0;
early_console = &early_dbgp_console;
/*
* usb subsys will reset ehci controller, so don't keep
* that early console
*/
keep_early = 0;
if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
early_console_register(&early_dbgp_console, keep);
#endif
#ifdef CONFIG_HVC_XEN
} else if (!strncmp(buf, "xen", 3)) {
early_console = &xenboot_console;
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
buf++;
}
if (keep_early)
early_console->flags &= ~CON_BOOT;
else
early_console->flags |= CON_BOOT;
register_console(early_console);
return 0;
}

View File

@@ -42,6 +42,7 @@
#include <asm/time.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#define EFI_DEBUG 1
#define PFX "EFI: "
@@ -453,6 +454,9 @@ void __init efi_init(void)
if (add_efi_memmap)
do_add_efi_memmap();
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
/* Setup for EFI runtime service */
reboot_type = BOOT_EFI;

View File

@@ -1185,17 +1185,14 @@ END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
pushl $0
pushl %eax
pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, 0xc(%esp)
movl %eax, %ecx
popl %edx
popl %ecx
popl %eax
ret
jmp *%ecx
#endif
.section .rodata,"a"

View File

@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller)
END(ftrace_graph_caller)
GLOBAL(return_to_handler)
subq $80, %rsp
subq $24, %rsp
/* Save the return values */
movq %rax, (%rsp)
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
movq %rax, 72(%rsp)
movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $72, %rsp
retq
addq $24, %rsp
jmp *%rdi
#endif
@@ -536,20 +536,13 @@ sysret_signal:
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
/* edx: work flags (arg3) */
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
SAVE_REST
FIXUP_TOP_OF_STACK %r11
call do_notify_resume
RESTORE_TOP_OF_STACK %r11
RESTORE_REST
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/*
* We have a signal, or exit tracing or single-step.
* These all wind up with the iret return path anyway,
* so just join that path right now.
*/
FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
jmp int_check_syscall_exit_work
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -654,6 +647,7 @@ int_careful:
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -1021,7 +1015,7 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
#ifdef CONFIG_PERF_COUNTERS
#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#endif

View File

@@ -9,6 +9,8 @@
* the dangers of modifying code on the run.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data)
switch (faulted) {
case 0:
pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("ftrace: converting mcount calls to jmp . + 5\n");
pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_FTRACE_SYSCALLS
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];
extern unsigned long *sys_call_table;
static struct syscall_metadata **syscalls_metadata;
static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
unsigned long __init arch_syscall_addr(int nr)
{
struct syscall_metadata *start;
struct syscall_metadata *stop;
char str[KSYM_SYMBOL_LEN];
start = (struct syscall_metadata *)__start_syscalls_metadata;
stop = (struct syscall_metadata *)__stop_syscalls_metadata;
kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
for ( ; start < stop; start++) {
if (start->name && !strcmp(start->name, str))
return start;
}
return NULL;
return (unsigned long)(&sys_call_table)[nr];
}
struct syscall_metadata *syscall_nr_to_meta(int nr)
{
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
return NULL;
return syscalls_metadata[nr];
}
int syscall_name_to_nr(char *name)
{
int i;
if (!syscalls_metadata)
return -1;
for (i = 0; i < NR_syscalls; i++) {
if (syscalls_metadata[i]) {
if (!strcmp(syscalls_metadata[i]->name, name))
return i;
}
}
return -1;
}
void set_syscall_enter_id(int num, int id)
{
syscalls_metadata[num]->enter_id = id;
}
void set_syscall_exit_id(int num, int id)
{
syscalls_metadata[num]->exit_id = id;
}
static int __init arch_init_ftrace_syscalls(void)
{
int i;
struct syscall_metadata *meta;
unsigned long **psys_syscall_table = &sys_call_table;
syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
NR_syscalls, GFP_KERNEL);
if (!syscalls_metadata) {
WARN_ON(1);
return -ENOMEM;
}
for (i = 0; i < NR_syscalls; i++) {
meta = find_syscall_meta(psys_syscall_table[i]);
syscalls_metadata[i] = meta;
}
return 0;
}
arch_initcall(arch_init_ftrace_syscalls);
#endif

View File

@@ -11,8 +11,21 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
#include <asm/page.h>
#include <asm/trampoline.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
static void __init i386_default_early_setup(void)
{
/* Initialize 32-bit specific setup functions */
x86_init.resources.probe_roms = probe_roms;
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
reserve_ebda_region();
}
void __init i386_start_kernel(void)
{
@@ -29,7 +42,16 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
reserve_ebda_region();
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_MRST:
x86_mrst_early_setup();
break;
default:
i386_default_early_setup();
break;
}
/*
* At this point everything still needed from the boot loader

View File

@@ -23,8 +23,8 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
#include <asm/trampoline.h>
#include <asm/bios_ebda.h>
static void __init zap_identity_mappings(void)
{

View File

@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* any particular GDT layout, because we load our own as soon as we
* can.
*/
.section .text.head,"ax",@progbits
__HEAD
ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
@@ -157,6 +157,7 @@ subarch_entries:
.long default_entry /* normal x86/PC */
.long lguest_entry /* lguest hypervisor */
.long xen_entry /* Xen hypervisor */
.long default_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */
@@ -607,7 +608,7 @@ ENTRY(initial_code)
/*
* BSS section
*/
.section ".bss.page_aligned","wa"
__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
@@ -625,7 +626,7 @@ ENTRY(empty_zero_page)
* This starts the data section.
*/
#ifdef CONFIG_X86_PAE
.section ".data.page_aligned","wa"
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)

View File

@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
.section .text.head
__HEAD
.code64
.globl startup_64
startup_64:
@@ -418,7 +418,7 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
.section .bss.page_aligned, "aw", @nobits
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.skip PAGE_SIZE

View File

@@ -10,6 +10,16 @@
EXPORT_SYMBOL(mcount);
#endif
/*
* Note, this is a prototype to get at the symbol for
* the export, but don't use it from C code; it is used
* by assembly code and is not using C calling convention!
*/
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
EXPORT_SYMBOL(cmpxchg8b_emu);
#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);

View File

@@ -19,12 +19,6 @@
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
#ifdef CONFIG_X86_32
static void pit_disable_clocksource(void);
#else
static inline void pit_disable_clocksource(void) { }
#endif
/*
* HPET replaces the PIT, when enabled. So we need to know, which of
* the two timers is used
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode,
outb_pit(0, PIT_CH0);
outb_pit(0, PIT_CH0);
}
pit_disable_clocksource();
break;
case CLOCK_EVT_MODE_ONESHOT:
/* One shot setup */
pit_disable_clocksource();
outb_pit(0x38, PIT_MODE);
break;
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = {
.shift = 20,
};
static void pit_disable_clocksource(void)
{
/*
* Use mult to check whether it is registered or not
*/
if (pit_cs.mult) {
clocksource_unregister(&pit_cs);
pit_cs.mult = 0;
}
}
static int __init init_pit_clocksource(void)
{
/*

View File

@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
union thread_union init_thread_union
__attribute__((__section__(".data.init_task"))) =
{ INIT_THREAD_INFO(init_task) };
union thread_union init_thread_union __init_task_data =
{ INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.

View File

@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
#ifdef CONFIG_X86_NEW_MCE
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
#ifdef CONFIG_X86_NEW_MCE
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
sum += per_cpu(mce_poll_count, cpu);
#endif

View File

@@ -116,7 +116,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
return 0;
}
static void __init init_ISA_irqs(void)
void __init init_ISA_irqs(void)
{
int i;
@@ -140,8 +140,10 @@ static void __init init_ISA_irqs(void)
}
}
/* Overridden in paravirt.c */
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
void __init init_IRQ(void)
{
x86_init.irqs.intr_init();
}
static void __init smp_intr_init(void)
{
@@ -190,7 +192,7 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
#endif
@@ -206,39 +208,19 @@ static void __init apic_intr_init(void)
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* Performance monitoring interrupts: */
# ifdef CONFIG_PERF_COUNTERS
# ifdef CONFIG_PERF_EVENTS
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
# endif
#endif
}
/**
* x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
*
* Description:
* Perform any necessary interrupt initialisation prior to setting up
* the "ordinary" interrupt call gates. For legacy reasons, the ISA
* interrupts should be initialised here if the machine emulates a PC
* in any way.
**/
static void __init x86_quirk_pre_intr_init(void)
{
#ifdef CONFIG_X86_32
if (x86_quirks->arch_pre_intr_init) {
if (x86_quirks->arch_pre_intr_init())
return;
}
#endif
init_ISA_irqs();
}
void __init native_init_IRQ(void)
{
int i;
/* Execute any quirks before the call gates are initialised: */
x86_quirk_pre_intr_init();
x86_init.irqs.pre_vector_init();
apic_intr_init();
@@ -257,12 +239,6 @@ void __init native_init_IRQ(void)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
/*
* Call quirks after call gates are initialised (usually add in
* the architecture specific gates):
*/
x86_quirk_intr_init();
/*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.

View File

@@ -22,6 +22,8 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#define KVM_SCALE 22
@@ -182,12 +184,13 @@ void __init kvmclock_init(void)
if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
if (kvm_register_clock("boot clock"))
return;
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
pv_time_ops.get_tsc_khz = kvm_get_tsc_khz;
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
x86_cpuinit.setup_percpu_clockev =
kvm_setup_secondary_clock;
#endif
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;

View File

@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
if (!cpus_equal(current->mm->cpu_vm_mask,
cpumask_of_cpu(smp_processor_id())))
if (!cpumask_equal(mm_cpumask(current->mm),
cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else

View File

@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
{
ssize_t ret = -EINVAL;
if ((len >> PAGE_SHIFT) > num_physpages) {
pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
if ((len >> PAGE_SHIFT) > totalram_pages) {
pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -236,7 +236,7 @@ static const struct file_operations microcode_fops = {
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
.devnode = "cpu/microcode",
.nodename = "cpu/microcode",
.fops = &microcode_fops,
};

View File

@@ -45,6 +45,11 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
int __init default_mpc_apic_id(struct mpc_cpu *m)
{
return m->apicid;
}
static void __init MP_processor_info(struct mpc_cpu *m)
{
int apicid;
@@ -55,10 +60,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
return;
}
if (x86_quirks->mpc_apic_id)
apicid = x86_quirks->mpc_apic_id(m);
else
apicid = m->apicid;
apicid = x86_init.mpparse.mpc_apic_id(m);
if (m->cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
@@ -70,16 +72,18 @@ static void __init MP_processor_info(struct mpc_cpu *m)
}
#ifdef CONFIG_X86_IO_APIC
void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
{
memcpy(str, m->bustype, 6);
str[6] = 0;
apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
}
static void __init MP_bus_info(struct mpc_bus *m)
{
char str[7];
memcpy(str, m->bustype, 6);
str[6] = 0;
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
x86_init.mpparse.mpc_oem_bus_info(m, str);
#if MAX_MP_BUSSES < 256
if (m->busid >= MAX_MP_BUSSES) {
@@ -96,8 +100,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
if (x86_quirks->mpc_oem_pci_bus)
x86_quirks->mpc_oem_pci_bus(m);
if (x86_init.mpparse.mpc_oem_pci_bus)
x86_init.mpparse.mpc_oem_pci_bus(m);
clear_bit(m->busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -291,6 +295,8 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
1, mpc, mpc->length, 1);
}
void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -312,16 +318,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (early)
return 1;
if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
}
if (mpc->oemptr)
x86_init.mpparse.smp_read_mpc_oem(mpc);
/*
* Now process the configuration blocks.
*/
if (x86_quirks->mpc_record)
*x86_quirks->mpc_record = 0;
x86_init.mpparse.mpc_record(0);
while (count < mpc->length) {
switch (*mpt) {
@@ -353,8 +356,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
count = mpc->length;
break;
}
if (x86_quirks->mpc_record)
(*x86_quirks->mpc_record)++;
x86_init.mpparse.mpc_record(1);
}
#ifdef CONFIG_X86_BIGSMP
@@ -608,7 +610,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
/*
* Scan the memory blocks for an SMP configuration block.
*/
static void __init __get_smp_config(unsigned int early)
void __init default_get_smp_config(unsigned int early)
{
struct mpf_intel *mpf = mpf_found;
@@ -625,11 +627,6 @@ static void __init __get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
if (x86_quirks->mach_get_smp_config) {
if (x86_quirks->mach_get_smp_config(early))
return;
}
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -670,16 +667,6 @@ static void __init __get_smp_config(unsigned int early)
*/
}
void __init early_get_smp_config(void)
{
__get_smp_config(1);
}
void __init get_smp_config(void)
{
__get_smp_config(0);
}
static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
@@ -745,14 +732,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0;
}
static void __init __find_smp_config(unsigned int reserve)
void __init default_find_smp_config(unsigned int reserve)
{
unsigned int address;
if (x86_quirks->mach_find_smp_config) {
if (x86_quirks->mach_find_smp_config(reserve))
return;
}
/*
* FIXME: Linux assumes you have 640K of base ram..
* this continues the error...
@@ -787,16 +770,6 @@ static void __init __find_smp_config(unsigned int reserve)
smp_scan_config(address, 0x400, reserve);
}
void __init early_find_smp_config(void)
{
__find_smp_config(0);
}
void __init find_smp_config(void)
{
__find_smp_config(1);
}
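/*
 * Editor's note: the removed x86_quirks hooks and the __get_smp_config()/
 * __find_smp_config() wrappers are replaced by x86_init.mpparse function
 * pointers that are always called and default to the old behaviour. A
 * hedged sketch of how the defaults are presumably wired up in the (not
 * shown here) arch/x86/kernel/x86_init.c:
 *
 *	struct x86_init_ops x86_init __initdata = {
 *		.mpparse = {
 *			.mpc_record		= x86_init_uint_noop,
 *			.mpc_apic_id		= default_mpc_apic_id,
 *			.smp_read_mpc_oem	= default_smp_read_mpc_oem,
 *			.mpc_oem_bus_info	= default_mpc_oem_bus_info,
 *			.find_smp_config	= default_find_smp_config,
 *			.get_smp_config		= default_get_smp_config,
 *		},
 *	};
 *
 * Platforms with quirks then override individual pointers early in boot
 * instead of registering a whole x86_quirks structure.
 */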
#ifdef CONFIG_X86_IO_APIC
static u8 __initdata irq_used[MAX_IRQ_SOURCES];

24
arch/x86/kernel/mrst.c Normal file
View File

@@ -0,0 +1,24 @@
/*
* mrst.c: Intel Moorestown platform specific setup code
*
* (C) Copyright 2008 Intel Corporation
* Author: Jacob Pan (jacob.jun.pan@intel.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/init.h>
#include <asm/setup.h>
/*
* Moorestown specific x86_init function overrides and early setup
* calls.
*/
void __init x86_mrst_early_setup(void)
{
x86_init.resources.probe_roms = x86_init_noop;
x86_init.resources.reserve_resources = x86_init_noop;
}

View File

@@ -241,7 +241,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
.notifier_call = msr_class_cpu_callback,
};
static char *msr_nodename(struct device *dev)
static char *msr_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
}
@@ -262,7 +262,7 @@ static int __init msr_init(void)
err = PTR_ERR(msr_class);
goto out_chrdev;
}
msr_class->nodename = msr_nodename;
msr_class->devnode = msr_devnode;
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)

View File

@@ -54,17 +54,12 @@ u64 _paravirt_ident_64(u64 x)
return x;
}
static void __init default_banner(void)
void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
pv_info.name);
}
char *memory_setup(void)
{
return pv_init_ops.memory_setup();
}
/* Simple instruction patching code. */
#define DEF_NATIVE(ops, name, code) \
extern const char start_##ops##_##name[], end_##ops##_##name[]; \
@@ -188,11 +183,6 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
return insn_len;
}
void init_IRQ(void)
{
pv_irq_ops.init_IRQ();
}
static void native_flush_tlb(void)
{
__native_flush_tlb();
@@ -218,13 +208,6 @@ extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
static int __init print_banner(void)
{
pv_init_ops.banner();
return 0;
}
core_initcall(print_banner);
static struct resource reserve_ioports = {
.start = 0,
.end = IO_SPACE_LIMIT,
@@ -320,21 +303,13 @@ struct pv_info pv_info = {
struct pv_init_ops pv_init_ops = {
.patch = native_patch,
.banner = default_banner,
.arch_setup = paravirt_nop,
.memory_setup = machine_specific_memory_setup,
};
struct pv_time_ops pv_time_ops = {
.time_init = hpet_time_init,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
.sched_clock = native_sched_clock,
.get_tsc_khz = native_calibrate_tsc,
};
struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ,
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
@@ -409,8 +384,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
#endif
};
@@ -424,13 +397,6 @@ struct pv_apic_ops pv_apic_ops = {
#endif
struct pv_mmu_ops pv_mmu_ops = {
#ifndef CONFIG_X86_64
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
#else
.pagetable_setup_start = paravirt_nop,
.pagetable_setup_done = paravirt_nop,
#endif
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,

View File

@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
/*
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
* If this variable is 1, IOMMU implementations do no DMA ranslation for
* If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access the whole physical memory. This is
* useful if a user wants to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
@@ -225,10 +225,8 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
if (!strncmp(p, "pt", 2)) {
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
return 1;
}
gart_parse_options(p);
@@ -313,7 +311,7 @@ void pci_iommu_shutdown(void)
amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */

View File

@@ -16,6 +16,7 @@
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>

View File

@@ -46,9 +46,8 @@ void __init pci_swiotlb_init(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
iommu_pass_through)
swiotlb = 1;
if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
swiotlb = 1;
#endif
if (swiotlb_force)
swiotlb = 1;

View File

@@ -9,7 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/power.h>
#include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -27,9 +27,6 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
DEFINE_TRACE(power_start);
DEFINE_TRACE(power_end);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
@@ -289,9 +286,7 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
struct power_trace it;
trace_power_start(&it, POWER_CSTATE, 1);
trace_power_start(POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -304,7 +299,6 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -362,9 +356,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
struct power_trace it;
trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
trace_power_start(POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -374,15 +366,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
if (!need_resched())
__mwait(ax, cx);
}
trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
struct power_trace it;
if (!need_resched()) {
trace_power_start(&it, POWER_CSTATE, 1);
trace_power_start(POWER_CSTATE, 1);
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -392,7 +382,6 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
trace_power_end(&it);
} else
local_irq_enable();
}
@@ -404,13 +393,11 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
struct power_trace it;
trace_power_start(&it, POWER_CSTATE, 0);
trace_power_start(POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
trace_power_end(&it);
trace_power_end(0);
}
/*
@@ -558,10 +545,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
if (pm_idle == c1e_idle) {
alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
cpumask_clear(c1e_mask);
}
if (pm_idle == c1e_idle)
zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)

View File

@@ -312,16 +312,6 @@ static int putreg(struct task_struct *child,
return set_flags(child, value);
#ifdef CONFIG_X86_64
/*
* Orig_ax is really just a flag with small positive and
* negative values, so make sure to always sign-extend it
* from 32 bits so that it works correctly regardless of
* whether we come from a 32-bit environment or not.
*/
case offsetof(struct user_regs_struct, orig_ax):
value = (long) (s32) value;
break;
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
@@ -1177,10 +1167,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
case offsetof(struct user32, regs.orig_eax):
/*
* Sign-extend the value so that orig_eax = -1
* causes (long)orig_ax < 0 tests to fire correctly.
* A 32-bit debugger setting orig_eax means to restore
* the state of the task restarting a 32-bit syscall.
* Make sure we interpret the -ERESTART* codes correctly
* in case the task is not actually still sitting at the
* exit from a 32-bit syscall with TS_COMPAT still set.
*/
regs->orig_ax = (long) (s32) value;
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
task_thread_info(child)->status |= TS_COMPAT;
break;
case offsetof(struct user32, regs.eflags):

View File

@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
pci_read_config_dword(nb_ht, 0x60, &val);
set_dev_node(&dev->dev, val & 7);
pci_dev_put(dev);
pci_dev_put(nb_ht);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,

View File

@@ -4,6 +4,8 @@
#include <linux/pm.h>
#include <linux/efi.h>
#include <linux/dmi.h>
#include <linux/sched.h>
#include <linux/tboot.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -508,6 +510,8 @@ static void native_machine_emergency_restart(void)
if (reboot_emergency)
emergency_vmx_disable_all();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
/* Tell the BIOS if we want cold or warm reboot */
*((unsigned short *)__va(0x472)) = reboot_mode;
@@ -634,6 +638,8 @@ static void native_machine_halt(void)
/* stop other cpus and apics */
machine_shutdown();
tboot_shutdown(TB_SHUTDOWN_HALT);
/* stop this cpu */
stop_this_cpu(NULL);
}
@@ -645,6 +651,8 @@ static void native_machine_power_off(void)
machine_shutdown();
pm_power_off();
}
/* a fallback in case there is no PM info available */
tboot_shutdown(TB_SHUTDOWN_HALT);
}
struct machine_ops machine_ops = {

View File

@@ -8,6 +8,7 @@
#include <linux/pnp.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/time.h>
#ifdef CONFIG_X86_32
@@ -165,33 +166,29 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
}
EXPORT_SYMBOL(rtc_cmos_write);
static int set_rtc_mmss(unsigned long nowtime)
int update_persistent_clock(struct timespec now)
{
unsigned long flags;
int retval;
spin_lock_irqsave(&rtc_lock, flags);
retval = set_wallclock(nowtime);
retval = x86_platform.set_wallclock(now.tv_sec);
spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
/* not static: needed by APM */
unsigned long read_persistent_clock(void)
void read_persistent_clock(struct timespec *ts)
{
unsigned long retval, flags;
spin_lock_irqsave(&rtc_lock, flags);
retval = get_wallclock();
retval = x86_platform.get_wallclock();
spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
int update_persistent_clock(struct timespec now)
{
return set_rtc_mmss(now.tv_sec);
ts->tv_sec = retval;
ts->tv_nsec = 0;
}
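/*
 * Editor's note: read_persistent_clock() now fills a caller-provided
 * struct timespec instead of returning seconds, matching the updated
 * generic timekeeping interface. Illustrative use (editor's sketch):
 *
 *	struct timespec ts;
 *
 *	read_persistent_clock(&ts);	ts.tv_sec = RTC seconds, ts.tv_nsec = 0
 */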
unsigned long long native_read_tsc(void)

View File

@@ -27,6 +27,7 @@
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/acpi.h>
#include <linux/sfi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
@@ -66,6 +67,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <video/edid.h>
@@ -108,10 +110,6 @@
#include <asm/numa_64.h>
#endif
#ifndef ARCH_SETUP
#define ARCH_SETUP
#endif
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -133,9 +131,9 @@ int default_cpu_present_to_apicid(int mps_cpu)
return __default_cpu_present_to_apicid(mps_cpu);
}
int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
int default_check_phys_apicid_present(int phys_apicid)
{
return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
return __default_check_phys_apicid_present(phys_apicid);
}
#endif
@@ -171,13 +169,6 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
static struct resource video_ram_resource = {
.name = "Video RAM area",
.start = 0xa0000,
.end = 0xbffff,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
/* common cpu data for all cpus */
@@ -605,7 +596,7 @@ static struct resource standard_io_resources[] = {
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
};
static void __init reserve_standard_io_resources(void)
void __init reserve_standard_io_resources(void)
{
int i;
@@ -637,10 +628,6 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
@@ -757,7 +744,7 @@ void __init setup_arch(char **cmdline_p)
}
#endif
ARCH_SETUP
x86_init.oem.arch_setup();
setup_memory_map();
parse_setup_data();
@@ -796,6 +783,16 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
#ifdef CONFIG_X86_64
/*
* Must call this twice: Once just to detect whether hardware doesn't
* support NX (so that the early EHCI debug console setup can safely
* call set_fixmap()), and then again after parsing early parameters to
* honor the respective command line option.
*/
check_efer();
#endif
parse_early_param();
#ifdef CONFIG_X86_64
@@ -833,11 +830,9 @@ void __init setup_arch(char **cmdline_p)
* VMware detection requires dmi to be available, so this
* needs to be done after dmi_scan_machine, for the BP.
*/
init_hypervisor(&boot_cpu_data);
init_hypervisor_platform();
#ifdef CONFIG_X86_32
probe_roms();
#endif
x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */
insert_resource(&iomem_resource, &code_resource);
@@ -972,10 +967,11 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif
paravirt_pagetable_setup_start(swapper_pg_dir);
x86_init.paging.pagetable_setup_start(swapper_pg_dir);
paging_init();
paravirt_pagetable_setup_done(swapper_pg_dir);
paravirt_post_allocator_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
tboot_probe();
#ifdef CONFIG_X86_64
map_vsyscall();
@@ -990,13 +986,13 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_init();
#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
sfi_init();
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
#endif
prefill_possible_map();
@@ -1015,10 +1011,7 @@ void __init setup_arch(char **cmdline_p)
e820_reserve_resources();
e820_mark_nosave_regions(max_low_pfn);
#ifdef CONFIG_X86_32
request_resource(&iomem_resource, &video_ram_resource);
#endif
reserve_standard_io_resources();
x86_init.resources.reserve_resources();
e820_setup_gap();
@@ -1030,78 +1023,22 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
x86_init.oem.banner();
}
#ifdef CONFIG_X86_32
/**
* x86_quirk_intr_init - post gate setup interrupt initialisation
*
* Description:
* Fill in any interrupts that may have been left out by the general
* init_IRQ() routine. interrupts having to do with the machine rather
* than the devices on the I/O bus (like APIC interrupts in intel MP
* systems) are started here.
**/
void __init x86_quirk_intr_init(void)
{
if (x86_quirks->arch_intr_init) {
if (x86_quirks->arch_intr_init())
return;
}
}
/**
* x86_quirk_trap_init - initialise system specific traps
*
* Description:
* Called as the final act of trap_init(). Used in VISWS to initialise
* the various board specific APIC traps.
**/
void __init x86_quirk_trap_init(void)
{
if (x86_quirks->arch_trap_init) {
if (x86_quirks->arch_trap_init())
return;
}
}
static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
.name = "timer"
static struct resource video_ram_resource = {
.name = "Video RAM area",
.start = 0xa0000,
.end = 0xbffff,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/**
* x86_quirk_pre_time_init - do any specific initialisations before.
*
**/
void __init x86_quirk_pre_time_init(void)
void __init i386_reserve_resources(void)
{
if (x86_quirks->arch_pre_time_init)
x86_quirks->arch_pre_time_init();
request_resource(&iomem_resource, &video_ram_resource);
reserve_standard_io_resources();
}
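/*
 * Editor's note: the request_resource()/reserve_standard_io_resources()
 * calls dropped from setup_arch() above are now reached through
 * x86_init.resources.reserve_resources(). Presumably the 32-bit early
 * boot code points that hook at this function, e.g.:
 *
 *	x86_init.resources.reserve_resources = i386_reserve_resources;
 */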
/**
* x86_quirk_time_init - do any specific initialisations for the system timer.
*
* Description:
* Must plug the system timer interrupt source at HZ into the IRQ listed
* in irq_vectors.h:TIMER_IRQ
**/
void __init x86_quirk_time_init(void)
{
if (x86_quirks->arch_time_init) {
/*
* A nonzero return code does not mean failure, it means
* that the architecture quirk does not want any
* generic (timer) setup to be performed after this:
*/
if (x86_quirks->arch_time_init())
return;
}
irq0.mask = cpumask_of_cpu(0);
setup_irq(0, &irq0);
}
#endif /* CONFIG_X86_32 */

View File

@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
#define PERCPU_FIRST_CHUNK_RESERVE 0
#endif
#ifdef CONFIG_X86_32
/**
* pcpu_need_numa - determine percpu allocation needs to consider NUMA
*
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void)
#endif
return false;
}
#endif
/**
* pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
}
/*
* Large page remap allocator
*
* This allocator uses PMD page as unit. A PMD page is allocated for
* each cpu and each is remapped into vmalloc area using PMD mapping.
* As PMD page is quite large, only part of it is used for the first
* chunk. Unused part is returned to the bootmem allocator.
*
* So, the PMD pages are mapped twice - once to the physical mapping
* and to the vmalloc area for the first percpu chunk. The double
* mapping does add one more PMD TLB entry pressure but still is much
* better than only using 4k mappings while still being NUMA friendly.
* Helpers for first chunk memory allocation
*/
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
return pcpu_alloc_bootmem(cpu, size, align);
}
static void __init pcpu_fc_free(void *ptr, size_t size)
{
free_bootmem(__pa(ptr), size);
}
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
struct pcpul_ent {
unsigned int cpu;
void *ptr;
};
static size_t pcpul_size;
static struct pcpul_ent *pcpul_map;
static struct vm_struct pcpul_vm;
static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
if (off >= pcpul_size)
return NULL;
return virt_to_page(pcpul_map[cpu].ptr + off);
}
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
size_t map_size, dyn_size;
unsigned int cpu;
int i, j;
ssize_t ret;
if (!chosen) {
size_t vm_size = VMALLOC_END - VMALLOC_START;
size_t tot_size = nr_cpu_ids * PMD_SIZE;
/* on non-NUMA, embedding is better */
if (!pcpu_need_numa())
return -EINVAL;
/* don't consume more than 20% of vmalloc area */
if (tot_size > vm_size / 5) {
pr_info("PERCPU: too large chunk size %zuMB for "
"large page remap\n", tot_size >> 20);
return -EINVAL;
}
}
/* need PSE */
if (!cpu_has_pse) {
pr_warning("PERCPU: lpage allocator requires PSE\n");
return -EINVAL;
}
/*
* Currently supports only single page. Supporting multiple
* pages won't be too difficult if it ever becomes necessary.
*/
pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE);
if (pcpul_size > PMD_SIZE) {
pr_warning("PERCPU: static data is larger than large page, "
"can't use large page\n");
return -EINVAL;
}
dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
/* allocate pointer array and alloc large pages */
map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
pcpul_map = alloc_bootmem(map_size);
for_each_possible_cpu(cpu) {
pcpul_map[cpu].cpu = cpu;
pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
PMD_SIZE);
if (!pcpul_map[cpu].ptr) {
pr_warning("PERCPU: failed to allocate large page "
"for cpu%u\n", cpu);
goto enomem;
}
/*
* Only use pcpul_size bytes and give back the rest.
*
* Ingo: The 2MB up-rounding bootmem is needed to make
* sure the partial 2MB page is still fully RAM - it's
* not well-specified to have a PAT-incompatible area
* (unmapped RAM, device memory, etc.) in that hole.
*/
free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
PMD_SIZE - pcpul_size);
memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
}
/* allocate address and map */
pcpul_vm.flags = VM_ALLOC;
pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
vm_area_register_early(&pcpul_vm, PMD_SIZE);
for_each_possible_cpu(cpu) {
pmd_t *pmd, pmd_v;
pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
cpu * PMD_SIZE);
pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
PAGE_KERNEL_LARGE);
set_pmd(pmd, pmd_v);
}
/* we're ready, commit */
pr_info("PERCPU: Remapped at %p with large pages, static data "
"%zu bytes\n", pcpul_vm.addr, static_size);
ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
PMD_SIZE, pcpul_vm.addr, NULL);
/* sort pcpul_map array for pcpu_lpage_remapped() */
for (i = 0; i < nr_cpu_ids - 1; i++)
for (j = i + 1; j < nr_cpu_ids; j++)
if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
struct pcpul_ent tmp = pcpul_map[i];
pcpul_map[i] = pcpul_map[j];
pcpul_map[j] = tmp;
}
return ret;
enomem:
for_each_possible_cpu(cpu)
if (pcpul_map[cpu].ptr)
free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
free_bootmem(__pa(pcpul_map), map_size);
return -ENOMEM;
}
/**
* pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
* @kaddr: the kernel address in question
*
* Determine whether @kaddr falls in the pcpul recycled area. This is
* used by pageattr to detect VM aliases and break up the pcpu PMD
* mapping such that the same physical page is not mapped under
* different attributes.
*
* The recycled area is always at the tail of a partially used PMD
* page.
*
* RETURNS:
* Address of corresponding remapped pcpu address if match is found;
* otherwise, NULL.
*/
void *pcpu_lpage_remapped(void *kaddr)
{
void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
int left = 0, right = nr_cpu_ids - 1;
int pos;
/* pcpul in use at all? */
if (!pcpul_map)
return NULL;
/* okay, perform binary search */
while (left <= right) {
pos = (left + right) / 2;
if (pcpul_map[pos].ptr < pmd_addr)
left = pos + 1;
else if (pcpul_map[pos].ptr > pmd_addr)
right = pos - 1;
else {
/* it shouldn't be in the area for the first chunk */
WARN_ON(offset < pcpul_size);
return pcpul_vm.addr +
pcpul_map[pos].cpu * PMD_SIZE + offset;
}
}
return NULL;
}
if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
#else
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
return -EINVAL;
}
return LOCAL_DISTANCE;
#endif
/*
* Embedding allocator
*
* The first chunk is sized to just contain the static area plus
* module and dynamic reserves and embedded into linear physical
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
{
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
/*
* If large page isn't supported, there's no benefit in doing
* this. Also, embedding allocation doesn't play well with
* NUMA.
*/
if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
return -EINVAL;
return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
}
/*
* 4k page allocator
*
* This is the basic allocator. Static percpu area is allocated
* page-by-page and most of initialization is done by the generic
* setup function.
*/
static struct page **pcpu4k_pages __initdata;
static int pcpu4k_nr_static_pages __initdata;
static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
{
if (pageno < pcpu4k_nr_static_pages)
return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
return NULL;
}
static void __init pcpu4k_populate_pte(unsigned long addr)
static void __init pcpup_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
static ssize_t __init setup_pcpu_4k(size_t static_size)
{
size_t pages_size;
unsigned int cpu;
int i, j;
ssize_t ret;
pcpu4k_nr_static_pages = PFN_UP(static_size);
/* unaligned allocations can't be freed, round up to page size */
pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
* sizeof(pcpu4k_pages[0]));
pcpu4k_pages = alloc_bootmem(pages_size);
/* allocate and copy */
j = 0;
for_each_possible_cpu(cpu)
for (i = 0; i < pcpu4k_nr_static_pages; i++) {
void *ptr;
ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
if (!ptr) {
pr_warning("PERCPU: failed to allocate "
"4k page for cpu%u\n", cpu);
goto enomem;
}
memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
pcpu4k_pages[j++] = virt_to_page(ptr);
}
/* we're ready, commit */
pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
pcpu4k_nr_static_pages, static_size);
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, -1,
-1, NULL, pcpu4k_populate_pte);
goto out_free_ar;
enomem:
while (--j >= 0)
free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
ret = -ENOMEM;
out_free_ar:
free_bootmem(__pa(pcpu4k_pages), pages_size);
return ret;
}
/* for explicit first chunk allocator selection */
static char pcpu_chosen_alloc[16] __initdata;
static int __init percpu_alloc_setup(char *str)
{
strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu)
void __init setup_per_cpu_areas(void)
{
size_t static_size = __per_cpu_end - __per_cpu_start;
unsigned int cpu;
unsigned long delta;
size_t pcpu_unit_size;
ssize_t ret;
int rc;
pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/*
* Allocate percpu area. If PSE is supported, try to make use
* of large page mappings. Please read comments on top of
* each allocator for details.
* Allocate percpu area. Embedding allocator is our favorite;
* however, on NUMA configurations, it can result in very
* sparse unit mapping and vmalloc area isn't spacious enough
* on 32bit. Use page in that case.
*/
ret = -EINVAL;
if (strlen(pcpu_chosen_alloc)) {
if (strcmp(pcpu_chosen_alloc, "4k")) {
if (!strcmp(pcpu_chosen_alloc, "lpage"))
ret = setup_pcpu_lpage(static_size, true);
else if (!strcmp(pcpu_chosen_alloc, "embed"))
ret = setup_pcpu_embed(static_size, true);
else
pr_warning("PERCPU: unknown allocator %s "
"specified\n", pcpu_chosen_alloc);
if (ret < 0)
pr_warning("PERCPU: %s allocator failed (%zd), "
"falling back to 4k\n",
pcpu_chosen_alloc, ret);
}
} else {
ret = setup_pcpu_lpage(static_size, false);
if (ret < 0)
ret = setup_pcpu_embed(static_size, false);
}
if (ret < 0)
ret = setup_pcpu_4k(static_size);
if (ret < 0)
panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
static_size, ret);
#ifdef CONFIG_X86_32
if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
rc = -EINVAL;
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
const size_t dyn_size = PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
pcpu_unit_size = ret;
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
pr_warning("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
pcpu_fc_alloc, pcpu_fc_free,
pcpup_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
/* alrighty, percpu areas up and running */
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);

122
arch/x86/kernel/sfi.c Normal file
View File

@@ -0,0 +1,122 @@
/*
* sfi.c - x86 architecture SFI support.
*
* Copyright (c) 2009, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#define KMSG_COMPONENT "SFI"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/io.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
#ifdef CONFIG_X86_LOCAL_APIC
static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
void __init mp_sfi_register_lapic_address(unsigned long address)
{
mp_lapic_addr = address;
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
if (boot_cpu_physical_apicid == -1U)
boot_cpu_physical_apicid = read_apic_id();
pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
}
/* All CPUs enumerated by SFI must be present and enabled */
void __cpuinit mp_sfi_register_lapic(u8 id)
{
if (MAX_APICS - id <= 0) {
pr_warning("Processor #%d invalid (max %d)\n",
id, MAX_APICS);
return;
}
pr_info("registering lapic[%d]\n", id);
generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
}
static int __init sfi_parse_cpus(struct sfi_table_header *table)
{
struct sfi_table_simple *sb;
struct sfi_cpu_table_entry *pentry;
int i;
int cpu_num;
sb = (struct sfi_table_simple *)table;
cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
pentry = (struct sfi_cpu_table_entry *)sb->pentry;
for (i = 0; i < cpu_num; i++) {
mp_sfi_register_lapic(pentry->apic_id);
pentry++;
}
smp_found_config = 1;
return 0;
}
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
static u32 gsi_base;
static int __init sfi_parse_ioapic(struct sfi_table_header *table)
{
struct sfi_table_simple *sb;
struct sfi_apic_table_entry *pentry;
int i, num;
sb = (struct sfi_table_simple *)table;
num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
pentry = (struct sfi_apic_table_entry *)sb->pentry;
for (i = 0; i < num; i++) {
mp_register_ioapic(i, pentry->phys_addr, gsi_base);
gsi_base += io_apic_get_redir_entries(i);
pentry++;
}
WARN(pic_mode, KERN_WARNING
"SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
pic_mode = 0;
return 0;
}
#endif /* CONFIG_X86_IO_APIC */
/*
* sfi_platform_init(): register lapics & io-apics
*/
int __init sfi_platform_init(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
mp_sfi_register_lapic_address(sfi_lapic_addr);
sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
#endif
#ifdef CONFIG_X86_IO_APIC
sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
#endif
return 0;
}
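/*
 * Editor's note: sfi_platform_init() is presumably called by the generic
 * SFI core once sfi_init() -- now invoked from setup_arch(), see the
 * setup.c hunk -- has located and parsed the SFI system table; it is the
 * arch hook that turns the parsed CPU/APIC entries into registered
 * local APICs and IO-APICs.
 */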

View File

@@ -847,7 +847,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef CONFIG_X86_NEW_MCE
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_process();

View File

@@ -47,6 +47,7 @@
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/nmi.h>
#include <linux/tboot.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -324,7 +325,7 @@ notrace static void __cpuinit start_secondary(void *unused)
/* enable local interrupts */
local_irq_enable();
setup_secondary_clock();
x86_cpuinit.setup_percpu_clockev();
wmb();
load_debug_registers();
@@ -1060,12 +1061,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
#endif
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
cpumask_clear(per_cpu(cpu_core_map, i));
cpumask_clear(per_cpu(cpu_sibling_map, i));
cpumask_clear(cpu_data(i).llc_shared_map);
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -1115,13 +1113,26 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
setup_boot_clock();
x86_init.timers.setup_percpu_clockev();
if (is_uv_system())
uv_system_init();
set_mtrr_aps_delayed_init();
out:
preempt_enable();
}
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
}
void arch_enable_nonboot_cpus_end(void)
{
mtrr_aps_init();
}
/*
* Early setup to make printk work.
*/
@@ -1143,6 +1154,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
check_nmi_watchdog();
mtrr_aps_init();
}
static int __initdata setup_possible_cpus = -1;
@@ -1321,6 +1333,7 @@ void play_dead_common(void)
void native_play_dead(void)
{
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
wbinvd_halt();
}

View File

@@ -335,4 +335,4 @@ ENTRY(sys_call_table)
.long sys_preadv
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_counter_open
.long sys_perf_event_open

447
arch/x86/kernel/tboot.c Normal file
View File

@@ -0,0 +1,447 @@
/*
* tboot.c: main implementation of helper functions used by kernel for
* runtime support of Intel(R) Trusted Execution Technology
*
* Copyright (c) 2006-2009, Intel Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/dma_remapping.h>
#include <linux/init_task.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/dmar.h>
#include <linux/cpu.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/tboot.h>
#include <asm/trampoline.h>
#include <asm/processor.h>
#include <asm/bootparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/e820.h>
#include <asm/io.h>
#include "acpi/realmode/wakeup.h"
/* Global pointer to shared data; NULL means no measured launch. */
struct tboot *tboot __read_mostly;
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
#define AP_WAIT_TIMEOUT 1
#undef pr_fmt
#define pr_fmt(fmt) "tboot: " fmt
static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
void __init tboot_probe(void)
{
/* Look for valid page-aligned address for shared page. */
if (!boot_params.tboot_addr)
return;
/*
* also verify that it is mapped as we expect it before calling
* set_fixmap(), to reduce chance of garbage value causing crash
*/
if (!e820_any_mapped(boot_params.tboot_addr,
boot_params.tboot_addr, E820_RESERVED)) {
pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n");
return;
}
/* only a natively booted kernel should be using TXT */
if (paravirt_enabled()) {
pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
return;
}
/* Map and check for tboot UUID. */
set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
pr_warning("tboot at 0x%llx is invalid\n",
boot_params.tboot_addr);
tboot = NULL;
return;
}
if (tboot->version < 5) {
pr_warning("tboot version is invalid: %u\n", tboot->version);
tboot = NULL;
return;
}
pr_info("found shared page at phys addr 0x%llx:\n",
boot_params.tboot_addr);
pr_debug("version: %d\n", tboot->version);
pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
}
static pgd_t *tboot_pg_dir;
static struct mm_struct tboot_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.cpu_vm_mask = CPU_MASK_ALL,
};
static inline void switch_to_tboot_pt(void)
{
write_cr3(virt_to_phys(tboot_pg_dir));
}
static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(&tboot_mm, vaddr);
pud = pud_alloc(&tboot_mm, pgd, vaddr);
if (!pud)
return -1;
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd)
return -1;
pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
if (!pte)
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
pte_unmap(pte);
return 0;
}
static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
unsigned long nr)
{
/* Reuse the original kernel mapping */
tboot_pg_dir = pgd_alloc(&tboot_mm);
if (!tboot_pg_dir)
return -1;
for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
return -1;
}
return 0;
}
static void tboot_create_trampoline(void)
{
u32 map_base, map_size;
/* Create identity map for tboot shutdown code. */
map_base = PFN_DOWN(tboot->tboot_base);
map_size = PFN_UP(tboot->tboot_size);
if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
map_base, map_size);
}
#ifdef CONFIG_ACPI_SLEEP
static void add_mac_region(phys_addr_t start, unsigned long size)
{
struct tboot_mac_region *mr;
phys_addr_t end = start + size;
if (start && size) {
mr = &tboot->mac_regions[tboot->num_mac_regions++];
mr->start = round_down(start, PAGE_SIZE);
mr->size = round_up(end, PAGE_SIZE) - mr->start;
}
}
static int tboot_setup_sleep(void)
{
tboot->num_mac_regions = 0;
/* S3 resume code */
add_mac_region(acpi_wakeup_address, WAKEUP_SIZE);
#ifdef CONFIG_X86_TRAMPOLINE
/* AP trampoline code */
add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE);
#endif
/* kernel code + data + bss */
add_mac_region(virt_to_phys(_text), _end - _text);
tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
return 0;
}
#else /* no CONFIG_ACPI_SLEEP */
static int tboot_setup_sleep(void)
{
/* S3 shutdown requested, but S3 not supported by the kernel... */
BUG();
return -1;
}
#endif
void tboot_shutdown(u32 shutdown_type)
{
void (*shutdown)(void);
if (!tboot_enabled())
return;
/*
* if we're being called before the 1:1 mapping is set up then just
* return and let the normal shutdown happen; this should only be
* due to very early panic()
*/
if (!tboot_pg_dir)
return;
/* if this is S3 then set regions to MAC */
if (shutdown_type == TB_SHUTDOWN_S3)
if (tboot_setup_sleep())
return;
tboot->shutdown_type = shutdown_type;
switch_to_tboot_pt();
shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry;
shutdown();
/* should not reach here */
while (1)
halt();
}
static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
{
#define TB_COPY_GAS(tbg, g) \
tbg.space_id = g.space_id; \
tbg.bit_width = g.bit_width; \
tbg.bit_offset = g.bit_offset; \
tbg.access_width = g.access_width; \
tbg.address = g.address;
TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block);
TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block);
TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block);
TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block);
/*
* We need phys addr of waking vector, but can't use virt_to_phys() on
* &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys
* addr.
*/
tboot->acpi_sinfo.wakeup_vector = fadt->facs +
offsetof(struct acpi_table_facs, firmware_waking_vector);
}
void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
{
static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
/* S0,1,2: */ -1, -1, -1,
/* S3: */ TB_SHUTDOWN_S3,
/* S4: */ TB_SHUTDOWN_S4,
/* S5: */ TB_SHUTDOWN_S5 };
if (!tboot_enabled())
return;
tboot_copy_fadt(&acpi_gbl_FADT);
tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control;
/* we always use the 32b wakeup vector */
tboot->acpi_sinfo.vector_width = 32;
if (sleep_state >= ACPI_S_STATE_COUNT ||
acpi_shutdown_map[sleep_state] == -1) {
pr_warning("unsupported sleep state 0x%x\n", sleep_state);
return;
}
tboot_shutdown(acpi_shutdown_map[sleep_state]);
}
static atomic_t ap_wfs_count;
static int tboot_wait_for_aps(int num_aps)
{
unsigned long timeout;
timeout = AP_WAIT_TIMEOUT*HZ;
while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
timeout) {
mdelay(1);
timeout--;
}
if (!timeout)
pr_warning("tboot wait for APs timeout\n");
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
}
static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action) {
case CPU_DYING:
atomic_inc(&ap_wfs_count);
if (num_online_cpus() == 1)
if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
return NOTIFY_BAD;
break;
}
return NOTIFY_OK;
}
static struct notifier_block tboot_cpu_notifier __cpuinitdata =
{
.notifier_call = tboot_cpu_callback,
};
static __init int tboot_late_init(void)
{
if (!tboot_enabled())
return 0;
tboot_create_trampoline();
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
return 0;
}
late_initcall(tboot_late_init);
/*
* TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
*/
#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000
#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000
/* # pages for each config regs space - used by fixmap */
#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \
TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)
/* offsets from pub/priv config space */
#define TXTCR_HEAP_BASE 0x0300
#define TXTCR_HEAP_SIZE 0x0308
#define SHA1_SIZE 20
struct sha1_hash {
u8 hash[SHA1_SIZE];
};
struct sinit_mle_data {
u32 version; /* currently 6 */
struct sha1_hash bios_acm_id;
u32 edx_senter_flags;
u64 mseg_valid;
struct sha1_hash sinit_hash;
struct sha1_hash mle_hash;
struct sha1_hash stm_hash;
struct sha1_hash lcp_policy_hash;
u32 lcp_policy_control;
u32 rlp_wakeup_addr;
u32 reserved;
u32 num_mdrs;
u32 mdrs_off;
u32 num_vtd_dmars;
u32 vtd_dmars_off;
} __packed;
struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl)
{
void *heap_base, *heap_ptr, *config;
if (!tboot_enabled())
return dmar_tbl;
/*
* ACPI tables may not be DMA protected by tboot, so use DMAR copy
* SINIT saved in SinitMleData in TXT heap (which is DMA protected)
*/
/* map config space in order to get heap addr */
config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES *
PAGE_SIZE);
if (!config)
return NULL;
/* now map TXT heap */
heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE),
*(u64 *)(config + TXTCR_HEAP_SIZE));
iounmap(config);
if (!heap_base)
return NULL;
/* walk heap to SinitMleData */
/* skip BiosData */
heap_ptr = heap_base + *(u64 *)heap_base;
/* skip OsMleData */
heap_ptr += *(u64 *)heap_ptr;
/* skip OsSinitData */
heap_ptr += *(u64 *)heap_ptr;
/* now points to SinitMleDataSize; set to SinitMleData */
heap_ptr += sizeof(u64);
/* get addr of DMAR table */
dmar_tbl = (struct acpi_table_header *)(heap_ptr +
((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off -
sizeof(u64));
/* don't unmap heap because dmar.c needs access to this */
return dmar_tbl;
}
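/*
 * Editor's sketch of the TXT heap layout assumed by the walk above; each
 * region begins with a u64 giving its total size (size field included),
 * which is why "heap_ptr += *(u64 *)heap_ptr" skips a whole region:
 *
 *	heap_base -> | BiosDataSize     | BiosData ...                      |
 *	             | OsMleDataSize    | OsMleData ...                     |
 *	             | OsSinitDataSize  | OsSinitData ...                   |
 *	             | SinitMleDataSize | SinitMleData (vtd_dmars_off, ...) |
 */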
int tboot_force_iommu(void)
{
if (!tboot_enabled())
return 0;
if (no_iommu || swiotlb || dmar_disabled)
pr_warning("Forcing Intel-IOMMU to enabled\n");
dmar_disabled = 0;
#ifdef CONFIG_SWIOTLB
swiotlb = 0;
#endif
no_iommu = 0;
return 1;
}

121
arch/x86/kernel/time.c Normal file
View File

@@ -0,0 +1,121 @@
/*
* Copyright (c) 1991,1992,1995 Linus Torvalds
* Copyright (c) 1994 Alan Modra
* Copyright (c) 1995 Markus Kuhn
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
* Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
*
*/
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/mca.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/i8259.h>
#include <asm/i8253.h>
#include <asm/timer.h>
#include <asm/hpet.h>
#include <asm/time.h>
#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
int timer_ack;
#endif
#ifdef CONFIG_X86_64
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
unsigned long *sp =
(unsigned long *)kernel_stack_pointer(regs);
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
* kernel addresses don't.
*/
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
#endif
}
return pc;
}
EXPORT_SYMBOL(profile_pc);
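/*
 * Editor's illustration of the ">> 22" test above, assuming the default
 * 32-bit PAGE_OFFSET of 0xC0000000: a kernel text address such as
 * 0xC0123456 >> 22 == 0x300 (non-zero, so it is taken as the return
 * address), while a saved EFLAGS value such as 0x00000246 >> 22 == 0
 * because EFLAGS bits 22-31 are architecturally zero.
 */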
/*
* Default timer interrupt handler for PIT/HPET
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
/* Keep nmi watchdog up to date */
inc_irq_stat(irq0_irqs);
/* Optimized out for !IO_APIC and x86_64 */
if (timer_ack) {
/*
* Subtle, when I/O APICs are used we have to ack timer IRQ
* manually to deassert NMI lines for the watchdog if run
* on an 82489DX-based system.
*/
spin_lock(&i8259A_lock);
outb(0x0c, PIC_MASTER_OCW3);
/* Ack the IRQ; AEOI will end it automatically. */
inb(PIC_MASTER_POLL);
spin_unlock(&i8259A_lock);
}
global_clock_event->event_handler(global_clock_event);
/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
if (MCA_bus)
outb_p(inb_p(0x61)| 0x80, 0x61);
return IRQ_HANDLED;
}
static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
.name = "timer"
};
void __init setup_default_timer_irq(void)
{
setup_irq(0, &irq0);
}
/* Default timer init function */
void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
setup_default_timer_irq();
}
static __init void x86_late_time_init(void)
{
x86_init.timers.timer_init();
tsc_init();
}
/*
* Initialize TSC and delay the periodic timer init to
* late x86_late_time_init() so ioremap works.
*/
void __init time_init(void)
{
late_time_init = x86_late_time_init;
}

View File

@@ -1,137 +0,0 @@
/*
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*
* This file contains the PC-specific time handling details:
* reading the RTC at bootup, etc..
* 1994-07-02 Alan Modra
* fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
* 1995-03-26 Markus Kuhn
* fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
* precision CMOS clock update
* 1996-05-03 Ingo Molnar
* fixed time warps in do_[slow|fast]_gettimeoffset()
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1998-09-05 (Various)
* More robust do_fast_gettimeoffset() algorithm implemented
* (works with APM, Cyrix 6x86MX and Centaur C6),
* monotonic gettimeofday() with fast_get_timeoffset(),
* drift-proof precision TSC calibration on boot
* (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
* Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
* ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
* 1998-12-16 Andrea Arcangeli
* Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
* because was not accounting lost_ticks.
* 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
* Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
* serialize accesses to xtime/lost_ticks).
*/
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/mca.h>
#include <asm/setup.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/timer.h>
#include <asm/do_timer.h>
int timer_ack;
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
#ifdef CONFIG_SMP
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
unsigned long *sp = (unsigned long *)&regs->sp;
/* Return address is either directly at stack pointer
or above a saved flags. Eflags has bits 22-31 zero,
kernel addresses don't. */
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
#endif
}
#endif
return pc;
}
EXPORT_SYMBOL(profile_pc);
/*
* This is the same as the above, except we _also_ save the current
* Time Stamp Counter value at the time of the timer interrupt, so that
* we later on can estimate the time of day more exactly.
*/
irqreturn_t timer_interrupt(int irq, void *dev_id)
{
/* Keep nmi watchdog up to date */
inc_irq_stat(irq0_irqs);
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
* Subtle, when I/O APICs are used we have to ack timer IRQ
* manually to deassert NMI lines for the watchdog if run
* on an 82489DX-based system.
*/
spin_lock(&i8259A_lock);
outb(0x0c, PIC_MASTER_OCW3);
/* Ack the IRQ; AEOI will end it automatically. */
inb(PIC_MASTER_POLL);
spin_unlock(&i8259A_lock);
}
#endif
do_timer_interrupt_hook();
#ifdef CONFIG_MCA
if (MCA_bus) {
/* The PS/2 uses level-triggered interrupts. You can't
turn them off, nor would you want to (any attempt to
enable edge-triggered interrupts usually gets intercepted by a
special hardware circuit). Hence we have to acknowledge
the timer interrupt. Through some incredibly stupid
design idea, the reset for IRQ 0 is done by setting the
high bit of the PPI port B (0x61). Note that some PS/2s,
notably the 55SX, work fine if this is removed. */
u8 irq_v = inb_p(0x61); /* read the current state */
outb_p(irq_v | 0x80, 0x61); /* reset the IRQ */
}
#endif
return IRQ_HANDLED;
}
/* Duplicate of time_init() below, with hpet_enable part added */
void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
x86_quirk_time_init();
}
/*
* This is called directly from init code; we must delay timer setup in the
* HPET case as we can't make the decision to turn on HPET this early in the
* boot process.
*
* The chosen time_init function will usually be hpet_time_init, above, but
* in the case of virtual hardware, an alternative function may be substituted.
*/
void __init time_init(void)
{
x86_quirk_pre_time_init();
tsc_init();
late_time_init = choose_time_init();
}

View File

@@ -1,135 +0,0 @@
/*
* "High Precision Event Timer" based timekeeping.
*
* Copyright (c) 1991,1992,1995 Linus Torvalds
* Copyright (c) 1994 Alan Modra
* Copyright (c) 1995 Markus Kuhn
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
* Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
* RTC support code taken from arch/i386/kernel/timers/time_hpet.c
*/
#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/time.h>
#include <linux/mca.h>
#include <linux/nmi.h>
#include <asm/i8253.h>
#include <asm/hpet.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/timer.h>
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
/* Assume the lock function has either no stack frame or a copy
of flags from PUSHF
Eflags always has bits 22 and up cleared unlike kernel addresses. */
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
unsigned long *sp = (unsigned long *)regs->sp;
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
#endif
}
return pc;
}
EXPORT_SYMBOL(profile_pc);
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
inc_irq_stat(irq0_irqs);
global_clock_event->event_handler(global_clock_event);
#ifdef CONFIG_MCA
if (MCA_bus) {
u8 irq_v = inb_p(0x61); /* read the current state */
outb_p(irq_v|0x80, 0x61); /* reset the IRQ */
}
#endif
return IRQ_HANDLED;
}
/* calibrate_cpu is used on systems with fixed rate TSCs to determine
* processor frequency */
#define TICK_COUNT 100000000
unsigned long __init calibrate_cpu(void)
{
int tsc_start, tsc_now;
int i, no_ctr_free;
unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
unsigned long flags;
for (i = 0; i < 4; i++)
if (avail_to_resrv_perfctr_nmi_bit(i))
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
"cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
rdmsrl(MSR_K7_PERFCTR3, pmc3);
} else {
reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
local_irq_save(flags);
/* start measuring cycles, incrementing from 0 */
wrmsrl(MSR_K7_PERFCTR0 + i, 0);
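/* EVNTSEL: event 0x76 (CPU clocks not halted), count in user+kernel (3 << 16), enable bit (1 << 22) */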
wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
rdtscl(tsc_start);
do {
rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
tsc_now = get_cycles();
} while ((tsc_now - tsc_start) < TICK_COUNT);
local_irq_restore(flags);
if (no_ctr_free) {
wrmsrl(MSR_K7_EVNTSEL3, 0);
wrmsrl(MSR_K7_PERFCTR3, pmc3);
wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
} else {
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
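/* cpu_khz = core cycles counted by the PMC, scaled by tsc_khz over the elapsed TSC ticks */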
return pmc_now * tsc_khz / (tsc_now - tsc_start);
}
static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
.name = "timer"
};
void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
setup_irq(0, &irq0);
}
void __init time_init(void)
{
tsc_init();
late_time_init = choose_time_init();
}

View File

@@ -3,8 +3,16 @@
#include <asm/trampoline.h>
#include <asm/e820.h>
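/* On 64-bit with ACPI sleep the trampoline must stay resident after boot; otherwise it can live in freeable cpuinit sections. */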
#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
#define __trampinit
#define __trampinitdata
#else
#define __trampinit __cpuinit
#define __trampinitdata __cpuinitdata
#endif
/* ready for x86_64 and x86 */
unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
void __init reserve_trampoline_memory(void)
{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
unsigned long setup_trampoline(void)
unsigned long __trampinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);

View File

@@ -28,16 +28,12 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/segment.h>
#include <asm/page_types.h>
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
.section ".cpuinit.data","aw",@progbits
#else
.section .rodata,"a",@progbits
#endif
__CPUINITRODATA
.code16
ENTRY(trampoline_data)

View File

@@ -25,14 +25,19 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
#ifdef CONFIG_ACPI_SLEEP
.section .rodata, "a", @progbits
#else
/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
__CPUINITRODATA
#endif
.code16
ENTRY(trampoline_data)

View File

@@ -14,7 +14,6 @@
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -59,12 +58,12 @@
#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
#include <asm/traps.h>
asmlinkage int system_call(void);
@@ -73,11 +72,9 @@ char ignore_fpu_irq;
/*
* The IDT has to be page-aligned to simplify the Pentium
* F0 0F bug workaround.. We have a special link segment
* for this.
* F0 0F bug workaround.
*/
gate_desc idt_table[NR_VECTORS]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
@@ -951,7 +948,5 @@ void __init trap_init(void)
*/
cpu_init();
#ifdef CONFIG_X86_32
x86_quirk_trap_init();
#endif
x86_init.irqs.trap_init();
}

View File

@@ -17,6 +17,8 @@
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -400,15 +402,9 @@ unsigned long native_calibrate_tsc(void)
{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
unsigned long flags, latch, ms, fast_calibrate;
int hpet = is_hpet_enabled(), i, loopmin;
hv_tsc_khz = get_hypervisor_tsc_freq();
if (hv_tsc_khz) {
printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
return hv_tsc_khz;
}
local_irq_save(flags);
fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
@@ -566,7 +562,7 @@ int recalibrate_cpu_khz(void)
unsigned long cpu_khz_old = cpu_khz;
if (cpu_has_tsc) {
tsc_khz = calibrate_tsc();
tsc_khz = x86_platform.calibrate_tsc();
cpu_khz = tsc_khz;
cpu_data(0).loops_per_jiffy =
cpufreq_scale(cpu_data(0).loops_per_jiffy,
@@ -670,7 +666,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
(val == CPUFREQ_RESUMECHANGE)) {
*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -744,10 +740,16 @@ static cycle_t __vsyscall_fn vread_tsc(void)
}
#endif
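/* The TSC may have been reset across suspend; clearing cycle_last avoids a huge bogus delta on the first read after resume. */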
static void resume_tsc(void)
{
clocksource_tsc.cycle_last = 0;
}
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.resume = resume_tsc,
.mask = CLOCKSOURCE_MASK(64),
.shift = 22,
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
@@ -761,12 +763,14 @@ void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
printk("Marking TSC unstable due to %s\n", reason);
printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
/* Change only the rating, when not registered */
if (clocksource_tsc.mult)
clocksource_change_rating(&clocksource_tsc, 0);
else
clocksource_mark_unstable(&clocksource_tsc);
else {
clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
clocksource_tsc.rating = 0;
}
}
}
@@ -852,15 +856,71 @@ static void __init init_tsc_clocksource(void)
clocksource_register(&clocksource_tsc);
}
#ifdef CONFIG_X86_64
/*
* calibrate_cpu is used on systems with fixed rate TSCs to determine
* processor frequency
*/
#define TICK_COUNT 100000000
static unsigned long __init calibrate_cpu(void)
{
int tsc_start, tsc_now;
int i, no_ctr_free;
unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
unsigned long flags;
for (i = 0; i < 4; i++)
if (avail_to_resrv_perfctr_nmi_bit(i))
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
"cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
rdmsrl(MSR_K7_PERFCTR3, pmc3);
} else {
reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
local_irq_save(flags);
/* start measuring cycles, incrementing from 0 */
wrmsrl(MSR_K7_PERFCTR0 + i, 0);
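/* EVNTSEL: event 0x76 (CPU clocks not halted), count in user+kernel (3 << 16), enable bit (1 << 22) */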
wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
rdtscl(tsc_start);
do {
rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
tsc_now = get_cycles();
} while ((tsc_now - tsc_start) < TICK_COUNT);
local_irq_restore(flags);
if (no_ctr_free) {
wrmsrl(MSR_K7_EVNTSEL3, 0);
wrmsrl(MSR_K7_PERFCTR3, pmc3);
wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
} else {
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
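/* cpu_khz = core cycles counted by the PMC, scaled by tsc_khz over the elapsed TSC ticks */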
return pmc_now * tsc_khz / (tsc_now - tsc_start);
}
#else
static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
#endif
void __init tsc_init(void)
{
u64 lpj;
int cpu;
x86_init.timers.tsc_pre_init();
if (!cpu_has_tsc)
return;
tsc_khz = calibrate_tsc();
tsc_khz = x86_platform.calibrate_tsc();
cpu_khz = tsc_khz;
if (!tsc_khz) {
@@ -868,11 +928,9 @@ void __init tsc_init(void)
return;
}
#ifdef CONFIG_X86_64
if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
cpu_khz = calibrate_cpu();
#endif
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,

View File

@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
return;
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
pr_info("Skipping synchronization checks as TSC is reliable.\n");
printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
return;
}

View File

@@ -30,6 +30,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/io.h>
#include <linux/kernel_stat.h>
@@ -53,7 +54,7 @@ int is_visws_box(void)
return visws_board_type >= 0;
}
static int __init visws_time_init(void)
static void __init visws_time_init(void)
{
printk(KERN_INFO "Starting Cobalt Timer system clock\n");
@@ -66,21 +67,13 @@ static int __init visws_time_init(void)
/* Enable (unmask) the timer interrupt */
co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
/*
* Zero return means the generic timer setup code will set up
* the standard vector:
*/
return 0;
setup_default_timer_irq();
}
static int __init visws_pre_intr_init(void)
/* Replaces the default init_ISA_irqs in the generic setup */
static void __init visws_pre_intr_init(void)
{
init_VISWS_APIC_irqs();
/*
* We dont want ISA irqs to be set up by the generic code:
*/
return 1;
}
/* Quirk for machine specific memory setup. */
@@ -156,12 +149,8 @@ static void visws_machine_power_off(void)
outl(PIIX_SPECIAL_STOP, 0xCFC);
}
static int __init visws_get_smp_config(unsigned int early)
static void __init visws_get_smp_config(unsigned int early)
{
/*
* Prevent MP-table parsing by the generic code:
*/
return 1;
}
/*
@@ -208,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
apic_version[m->apicid] = ver;
}
static int __init visws_find_smp_config(unsigned int reserve)
static void __init visws_find_smp_config(unsigned int reserve)
{
struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -230,21 +219,9 @@ static int __init visws_find_smp_config(unsigned int reserve)
MP_processor_info(mp++);
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
return 1;
}
static int visws_trap_init(void);
static struct x86_quirks visws_x86_quirks __initdata = {
.arch_time_init = visws_time_init,
.arch_pre_intr_init = visws_pre_intr_init,
.arch_memory_setup = visws_memory_setup,
.arch_intr_init = NULL,
.arch_trap_init = visws_trap_init,
.mach_get_smp_config = visws_get_smp_config,
.mach_find_smp_config = visws_find_smp_config,
};
static void visws_trap_init(void);
void __init visws_early_detect(void)
{
@@ -257,11 +234,14 @@ void __init visws_early_detect(void)
return;
/*
* Install special quirks for timer, interrupt and memory setup:
* Fall back to generic behavior for traps:
* Override generic MP-table parsing:
* Override the default platform setup functions
*/
x86_quirks = &visws_x86_quirks;
x86_init.resources.memory_setup = visws_memory_setup;
x86_init.mpparse.get_smp_config = visws_get_smp_config;
x86_init.mpparse.find_smp_config = visws_find_smp_config;
x86_init.irqs.pre_vector_init = visws_pre_intr_init;
x86_init.irqs.trap_init = visws_trap_init;
x86_init.timers.timer_init = visws_time_init;
/*
* Install reboot quirks:
@@ -400,12 +380,10 @@ static __init void cobalt_init(void)
co_apic_read(CO_APIC_ID));
}
static int __init visws_trap_init(void)
static void __init visws_trap_init(void)
{
lithium_init();
cobalt_init();
return 1;
}
/*

View File

@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
pv_info.paravirt_enabled = 1;
pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
pv_info.name = "vmi";
pv_info.name = "vmi [deprecated]";
pv_init_ops.patch = vmi_patch;
@@ -817,15 +817,15 @@ static inline int __init activate_vmi(void)
vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
vmi_timer_ops.cancel_alarm =
vmi_get_function(VMI_CALL_CancelAlarm);
pv_time_ops.time_init = vmi_time_init;
pv_time_ops.get_wallclock = vmi_get_wallclock;
pv_time_ops.set_wallclock = vmi_set_wallclock;
x86_init.timers.timer_init = vmi_time_init;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init;
x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init;
#endif
pv_time_ops.sched_clock = vmi_sched_clock;
pv_time_ops.get_tsc_khz = vmi_tsc_khz;
x86_platform.calibrate_tsc = vmi_tsc_khz;
x86_platform.get_wallclock = vmi_get_wallclock;
x86_platform.set_wallclock = vmi_set_wallclock;
/* We have true wallclock functions; disable CMOS clock sync */
no_sync_cmos_clock = 1;

View File

@@ -68,7 +68,7 @@ unsigned long long vmi_sched_clock(void)
return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
}
/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
/* x86_platform.calibrate_tsc = vmi_tsc_khz */
unsigned long vmi_tsc_khz(void)
{
unsigned long long khz;

View File

@@ -45,9 +45,9 @@ PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_X86_64
user PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(7); /* RWE */
percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
init PT_LOAD FLAGS(7); /* RWE */
#endif
@@ -65,17 +65,11 @@ SECTIONS
#endif
/* Text and read-only data */
/* bootstrapping code */
.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
_text = .;
*(.text.head)
} :text = 0x9090
/* The rest of the text */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .;
/* bootstrapping code */
HEAD_TEXT
#ifdef CONFIG_X86_32
/* not really needed, already page aligned */
. = ALIGN(PAGE_SIZE);
*(.text.page_aligned)
#endif
@@ -94,13 +88,7 @@ SECTIONS
NOTES :text :note
/* Exception table */
. = ALIGN(16);
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
} :text = 0x9090
EXCEPTION_TABLE(16) :text = 0x9090
RO_DATA(PAGE_SIZE)
@@ -118,7 +106,6 @@ SECTIONS
#endif
PAGE_ALIGNED_DATA(PAGE_SIZE)
*(.data.idt)
CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
@@ -135,24 +122,21 @@ SECTIONS
#ifdef CONFIG_X86_64
#define VSYSCALL_ADDR (-10*1024*1024)
#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
. = ALIGN(4096);
__vsyscall_0 = .;
. = VSYSCALL_ADDR;
.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
.vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
*(.vsyscall_0)
} :user
__vsyscall_0 = VSYSCALL_VIRT_ADDR;
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
*(.vsyscall_3)
}
. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
. = __vsyscall_0 + PAGE_SIZE;
#undef VSYSCALL_ADDR
#undef VSYSCALL_PHYS_ADDR
#undef VSYSCALL_VIRT_ADDR
#undef VLOAD_OFFSET
#undef VLOAD
#undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
PERCPU_VADDR(0, :percpu)
#endif
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
_einittext = .;
}
INIT_TEXT_SECTION(PAGE_SIZE)
#ifdef CONFIG_X86_64
:init
#endif
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
}
. = ALIGN(16);
.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
__setup_start = .;
*(.init.setup)
__setup_end = .;
}
.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
__initcall_start = .;
INITCALLS
__initcall_end = .;
}
.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
__con_initcall_start = .;
*(.con_initcall.init)
__con_initcall_end = .;
}
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
__x86_cpu_dev_end = .;
}
SECURITY_INIT
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
__parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
EXIT_DATA
}
#ifdef CONFIG_BLK_DEV_INITRD
. = ALIGN(PAGE_SIZE);
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
__initramfs_start = .;
*(.init.ramfs)
__initramfs_end = .;
}
#endif
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU(PAGE_SIZE)
#endif
@@ -348,21 +295,18 @@ SECTIONS
_end = .;
}
/* Sections to be discarded */
/DISCARD/ : {
*(.exitcall.exit)
*(.eh_frame)
*(.discard)
}
STABS_DEBUG
DWARF_DEBUG
/* Sections to be discarded */
DISCARDS
/DISCARD/ : { *(.eh_frame) }
}
#ifdef CONFIG_X86_32
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
#else
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
@@ -375,12 +319,12 @@ INIT_PER_CPU(irq_stack_union);
/*
* Build-time check on the image size:
*/
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
. = ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
@@ -388,7 +332,6 @@ INIT_PER_CPU(irq_stack_union);
#ifdef CONFIG_KEXEC
#include <asm/kexec.h>
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
"kexec control code size is too big");
ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
"kexec control code size is too big");
#endif

View File

@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -227,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
}
#ifdef CONFIG_SYSCTL
static int
vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
}
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = vsyscall_sysctl_change },
.proc_handler = proc_dointvec },
{}
};

View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
*
* For licencing details see kernel-base/COPYING
*/
#include <linux/init.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/tsc.h>
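/* No-op stubs used as safe defaults for hooks a platform does not need to override. */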
void __cpuinit x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
void __init x86_init_pgd_noop(pgd_t *unused) { }
/*
* The platform setup functions are preset with the default functions
* for standard PC hardware.
*/
struct x86_init_ops x86_init __initdata = {
.resources = {
.probe_roms = x86_init_noop,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = default_machine_specific_memory_setup,
},
.mpparse = {
.mpc_record = x86_init_uint_noop,
.setup_ioapic_ids = x86_init_noop,
.mpc_apic_id = default_mpc_apic_id,
.smp_read_mpc_oem = default_smp_read_mpc_oem,
.mpc_oem_bus_info = default_mpc_oem_bus_info,
.find_smp_config = default_find_smp_config,
.get_smp_config = default_get_smp_config,
},
.irqs = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
.trap_init = x86_init_noop,
},
.oem = {
.arch_setup = x86_init_noop,
.banner = default_banner,
},
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
},
.timers = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
},
};
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
.setup_percpu_clockev = setup_secondary_APIC_clock,
};
struct x86_platform_ops x86_platform = {
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
};