Merge branch 'linus' into tracing/hw-breakpoints

Conflicts:
	arch/x86/kernel/process_64.c

Semantic conflict fixed in:
	arch/x86/kvm/x86.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2009-09-15 12:18:15 +02:00
3318 changed files with 281605 additions and 124914 deletions

View File

@@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
extern int es7000_plat;
#endif
static struct {
int gsi_base;
int gsi_end;
} mp_ioapic_routing[MAX_IO_APICS];
int mp_find_ioapic(int gsi)
{
int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
if ((gsi >= mp_ioapic_routing[i].gsi_base)
&& (gsi <= mp_ioapic_routing[i].gsi_end))
return i;
}
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
return -1;
}
int mp_find_ioapic_pin(int ioapic, int gsi)
{
if (WARN_ON(ioapic == -1))
return -1;
if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
return -1;
return gsi - mp_ioapic_routing[ioapic].gsi_base;
}
static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
!APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
return io_apic_get_unique_id(nr_ioapics, id);
else
return id;
#else
int i;
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
for (i = 0; i < nr_ioapics; i++) {
struct mpc_ioapic *ia = &mp_ioapics[i];
__set_bit(ia->apicid, used);
}
if (!test_bit(id, used))
return id;
return find_first_zero_bit(used, 256);
#endif
}
static int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
"(found %d)\n", MAX_IO_APICS, nr_ioapics);
panic("Recompile kernel with bigger MAX_IO_APICS!\n");
}
if (!address) {
printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
" found in table, skipping!\n");
return 1;
}
return 0;
}
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
if (bad_ioapic(address))
return;
idx = nr_ioapics;
mp_ioapics[idx].type = MP_IOAPIC;
mp_ioapics[idx].flags = MPC_APIC_USABLE;
mp_ioapics[idx].apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].apicid = uniq_ioapic_id(id);
mp_ioapics[idx].apicver = io_apic_get_version(idx);
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
nr_ioapics++;
}
int __init acpi_probe_gsi(void)
{
int idx;
@@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void)
max_gsi = 0;
for (idx = 0; idx < nr_ioapics; idx++) {
gsi = mp_ioapic_routing[idx].gsi_end;
gsi = mp_gsi_routing[idx].gsi_end;
if (gsi > max_gsi)
max_gsi = gsi;
@@ -1179,9 +1079,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
* If MPS is present, it will handle them,
* otherwise the system will stay in PIC mode
*/
if (acpi_disabled || acpi_noirq) {
if (acpi_disabled || acpi_noirq)
return -ENODEV;
}
if (!cpu_has_apic)
return -ENODEV;

View File

@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
@@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly);
#define smp_alt_once 1
#endif
static int debug_alternative;
static int __initdata_or_module debug_alternative;
static int __init debug_alt(char *str)
{
@@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str)
__setup("noreplace-smp", setup_noreplace_smp);
#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;
static int __initdata_or_module noreplace_paravirt = 0;
static int __init setup_noreplace_paravirt(char *str)
{
@@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
#define DPRINTK(fmt, args...) if (debug_alternative) \
printk(KERN_DEBUG fmt, args)
#ifdef GENERIC_NOP1
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8
"\t.previous");
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
static const unsigned char *const __initconst_or_module
intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
intelnops + 1,
@@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
#endif
#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8
"\t.previous");
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
static const unsigned char *const __initconst_or_module
k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
k8nops + 1,
@@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
};
#endif
#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
K7_NOP7 K7_NOP8
"\t.previous");
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
static const unsigned char *const __initconst_or_module
k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
k7nops + 1,
@@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
#endif
#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
P6_NOP7 P6_NOP8
"\t.previous");
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
static const unsigned char *const __initconst_or_module
p6_nops[ASM_NOP_MAX+1] = {
NULL,
p6nops,
p6nops + 1,
@@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
static const unsigned char *const *__init_or_module find_nop_table(void)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_has(X86_FEATURE_NOPL))
@@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void)
#else /* CONFIG_X86_64 */
const unsigned char *const *find_nop_table(void)
static const unsigned char *const *__init_or_module find_nop_table(void)
{
if (boot_cpu_has(X86_FEATURE_K8))
return k8_nops;
@@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void)
#endif /* CONFIG_X86_64 */
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
void add_nops(void *insns, unsigned int len)
static void __init_or_module add_nops(void *insns, unsigned int len)
{
const unsigned char *const *noptable = find_nop_table();
@@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len)
len -= noplen;
}
}
EXPORT_SYMBOL_GPL(add_nops);
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
static void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
@@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[];
APs have less capabilities than the boot processor are not handled.
Tough. Make sure you disable such features by hand. */
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
struct alt_instr *a;
char insnbuf[MAX_PATCH_LEN];
@@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
void __init_or_module alternatives_smp_module_add(struct module *mod,
char *name,
void *locks, void *locks_end,
void *text, void *text_end)
{
struct smp_alt_module *smp;
@@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name,
mutex_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
void __init_or_module alternatives_smp_module_del(struct module *mod)
{
struct smp_alt_module *item;
@@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp)
#endif
#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
struct paravirt_patch_site *p;
char insnbuf[MAX_PATCH_LEN];
@@ -485,13 +492,14 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
void *text_poke_early(void *addr, const void *opcode, size_t len)
static void *__init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
local_irq_save(flags);
memcpy(addr, opcode, len);
local_irq_restore(flags);
sync_core();
local_irq_restore(flags);
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
return addr;

View File

@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);
#ifdef CONFIG_IOMMU_API
/*
* Domain for untranslated devices - only allocated
* if iommu=pt passed on kernel cmd line.
*/
static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
#endif
/*
* general struct to manage commands send to an IOMMU
@@ -55,16 +59,16 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64
**pte_page, gfp_t gfp);
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address, int end_lvl,
u64 **pte_page, gfp_t gfp);
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages);
#ifndef BUS_NOTIFY_UNBOUND_DRIVER
#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
#endif
static void reset_iommu_command_buffer(struct amd_iommu *iommu);
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size);
static void update_domain(struct protection_domain *domain);
#ifdef CONFIG_AMD_IOMMU_STATS
@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
*
****************************************************************************/
static void iommu_print_event(void *__evt)
static void dump_dte_entry(u16 devid)
{
int i;
for (i = 0; i < 8; ++i)
pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
amd_iommu_dev_table[devid].data[i]);
}
static void dump_command(unsigned long phys_addr)
{
struct iommu_cmd *cmd = phys_to_virt(phys_addr);
int i;
for (i = 0; i < 4; ++i)
pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
u32 *event = __evt;
int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt)
int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
u64 address = (u64)(((u64)event[3]) << 32) | event[2];
printk(KERN_ERR "AMD IOMMU: Event logged [");
printk(KERN_ERR "AMD-Vi: Event logged [");
switch (type) {
case EVENT_TYPE_ILL_DEV:
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt)
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
printk("IO_PAGE_FAULT device=%02x:%02x.%x "
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt)
break;
case EVENT_TYPE_ILL_CMD:
printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
reset_iommu_command_buffer(iommu);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
while (head != tail) {
iommu_print_event(iommu->evt_buf + head);
iommu_print_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}
@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
if (unlikely(i == EXIT_LOOP_COUNT))
panic("AMD IOMMU: Completion wait loop failed\n");
if (unlikely(i == EXIT_LOOP_COUNT)) {
spin_unlock(&iommu->lock);
reset_iommu_command_buffer(iommu);
spin_lock(&iommu->lock);
}
}
/*
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
}
/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
* This function flushes one domain on one IOMMU
*/
static void iommu_flush_domain(u16 domid)
static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
{
unsigned long flags;
struct amd_iommu *iommu;
struct iommu_cmd cmd;
INC_STATS_COUNTER(domain_flush_all);
unsigned long flags;
__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);
for_each_iommu(iommu) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
void amd_iommu_flush_all_domains(void)
static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
{
int i;
for (i = 1; i < MAX_DOMAIN_ID; ++i) {
if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
continue;
iommu_flush_domain(i);
flush_domain_on_iommu(iommu, i);
}
}
/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
*/
static void iommu_flush_domain(u16 domid)
{
struct amd_iommu *iommu;
INC_STATS_COUNTER(domain_flush_all);
for_each_iommu(iommu)
flush_domain_on_iommu(iommu, domid);
}
void amd_iommu_flush_all_domains(void)
{
struct amd_iommu *iommu;
for_each_iommu(iommu)
flush_all_domains_on_iommu(iommu);
}
static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
{
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (iommu != amd_iommu_rlookup_table[i])
continue;
iommu_queue_inv_dev_entry(iommu, i);
iommu_completion_wait(iommu);
}
}
void amd_iommu_flush_all_devices(void)
static void flush_devices_by_domain(struct protection_domain *domain)
{
struct amd_iommu *iommu;
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] == NULL)
if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
(amd_iommu_pd_table[i] != domain))
continue;
iommu = amd_iommu_rlookup_table[i];
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void)
}
}
static void reset_iommu_command_buffer(struct amd_iommu *iommu)
{
pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
if (iommu->reset_in_progress)
panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
iommu->reset_in_progress = true;
amd_iommu_reset_cmd_buffer(iommu);
flush_all_devices_for_iommu(iommu);
flush_all_domains_on_iommu(iommu);
iommu->reset_in_progress = false;
}
void amd_iommu_flush_all_devices(void)
{
flush_devices_by_domain(NULL);
}
/****************************************************************************
*
* The functions below are used the create the page table mappings for
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void)
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
int prot,
int map_size)
{
u64 __pte, *pte;
bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(phys_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
BUG_ON(!PM_ALIGNED(map_size, bus_addr));
BUG_ON(!PM_ALIGNED(map_size, phys_addr));
if (!(prot & IOMMU_PROT_MASK))
return -EINVAL;
pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);
if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom,
*pte = __pte;
update_domain(dom);
return 0;
}
static void iommu_unmap_page(struct protection_domain *dom,
unsigned long bus_addr)
unsigned long bus_addr, int map_size)
{
u64 *pte;
u64 *pte = fetch_pte(dom, bus_addr, map_size);
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
*pte = 0;
if (pte)
*pte = 0;
}
/*
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
PM_MAP_4k);
if (ret)
return ret;
/*
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
* This function checks if there is a PTE for a given dma address. If
* there is one, it returns the pointer to it.
*/
static u64* fetch_pte(struct protection_domain *domain,
unsigned long address)
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size)
{
int level;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
while (level > map_size) {
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];
level -= 1;
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[PM_LEVEL_INDEX(level, address)];
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(address)];
if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
pte = NULL;
break;
}
}
return pte;
}
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu,
u64 *pte, *pte_page;
for (i = 0; i < num_ptes; ++i) {
pte = alloc_pte(&dma_dom->domain, address,
pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
&pte_page, gfp);
if (!pte)
goto out_free;
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu,
for (i = dma_dom->aperture[index]->offset;
i < dma_dom->aperture_size;
i += PAGE_SIZE) {
u64 *pte = fetch_pte(&dma_dom->domain, i);
u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
continue;
dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
}
update_domain(&dma_dom->domain);
return 0;
out_free:
update_domain(&dma_dom->domain);
free_page((unsigned long)dma_dom->aperture[index]->bitmap);
kfree(dma_dom->aperture[index]);
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
dma_dom->domain.id = domain_id_alloc();
if (dma_dom->domain.id == 0)
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid)
return dom;
}
static void set_dte_entry(u16 devid, struct protection_domain *domain)
{
u64 pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
amd_iommu_pd_table[devid] = domain;
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
static void __attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
/* lock domain */
spin_lock(&domain->lock);
/* update DTE entry */
set_dte_entry(devid, domain);
domain->dev_cnt += 1;
/* ready */
spin_unlock(&domain->lock);
}
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu,
u16 devid)
{
unsigned long flags;
u64 pte_root = virt_to_phys(domain->pt_root);
domain->dev_cnt += 1;
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_pd_table[devid] = domain;
__attach_device(iommu, domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
/*
* We might boot into a crash-kernel here. The crashed kernel
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
/*
* We might boot into a crash-kernel here. The crashed kernel
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
iommu_queue_inv_dev_entry(iommu, devid);
iommu_flush_tlb_pde(iommu, domain->id);
}
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid)
/* ready */
spin_unlock(&domain->lock);
/*
* If we run in passthrough mode the device must be assigned to the
* passthrough domain if it is detached from any other domain
*/
if (iommu_pass_through) {
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
__attach_device(iommu, pt_domain, devid);
}
}
/*
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb,
case BUS_NOTIFY_UNBOUND_DRIVER:
if (!domain)
goto out;
if (iommu_pass_through)
break;
detach_device(domain, devid);
break;
case BUS_NOTIFY_ADD_DEVICE:
@@ -1292,40 +1409,92 @@ static int get_device_resources(struct device *dev,
return 1;
}
static void update_device_table(struct protection_domain *domain)
{
unsigned long flags;
int i;
for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] != domain)
continue;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
set_dte_entry(i, domain);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
}
static void update_domain(struct protection_domain *domain)
{
if (!domain->updated)
return;
update_device_table(domain);
flush_devices_by_domain(domain);
iommu_flush_domain(domain->id);
domain->updated = false;
}
/*
* If the pte_page is not yet allocated this function is called
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64 **pte_page, gfp_t gfp)
static bool increase_address_space(struct protection_domain *domain,
gfp_t gfp)
{
u64 *pte;
if (domain->mode == PAGE_MODE_6_LEVEL)
/* address space already 64 bit large */
return false;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
return false;
*pte = PM_LEVEL_PDE(domain->mode,
virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
domain->updated = true;
return true;
}
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
int end_lvl,
u64 **pte_page,
gfp_t gfp)
{
u64 *pte, *page;
int level;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
while (address > PM_LEVEL_SIZE(domain->mode))
increase_address_space(domain, gfp);
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L2_PDE(virt_to_phys(page));
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
while (level > end_lvl) {
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
}
level -= 1;
pte = IOMMU_PTE_PAGE(*pte);
if (pte_page && level == end_lvl)
*pte_page = pte;
pte = &pte[PM_LEVEL_INDEX(level, address)];
}
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L1_PDE(virt_to_phys(page));
}
pte = IOMMU_PTE_PAGE(*pte);
if (pte_page)
*pte_page = pte;
pte = &pte[IOMMU_PTE_L0_INDEX(address)];
return pte;
}
@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
if (!pte) {
pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
GFP_ATOMIC);
aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
} else
pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);
update_domain(&dom->domain);
return pte;
}
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (!pte)
return;
pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);
WARN_ON(!*pte);
@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain)
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static int amd_iommu_domain_init(struct iommu_domain *dom)
static void protection_domain_free(struct protection_domain *domain)
{
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
kfree(domain);
}
static struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return -ENOMEM;
return NULL;
spin_lock_init(&domain->lock);
domain->mode = PAGE_MODE_3_LEVEL;
domain->id = domain_id_alloc();
if (!domain->id)
goto out_err;
return domain;
out_err:
kfree(domain);
return NULL;
}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
domain = protection_domain_alloc();
if (!domain)
goto out_free;
domain->mode = PAGE_MODE_3_LEVEL;
domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
if (!domain->pt_root)
goto out_free;
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
return 0;
out_free:
kfree(domain);
protection_domain_free(domain);
return -ENOMEM;
}
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
paddr &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot);
ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
if (ret)
return ret;
@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
iova &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova);
iommu_unmap_page(domain, iova, PM_MAP_4k);
iova += PAGE_SIZE;
}
@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
pte = fetch_pte(domain, iova, PM_MAP_4k);
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;
paddr = *pte & IOMMU_PAGE_MASK;
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = {
.domain_has_cap = amd_iommu_domain_has_cap,
};
/*****************************************************************************
*
* The next functions do a basic initialization of IOMMU for pass through
* mode
*
* In passthrough mode the IOMMU is initialized and enabled but not used for
* DMA-API translation.
*
*****************************************************************************/
int __init amd_iommu_init_passthrough(void)
{
struct pci_dev *dev = NULL;
u16 devid, devid2;
/* allocate passthroug domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;
pt_domain->mode |= PAGE_MODE_NONE;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
struct amd_iommu *iommu;
devid = calc_devid(dev->bus->number, dev->devfn);
if (devid > amd_iommu_last_bdf)
continue;
devid2 = amd_iommu_alias_table[devid];
iommu = amd_iommu_rlookup_table[devid2];
if (!iommu)
continue;
__attach_device(iommu, pt_domain, devid);
__attach_device(iommu, pt_domain, devid2);
}
pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
return 0;
}

View File

@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
@@ -434,6 +434,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
return cmd_buf;
}
/*
* This function resets the command buffer if the IOMMU stopped fetching
* commands from it.
*/
void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
{
iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
}
/*
* This function writes the command buffer address to the hardware and
* enables it.
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
amd_iommu_reset_cmd_buffer(iommu);
}
static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
switch (*p) {
case ACPI_IVHD_TYPE:
DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
DUMP_printk("device: %02x:%02x.%01x cap: %04x "
"seg: %d flags: %01x info %04x\n",
PCI_BUS(h->devid), PCI_SLOT(h->devid),
PCI_FUNC(h->devid), h->cap_ptr,
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
IRQF_SAMPLE_RANDOM,
"AMD IOMMU",
"AMD-Vi",
NULL);
if (r) {
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void)
if (no_iommu) {
printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
return 0;
}
@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void)
if (ret)
goto free;
ret = amd_iommu_init_dma_ops();
if (iommu_pass_through)
ret = amd_iommu_init_passthrough();
else
ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
enable_iommus();
printk(KERN_INFO "AMD IOMMU: device isolation ");
if (iommu_pass_through)
goto out;
printk(KERN_INFO "AMD-Vi: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
else
printk("disabled\n");
if (amd_iommu_unmap_flush)
printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
else
printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
out:
return ret;

View File

@@ -20,6 +20,7 @@
#include <linux/bitops.h>
#include <linux/ioport.h>
#include <linux/suspend.h>
#include <linux/kmemleak.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/iommu.h>
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void)
* code for safe
*/
p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
kmemleak_ignore(p);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%p,%uK)\n",

View File

@@ -49,6 +49,7 @@
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
#include <asm/kvm_para.h>
unsigned int num_processors;
@@ -1361,52 +1362,80 @@ void enable_x2apic(void)
}
#endif /* CONFIG_X86_X2APIC */
void __init enable_IR_x2apic(void)
int __init enable_IR(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
ret = dmar_table_init();
if (ret) {
pr_debug("dmar_table_init() failed with %d:\n", ret);
goto ir_failed;
}
if (!intr_remapping_supported()) {
pr_debug("intr-remapping not supported\n");
goto ir_failed;
return 0;
}
if (!x2apic_preenabled && skip_ioapic_setup) {
pr_info("Skipped enabling intr-remap because of skipping "
"io-apic setup\n");
return;
return 0;
}
if (enable_intr_remapping(x2apic_supported()))
return 0;
pr_info("Enabled Interrupt-remapping\n");
return 1;
#endif
return 0;
}
void __init enable_IR_x2apic(void)
{
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
int ret, x2apic_enabled = 0;
int dmar_table_init_ret = 0;
#ifdef CONFIG_INTR_REMAP
dmar_table_init_ret = dmar_table_init();
if (dmar_table_init_ret)
pr_debug("dmar_table_init() failed with %d:\n",
dmar_table_init_ret);
#endif
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
pr_info("Allocate ioapic_entries failed: %d\n", ret);
goto end;
pr_err("Allocate ioapic_entries failed\n");
goto out;
}
ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
goto out;
}
local_irq_save(flags);
mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
mask_IO_APIC_setup(ioapic_entries);
ret = enable_intr_remapping(x2apic_supported());
if (ret)
goto end_restore;
if (dmar_table_init_ret)
ret = 0;
else
ret = enable_IR();
pr_info("Enabled Interrupt-remapping\n");
if (!ret) {
/* IR is required if there is APIC ID > 255 even when running
* under KVM
*/
if (max_physical_apicid > 255 || !kvm_para_available())
goto nox2apic;
/*
* without IR all CPUs can be addressed by IOAPIC/MSI
* only in physical mode
*/
x2apic_force_phys();
}
x2apic_enabled = 1;
if (x2apic_supported() && !x2apic_mode) {
x2apic_mode = 1;
@@ -1414,41 +1443,25 @@ void __init enable_IR_x2apic(void)
pr_info("Enabled x2apic\n");
}
end_restore:
if (ret)
/*
* IR enabling failed
*/
nox2apic:
if (!ret) /* IR enabling failed */
restore_IO_APIC_setup(ioapic_entries);
unmask_8259A();
local_irq_restore(flags);
end:
out:
if (ioapic_entries)
free_ioapic_entries(ioapic_entries);
if (!ret)
if (x2apic_enabled)
return;
ir_failed:
if (x2apic_preenabled)
panic("x2apic enabled by bios. But IR enabling failed");
panic("x2apic: enabled by BIOS but kernel init failed.");
else if (cpu_has_x2apic)
pr_info("Not enabling x2apic,Intr-remapping\n");
#else
if (!cpu_has_x2apic)
return;
if (x2apic_preenabled)
panic("x2apic enabled prior OS handover,"
" enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
#endif
return;
pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
}
#ifdef CONFIG_X86_64
/*
* Detect and enable local APICs on non-SMP boards.
@@ -1549,8 +1562,6 @@ no_apic:
#ifdef CONFIG_X86_64
void __init early_init_lapic_mapping(void)
{
unsigned long phys_addr;
/*
* If no local APIC can be found then go out
* : it means there is no mpatable and MADT
@@ -1558,11 +1569,9 @@ void __init early_init_lapic_mapping(void)
if (!smp_found_config)
return;
phys_addr = mp_lapic_addr;
set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
APIC_BASE, phys_addr);
APIC_BASE, mp_lapic_addr);
/*
* Fetch the APIC ID of the BSP in case we have a
@@ -1651,7 +1660,6 @@ int __init APIC_init_uniprocessor(void)
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
#endif

View File

@@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void)
{
/* MPENTIUMIII */
if (boot_cpu_data.x86 == 6 &&
(boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11))
(boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
return 1;
return 0;

View File

@@ -66,6 +66,8 @@
#include <asm/apic.h>
#define __apicdebuginit(type) static type __init
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
/*
* Is the SiS APIC rmw bug present ?
@@ -85,6 +87,9 @@ int nr_ioapic_registers[MAX_IO_APICS];
struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
@@ -116,15 +121,6 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
struct irq_pin_list;
/*
* This is performance-critical, we want to do it O(1)
*
* the indexing order of this array favors 1:1 mappings
* between pins and IRQs.
*/
struct irq_pin_list {
int apic, pin;
struct irq_pin_list *next;
@@ -139,6 +135,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}
/*
* This is performance-critical, we want to do it O(1)
*
* Most irqs are mapped 1:1 with pins.
*/
struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
@@ -414,13 +415,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
entry = cfg->irq_2_pin;
for (;;) {
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
int pin;
if (!entry)
break;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
@@ -428,9 +426,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
if (!entry->next)
break;
entry = entry->next;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -498,72 +493,68 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
static int
add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list *entry;
struct irq_pin_list **last, *entry;
entry = cfg->irq_2_pin;
if (!entry) {
entry = get_one_free_irq_2_pin(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
apic, pin);
return;
}
cfg->irq_2_pin = entry;
entry->apic = apic;
entry->pin = pin;
return;
}
while (entry->next) {
/* not again, please */
/* don't allow duplicates */
last = &cfg->irq_2_pin;
for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
return;
entry = entry->next;
return 0;
last = &entry->next;
}
entry->next = get_one_free_irq_2_pin(node);
entry = entry->next;
entry = get_one_free_irq_2_pin(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
node, apic, pin);
return -ENOMEM;
}
entry->apic = apic;
entry->pin = pin;
*last = entry;
return 0;
}
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
/*
* Reroute an IRQ to a different pin.
*/
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
int oldapic, int oldpin,
int newapic, int newpin)
int oldapic, int oldpin,
int newapic, int newpin)
{
struct irq_pin_list *entry = cfg->irq_2_pin;
int replaced = 0;
struct irq_pin_list *entry;
while (entry) {
for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
replaced = 1;
/* every one is different, right? */
break;
return;
}
entry = entry->next;
}
/* why? call replace before add? */
if (!replaced)
add_pin_to_irq_node(cfg, node, newapic, newpin);
/* old apic/pin didn't exist, so just add new ones */
add_pin_to_irq_node(cfg, node, newapic, newpin);
}
static inline void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
static void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
int pin;
struct irq_pin_list *entry;
for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -580,7 +571,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
#ifdef CONFIG_X86_64
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -596,11 +586,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
#else /* CONFIG_X86_32 */
static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
}
static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
@@ -613,7 +598,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
#endif /* CONFIG_X86_32 */
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
@@ -1702,12 +1686,8 @@ __apicdebuginit(void) print_IO_APIC(void)
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
for (;;) {
for_each_irq_pin(entry, cfg->irq_2_pin)
printk("-> %d:%d", entry->apic, entry->pin);
if (!entry->next)
break;
entry = entry->next;
}
printk("\n");
}
@@ -2211,7 +2191,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
return was_pending;
}
#ifdef CONFIG_X86_64
static int ioapic_retrigger_irq(unsigned int irq)
{
@@ -2224,14 +2203,6 @@ static int ioapic_retrigger_irq(unsigned int irq)
return 1;
}
#else
static int ioapic_retrigger_irq(unsigned int irq)
{
apic->send_IPI_self(irq_cfg(irq)->vector);
return 1;
}
#endif
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -2269,13 +2240,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
struct irq_pin_list *entry;
u8 vector = cfg->vector;
entry = cfg->irq_2_pin;
for (;;) {
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
if (!entry)
break;
apic = entry->apic;
pin = entry->pin;
/*
@@ -2288,9 +2255,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
io_apic_modify(apic, 0x10 + pin*2, reg);
if (!entry->next)
break;
entry = entry->next;
}
}
@@ -2515,11 +2479,8 @@ atomic_t irq_mis_count;
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
#ifdef CONFIG_X86_32
unsigned long v;
int i;
#endif
struct irq_cfg *cfg;
int do_unmask_irq = 0;
@@ -2532,31 +2493,28 @@ static void ack_apic_level(unsigned int irq)
}
#endif
#ifdef CONFIG_X86_32
/*
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
* chipsets). Under certain conditions a level-triggered interrupt is
* erroneously delivered as edge-triggered one but the respective IRR
* bit gets set nevertheless. As a result the I/O unit expects an EOI
* message but it will never arrive and further interrupts are blocked
* from the source. The exact reason is so far unknown, but the
* phenomenon was observed when two consecutive interrupt requests
* from a given source get delivered to the same CPU and the source is
* temporarily disabled in between.
*
* A workaround is to simulate an EOI message manually. We achieve it
* by setting the trigger mode to edge and then to level when the edge
* trigger mode gets detected in the TMR of a local APIC for a
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
* chipsets). Under certain conditions a level-triggered interrupt is
* erroneously delivered as edge-triggered one but the respective IRR
* bit gets set nevertheless. As a result the I/O unit expects an EOI
* message but it will never arrive and further interrupts are blocked
* from the source. The exact reason is so far unknown, but the
* phenomenon was observed when two consecutive interrupt requests
* from a given source get delivered to the same CPU and the source is
* temporarily disabled in between.
*
* A workaround is to simulate an EOI message manually. We achieve it
* by setting the trigger mode to edge and then to level when the edge
* trigger mode gets detected in the TMR of a local APIC for a
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
cfg = desc->chip_data;
i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
#endif
/*
* We must acknowledge the irq before we move it or the acknowledge will
@@ -2598,7 +2556,7 @@ static void ack_apic_level(unsigned int irq)
unmask_IO_APIC_irq_desc(desc);
}
#ifdef CONFIG_X86_32
/* Tail end of version 0x11 I/O APIC bug workaround */
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
@@ -2606,26 +2564,15 @@ static void ack_apic_level(unsigned int irq)
__unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
#endif
}
#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
int apic, pin;
struct irq_pin_list *entry;
entry = cfg->irq_2_pin;
for (;;) {
if (!entry)
break;
apic = entry->apic;
pin = entry->pin;
io_apic_eoi(apic, pin);
entry = entry->next;
}
for_each_irq_pin(entry, cfg->irq_2_pin)
io_apic_eoi(entry->apic, entry->pin);
}
static void
@@ -3241,8 +3188,7 @@ void destroy_irq(unsigned int irq)
cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
/* connect back irq_cfg */
if (desc)
desc->chip_data = cfg;
desc->chip_data = cfg;
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
@@ -3912,7 +3858,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
*/
if (irq >= NR_IRQS_LEGACY) {
cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, ioapic, pin);
if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
printk(KERN_INFO "can not add pin %d for irq %d\n",
pin, irq);
return 0;
}
}
setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -3941,11 +3891,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq,
return __io_apic_set_pci_routing(dev, irq, irq_attr);
}
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
u8 __init io_apic_unique_id(u8 id)
{
#ifdef CONFIG_X86_32
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
!APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
return io_apic_get_unique_id(nr_ioapics, id);
else
return id;
#else
int i;
DECLARE_BITMAP(used, 256);
#ifdef CONFIG_ACPI
bitmap_zero(used, 256);
for (i = 0; i < nr_ioapics; i++) {
struct mpc_ioapic *ia = &mp_ioapics[i];
__set_bit(ia->apicid, used);
}
if (!test_bit(id, used))
return id;
return find_first_zero_bit(used, 256);
#endif
}
#ifdef CONFIG_X86_32
int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4054,8 +4021,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
return 0;
}
#endif /* CONFIG_ACPI */
/*
* This function currently is only a helper for the i386 smp boot process where
* we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -4109,7 +4074,7 @@ void __init setup_ioapic_dest(void)
static struct resource *ioapic_resources;
static struct resource * __init ioapic_setup_resources(void)
static struct resource * __init ioapic_setup_resources(int nr_ioapics)
{
unsigned long n;
struct resource *res;
@@ -4125,15 +4090,13 @@ static struct resource * __init ioapic_setup_resources(void)
mem = alloc_bootmem(n);
res = (void *)mem;
if (mem != NULL) {
mem += sizeof(struct resource) * nr_ioapics;
mem += sizeof(struct resource) * nr_ioapics;
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
sprintf(mem, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
sprintf(mem, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
ioapic_resources = res;
@@ -4147,7 +4110,7 @@ void __init ioapic_init_mappings(void)
struct resource *ioapic_res;
int i;
ioapic_res = ioapic_setup_resources();
ioapic_res = ioapic_setup_resources(nr_ioapics);
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4176,11 +4139,9 @@ fake_ioapic_page:
__fix_to_virt(idx), ioapic_phys);
idx++;
if (ioapic_res != NULL) {
ioapic_res->start = ioapic_phys;
ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
ioapic_res++;
}
ioapic_res->start = ioapic_phys;
ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
ioapic_res++;
}
}
@@ -4201,3 +4162,76 @@ void __init ioapic_insert_resources(void)
r++;
}
}
int mp_find_ioapic(int gsi)
{
int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
if ((gsi >= mp_gsi_routing[i].gsi_base)
&& (gsi <= mp_gsi_routing[i].gsi_end))
return i;
}
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
return -1;
}
int mp_find_ioapic_pin(int ioapic, int gsi)
{
if (WARN_ON(ioapic == -1))
return -1;
if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
return -1;
return gsi - mp_gsi_routing[ioapic].gsi_base;
}
static int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
"(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
return 1;
}
if (!address) {
printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
" found in table, skipping!\n");
return 1;
}
return 0;
}
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
if (bad_ioapic(address))
return;
idx = nr_ioapics;
mp_ioapics[idx].type = MP_IOAPIC;
mp_ioapics[idx].flags = MPC_APIC_USABLE;
mp_ioapics[idx].apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].apicid = io_apic_unique_id(id);
mp_ioapics[idx].apicver = io_apic_get_version(idx);
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_gsi_routing[idx].gsi_base = gsi_base;
mp_gsi_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
nr_ioapics++;
}

View File

@@ -153,7 +153,7 @@ int safe_smp_processor_id(void)
{
int apicid, cpuid;
if (!boot_cpu_has(X86_FEATURE_APIC))
if (!cpu_has_apic)
return 0;
apicid = hard_smp_processor_id();

View File

@@ -39,7 +39,7 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
static cpumask_var_t backtrace_mask;
static cpumask_t backtrace_mask __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void)
if (!prev_nmi_count)
goto error;
alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO);
printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
@@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) {
if (cpumask_test_cpu(cpu, &backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
dump_stack();
spin_unlock(&lock);
cpumask_clear_cpu(cpu, backtrace_mask);
cpumask_clear_cpu(cpu, &backtrace_mask);
rc = 1;
}
/* Could check oops_in_progress here too, but it's safer not to */
@@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu)
return 0;
}
void __trigger_all_cpu_backtrace(void)
void arch_trigger_all_cpu_backtrace(void)
{
int i;
cpumask_copy(backtrace_mask, cpu_online_mask);
cpumask_copy(&backtrace_mask, cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpumask_empty(backtrace_mask))
if (cpumask_empty(&backtrace_mask))
break;
mdelay(1);
}

View File

@@ -55,11 +55,11 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
void __init default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_X2APIC
if (x2apic_mode && (apic != &apic_x2apic_phys &&
if (x2apic_mode
#ifdef CONFIG_X86_UV
apic != &apic_x2apic_uv_x &&
&& apic != &apic_x2apic_uv_x
#endif
apic != &apic_x2apic_cluster)) {
) {
if (x2apic_phys)
apic = &apic_x2apic_phys;
else

View File

@@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } };
/*
* Set up a segment that references the real mode segment 0x40
* that extends up to the end of page zero (that we have reserved).
* This is for buggy BIOS's that refer to (real mode) segment 0x40
* even though they are called in protected mode.
*/
static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092,
(unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1);
static const char driver_version[] = "1.16ac"; /* no spaces */
@@ -2331,15 +2339,6 @@ static int __init apm_init(void)
}
pm_flags |= PM_APM;
/*
* Set up a segment that references the real mode segment 0x40
* that extends up to the end of page zero (that we have reserved).
* This is for buggy BIOS's that refer to (real mode) segment 0x40
* even though they are called in protected mode.
*/
set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
/*
* Set up the long jump entry point to the APM BIOS, which is called
* from inline assembly.
@@ -2358,12 +2357,12 @@ static int __init apm_init(void)
* code to that CPU.
*/
gdt = get_cpu_gdt_table(0);
set_base(gdt[APM_CS >> 3],
__va((unsigned long)apm_info.bios.cseg << 4));
set_base(gdt[APM_CS_16 >> 3],
__va((unsigned long)apm_info.bios.cseg_16 << 4));
set_base(gdt[APM_DS >> 3],
__va((unsigned long)apm_info.bios.dseg << 4));
set_desc_base(&gdt[APM_CS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
set_desc_base(&gdt[APM_CS_16 >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4));
set_desc_base(&gdt[APM_DS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
proc_create("apm", 0, NULL, &apm_file_ops);

View File

@@ -2,7 +2,7 @@
#include <linux/bitops.h>
#include <linux/mm.h>
#include <asm/io.h>
#include <linux/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
#define CBAR_ENB (0x80000000)
#define CBAR_KEY (0X000000CB)
if (c->x86_model == 9 || c->x86_model == 10) {
if (inl (CBAR) & CBAR_ENB)
outl (0 | CBAR_KEY, CBAR);
if (inl(CBAR) & CBAR_ENB)
outl(0 | CBAR_KEY, CBAR);
}
}
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
if (d > 20*K6_BUG_LOOP)
printk("system stability may be impaired when more than 32 MB are used.\n");
printk(KERN_CONT
"system stability may be impaired when more than 32 MB are used.\n");
else
printk("probably OK (after B9730xxxx).\n");
printk(KERN_CONT "probably OK (after B9730xxxx).\n");
printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
}
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
((l & 0x000fffff)|0x20000000));
printk(KERN_INFO
"CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
l, ((l & 0x000fffff)|0x20000000));
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
}
}
@@ -250,6 +252,64 @@ static int __cpuinit nearby_node(int apicid)
}
#endif
/*
* Fixup core topology information for AMD multi-node processors.
* Assumption 1: Number of cores in each internal node is the same.
* Assumption 2: Mixed systems with both single-node and dual-node
* processors are not supported.
*/
#ifdef CONFIG_X86_HT
static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_PCI
u32 t, cpn;
u8 n, n_id;
int cpu = smp_processor_id();
/* fixup topology information only once for a core */
if (cpu_has(c, X86_FEATURE_AMD_DCM))
return;
/* check for multi-node processor on boot cpu */
t = read_pci_config(0, 24, 3, 0xe8);
if (!(t & (1 << 29)))
return;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
/* cores per node: each internal node has half the number of cores */
cpn = c->x86_max_cores >> 1;
/* even-numbered NB_id of this dual-node processor */
n = c->phys_proc_id << 1;
/*
* determine internal node id and assign cores fifty-fifty to
* each node of the dual-node processor
*/
t = read_pci_config(0, 24 + n, 3, 0xe8);
n = (t>>30) & 0x3;
if (n == 0) {
if (c->cpu_core_id < cpn)
n_id = 0;
else
n_id = 1;
} else {
if (c->cpu_core_id < cpn)
n_id = 1;
else
n_id = 0;
}
/* compute entire NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
/* fixup core id to be in range from 0 to cpn */
c->cpu_core_id = c->cpu_core_id % cpn;
#endif
}
#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distingush the cores.
* Assumes number of cores is a power of two.
@@ -267,6 +327,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
/* fixup topology information on multi-node processors */
if ((c->x86 == 0x10) && (c->x86_model == 9))
amd_fixup_dcm(c);
#endif
}
@@ -275,9 +338,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
int cpu = smp_processor_id();
int node;
unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
unsigned apicid = c->apicid;
node = per_cpu(cpu_llc_id, cpu);
node = c->phys_proc_id;
if (apicid_to_node[apicid] != NUMA_NO_NODE)
node = apicid_to_node[apicid];
if (!node_online(node)) {
@@ -398,18 +462,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
u32 level;
level = cpuid_eax(1);
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
/*
* Some BIOSes incorrectly force this feature, but only K8
* revision D (model = 0x14) and later actually support it.
* (AMD Erratum #110, docId: 25759).
*/
if (c->x86_model < 0x14)
if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
u64 val;
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
if (!rdmsrl_amd_safe(0xc001100d, &val)) {
val &= ~(1ULL << 32);
wrmsrl_amd_safe(0xc001100d, val);
}
}
}
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
#else
/*
@@ -494,27 +570,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* benefit in doing so.
*/
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
if ((tseg>>PMD_SHIFT) <
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
if ((tseg>>PMD_SHIFT) <
(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
((tseg>>PMD_SHIFT) <
((tseg>>PMD_SHIFT) <
(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
set_memory_4k((unsigned long)__va(tseg), 1);
(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
set_memory_4k((unsigned long)__va(tseg), 1);
}
}
#endif
}
#ifdef CONFIG_X86_32
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_mask == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
(c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */
(c->x86_mask == 0 || c->x86_mask == 1))
size = 256;
}
return size;

View File

@@ -81,7 +81,7 @@ static void __init check_fpu(void)
boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
}
static void __init check_hlt(void)
@@ -98,7 +98,7 @@ static void __init check_hlt(void)
halt();
halt();
halt();
printk("OK.\n");
printk(KERN_CONT "OK.\n");
}
/*
@@ -122,9 +122,9 @@ static void __init check_popad(void)
* CPU hard. Too bad.
*/
if (res != 12345678)
printk("Buggy.\n");
printk(KERN_CONT "Buggy.\n");
else
printk("OK.\n");
printk(KERN_CONT "OK.\n");
#endif
}
@@ -156,7 +156,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#ifndef CONFIG_SMP
printk("CPU: ");
printk(KERN_INFO "CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
check_config();

View File

@@ -15,7 +15,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#if !defined(CONFIG_SMP)
printk("CPU: ");
printk(KERN_INFO "CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();

View File

@@ -18,8 +18,8 @@
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/topology.h>
#include <asm/cpumask.h>
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <asm/pgtable.h>
#include <asm/atomic.h>
#include <asm/proto.h>
@@ -28,13 +28,13 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mtrr.h>
#include <asm/numa.h>
#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/smp.h>
#include <linux/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -94,45 +94,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
* TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
[GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
[GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
[GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
[GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
#else
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
[GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
[GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
/*
* Segments used for calling PnP BIOS have byte granularity.
* They code segments and data segments have fixed 64k limits,
* the transfer segment sizes are set at run time.
*/
/* 32-bit code */
[GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
[GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
[GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
[GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* 16-bit data */
[GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
[GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
/* 16-bit data */
[GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
[GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
/* 16-bit data */
[GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
[GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
/*
* The APM segments have byte granularity and their bases
* are set at run time. All have 64k limits.
*/
/* 32-bit code */
[GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
[GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
[GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
[GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* data */
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
[GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
GDT_STACK_CANARY_INIT
#endif
} };
@@ -982,18 +982,26 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
/*
* The following four percpu variables are hot. Align current_task to
* cacheline size such that all four fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/*
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
};
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
/* May not be marked __init: used by software suspend */
void syscall_init(void)
@@ -1042,8 +1049,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */

View File

@@ -3,10 +3,10 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <asm/dma.h>
#include <asm/io.h>
#include <linux/io.h>
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
#include <asm/timer.h>
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
* The 5510/5520 companion chips have a funky PIT.
*/
if (vendor == PCI_VENDOR_ID_CYRIX &&
(device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
(device == PCI_DEVICE_ID_CYRIX_5510 ||
device == PCI_DEVICE_ID_CYRIX_5520))
mark_tsc_unstable("cyrix 5510/5520 detected");
}
#endif
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
* ? : 0x7x
* GX1 : 0x8x GX1 datasheet 56
*/
if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
if ((0x30 <= dir1 && dir1 <= 0x6f) ||
(0x80 <= dir1 && dir1 <= 0x8f))
geode_configure();
return;
} else { /* MediaGX */
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
/* enable MAPEN */
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
/* enable cpuid */
setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
/* disable MAPEN */
setCx86(CX86_CCR3, ccr3);
local_irq_restore(flags);
}
}

View File

@@ -28,11 +28,10 @@
static inline void __cpuinit
detect_hypervisor_vendor(struct cpuinfo_x86 *c)
{
if (vmware_platform()) {
if (vmware_platform())
c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
} else {
else
c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
}
}
unsigned long get_hypervisor_tsc_freq(void)

View File

@@ -7,17 +7,17 @@
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/ds.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_64
#include <asm/topology.h>
#include <linux/topology.h>
#include <asm/numa_64.h>
#endif
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_F00F_BUG
/*
* All current models of Pentium and Pentium with MMX technology CPUs
* have the F0 0F bug, which lets nonprivileged users lock up the system.
* have the F0 0F bug, which lets nonprivileged users lock up the
* system.
* Note that the workaround only should be initialized once...
*/
c->f00f_bug = 0;
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
}
}
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return ((eax >> 26) + 1);
return (eax >> 26) + 1;
else
return 1;
}

View File

@@ -3,7 +3,7 @@
*
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
@@ -16,7 +16,7 @@
#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/smp.h>
#include <linux/smp.h>
#include <asm/k8.h>
#define LVL_1_INST 1
@@ -25,14 +25,15 @@
#define LVL_3 4
#define LVL_TRACE 5
struct _cache_table
{
struct _cache_table {
unsigned char descriptor;
char cache_type;
short size;
};
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
/* All the cache descriptor types we care about (no TLB or
trace cache entries) */
static const struct _cache_table __cpuinitconst cache_table[] =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
};
enum _cache_type
{
enum _cache_type {
CACHE_TYPE_NULL = 0,
CACHE_TYPE_DATA = 1,
CACHE_TYPE_INST = 2,
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves;
Maybe later */
union l1_cache {
struct {
unsigned line_size : 8;
unsigned lines_per_tag : 8;
unsigned assoc : 8;
unsigned size_in_kb : 8;
unsigned line_size:8;
unsigned lines_per_tag:8;
unsigned assoc:8;
unsigned size_in_kb:8;
};
unsigned val;
};
union l2_cache {
struct {
unsigned line_size : 8;
unsigned lines_per_tag : 4;
unsigned assoc : 4;
unsigned size_in_kb : 16;
unsigned line_size:8;
unsigned lines_per_tag:4;
unsigned assoc:4;
unsigned size_in_kb:16;
};
unsigned val;
};
union l3_cache {
struct {
unsigned line_size : 8;
unsigned lines_per_tag : 4;
unsigned assoc : 4;
unsigned res : 2;
unsigned size_encoded : 14;
unsigned line_size:8;
unsigned lines_per_tag:4;
unsigned assoc:4;
unsigned res:2;
unsigned size_encoded:14;
};
unsigned val;
};
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
case 0:
if (!l1->val)
return;
assoc = l1->assoc;
assoc = assocs[l1->assoc];
line_size = l1->line_size;
lines_per_tag = l1->lines_per_tag;
size_in_kb = l1->size_in_kb;
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
case 2:
if (!l2.val)
return;
assoc = l2.assoc;
assoc = assocs[l2.assoc];
line_size = l2.line_size;
lines_per_tag = l2.lines_per_tag;
/* cpu_data has errata corrections for K7 applied */
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
case 3:
if (!l3.val)
return;
assoc = l3.assoc;
assoc = assocs[l3.assoc];
line_size = l3.line_size;
lines_per_tag = l3.lines_per_tag;
size_in_kb = l3.size_encoded * 512;
if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
size_in_kb = size_in_kb >> 1;
assoc = assoc >> 1;
}
break;
default:
return;
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
eax->split.is_self_initializing = 1;
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
if (leaf == 3)
eax->split.num_threads_sharing =
current_cpu_data.x86_max_cores - 1;
else
eax->split.num_threads_sharing = 0;
eax->split.num_threads_sharing = 0;
eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
if (assoc == 0xf)
if (assoc == 0xffff)
eax->split.is_fully_associative = 1;
ebx->split.coherency_line_size = line_size - 1;
ebx->split.ways_of_associativity = assocs[assoc] - 1;
ebx->split.ways_of_associativity = assoc - 1;
ebx->split.physical_line_partition = lines_per_tag - 1;
ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
(ebx->split.ways_of_associativity + 1) - 1;
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void)
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
if (retval >= 0) {
switch(this_leaf.eax.split.level) {
case 1:
switch (this_leaf.eax.split.level) {
case 1:
if (this_leaf.eax.split.type ==
CACHE_TYPE_DATA)
new_l1d = this_leaf.size/1024;
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
CACHE_TYPE_INST)
new_l1i = this_leaf.size/1024;
break;
case 2:
case 2:
new_l2 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
l2_id = c->apicid >> index_msb;
break;
case 3:
case 3:
new_l3 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
index_msb = get_count_order(
num_threads_sharing);
l3_id = c->apicid >> index_msb;
break;
default:
default:
break;
}
}
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;
for ( i = 0 ; i < n ; i++ ) {
for (i = 0 ; i < n ; i++) {
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
for ( j = 0 ; j < 3 ; j++ ) {
if (regs[j] & (1 << 31)) regs[j] = 0;
}
for (j = 0 ; j < 3 ; j++)
if (regs[j] & (1 << 31))
regs[j] = 0;
/* Byte 0 is level count, not a descriptor */
for ( j = 1 ; j < 16 ; j++ ) {
for (j = 1 ; j < 16 ; j++) {
unsigned char des = dp[j];
unsigned char k = 0;
/* look up this descriptor in the table */
while (cache_table[k].descriptor != 0)
{
while (cache_table[k].descriptor != 0) {
if (cache_table[k].descriptor == des) {
if (only_trace && cache_table[k].cache_type != LVL_TRACE)
break;
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
}
if (trace)
printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
else if ( l1i )
printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
else if (l1i)
printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
if (l1d)
printk(", L1 D cache: %dK\n", l1d);
printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
else
printk("\n");
printk(KERN_CONT "\n");
if (l2)
printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
@@ -522,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
int index_msb, i;
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
struct cpuinfo_x86 *d;
for_each_online_cpu(i) {
if (!per_cpu(cpuid4_info, i))
continue;
d = &cpu_data(i);
this_leaf = CPUID4_INFO_IDX(i, index);
cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
d->llc_shared_map);
}
return;
}
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -558,8 +571,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif
static void __cpuinit free_cache_attributes(unsigned int cpu)
@@ -645,7 +663,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
{ \
return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
show_one_plus(level, eax.split.level, 0);
@@ -656,7 +674,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
return sprintf (buf, "%luK\n", this_leaf->size / 1024);
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -669,7 +687,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
const struct cpumask *mask;
mask = to_cpumask(this_leaf->shared_cpu_map);
n = type?
n = type ?
cpulist_scnprintf(buf, len-2, mask) :
cpumask_scnprintf(buf, len-2, mask);
buf[n++] = '\n';
@@ -800,7 +818,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
static struct attribute * default_attrs[] = {
static struct attribute *default_attrs[] = {
&type.attr,
&level.attr,
&coherency_line_size.attr,
@@ -815,7 +833,7 @@ static struct attribute * default_attrs[] = {
NULL
};
static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct _cache_attr *fattr = to_attr(attr);
struct _index_kobject *this_leaf = to_object(kobj);
@@ -828,8 +846,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
return ret;
}
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
static ssize_t store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
struct _cache_attr *fattr = to_attr(attr);
struct _index_kobject *this_leaf = to_object(kobj);
@@ -883,7 +901,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
goto err_out;
per_cpu(index_kobject, cpu) = kzalloc(
sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
if (unlikely(per_cpu(index_kobject, cpu) == NULL))
goto err_out;
@@ -917,7 +935,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
for (i = 0; i < num_cache_leaves; i++) {
this_object = INDEX_KOBJECT_PTR(cpu,i);
this_object = INDEX_KOBJECT_PTR(cpu, i);
this_object->cpu = cpu;
this_object->index = i;
retval = kobject_init_and_add(&(this_object->kobj),
@@ -925,9 +943,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
per_cpu(cache_kobject, cpu),
"index%1lu", i);
if (unlikely(retval)) {
for (j = 0; j < i; j++) {
kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
}
for (j = 0; j < i; j++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
return retval;
@@ -952,7 +969,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
}
@@ -977,8 +994,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
.notifier_call = cacheinfo_cpu_callback,
};

View File

@@ -183,6 +183,11 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
void __weak decode_mce(struct mce *m)
{
return;
}
static void print_mce(struct mce *m)
{
printk(KERN_EMERG
@@ -205,6 +210,8 @@ static void print_mce(struct mce *m)
printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid,
m->apicid);
decode_mce(m);
}
static void print_mce_head(void)
@@ -215,7 +222,10 @@ static void print_mce_head(void)
static void print_mce_tail(void)
{
printk(KERN_EMERG "This is not a software problem!\n"
"Run through mcelog --ascii to decode and contact your hardware vendor\n");
#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
"Run through mcelog --ascii to decode and contact your hardware vendor\n"
#endif
);
}
#define PANIC_TIMEOUT 5 /* 5 seconds */

View File

@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
struct cpuinfo_x86 *c = &cpu_data(cpu);
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
i = cpumask_first(cpu_core_mask(cpu));
i = cpumask_first(c->llc_shared_map);
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
cpumask_copy(b->cpus, cpu_core_mask(cpu));
cpumask_copy(b->cpus, c->llc_shared_map);
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifndef CONFIG_SMP
cpumask_setall(b->cpus);
#else
cpumask_copy(b->cpus, cpu_core_mask(cpu));
cpumask_copy(b->cpus, c->llc_shared_map);
#endif
per_cpu(threshold_banks, cpu)[bank] = b;

View File

@@ -7,15 +7,15 @@
static void
amd_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
unsigned long *size, mtrr_type *type)
{
unsigned long low, high;
rdmsr(MSR_K6_UWCCR, low, high);
/* Upper dword is region 1, lower is region 0 */
/* Upper dword is region 1, lower is region 0 */
if (reg == 1)
low = high;
/* The base masks off on the right alignment */
/* The base masks off on the right alignment */
*base = (low & 0xFFFE0000) >> PAGE_SHIFT;
*type = 0;
if (low & 1)
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base,
return;
}
/*
* This needs a little explaining. The size is stored as an
* inverted mask of bits of 128K granularity 15 bits long offset
* 2 bits
* This needs a little explaining. The size is stored as an
* inverted mask of bits of 128K granularity 15 bits long offset
* 2 bits.
*
* So to get a size we do invert the mask and add 1 to the lowest
* mask bit (4 as its 2 bits in). This gives us a size we then shift
* to turn into 128K blocks
* So to get a size we do invert the mask and add 1 to the lowest
* mask bit (4 as its 2 bits in). This gives us a size we then shift
* to turn into 128K blocks.
*
* eg 111 1111 1111 1100 is 512K
* eg 111 1111 1111 1100 is 512K
*
* invert 000 0000 0000 0011
* +1 000 0000 0000 0100
* *128K ...
* invert 000 0000 0000 0011
* +1 000 0000 0000 0100
* *128K ...
*/
low = (~low) & 0x1FFFC;
*size = (low + 4) << (15 - PAGE_SHIFT);
return;
}
static void amd_set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
/* [SUMMARY] Set variable MTRR register on the local CPU.
<reg> The register to set.
<base> The base address of the region.
<size> The size of the region. If this is 0 the region is disabled.
<type> The type of the region.
[RETURNS] Nothing.
*/
/**
* amd_set_mtrr - Set variable MTRR register on the local CPU.
*
* @reg The register to set.
* @base The base address of the region.
* @size The size of the region. If this is 0 the region is disabled.
* @type The type of the region.
*
* Returns nothing.
*/
static void
amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
u32 regs[2];
/*
* Low is MTRR0 , High MTRR 1
* Low is MTRR0, High MTRR 1
*/
rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
/*
* Blank to disable
* Blank to disable
*/
if (size == 0)
if (size == 0) {
regs[reg] = 0;
else
/* Set the register to the base, the type (off by one) and an
inverted bitmask of the size The size is the only odd
bit. We are fed say 512K We invert this and we get 111 1111
1111 1011 but if you subtract one and invert you get the
desired 111 1111 1111 1100 mask
But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
} else {
/*
* Set the register to the base, the type (off by one) and an
* inverted bitmask of the size The size is the only odd
* bit. We are fed say 512K We invert this and we get 111 1111
* 1111 1011 but if you subtract one and invert you get the
* desired 111 1111 1111 1100 mask
*
* But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
*/
regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
| (base << PAGE_SHIFT) | (type + 1);
}
/*
* The writeback rule is quite specific. See the manual. Its
* disable local interrupts, write back the cache, set the mtrr
* The writeback rule is quite specific. See the manual. Its
* disable local interrupts, write back the cache, set the mtrr
*/
wbinvd();
wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
}
static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
static int
amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
{
/* Apply the K6 block alignment and size rules
In order
o Uncached or gathering only
o 128K or bigger block
o Power of 2 block
o base suitably aligned to the power
*/
/*
* Apply the K6 block alignment and size rules
* In order
* o Uncached or gathering only
* o 128K or bigger block
* o Power of 2 block
* o base suitably aligned to the power
*/
if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
|| (size & ~(size - 1)) - size || (base & (size - 1)))
return -EINVAL;
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void)
set_mtrr_ops(&amd_mtrr_ops);
return 0;
}
//arch_initcall(amd_mtrr_init);

View File

@@ -1,7 +1,9 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include "mtrr.h"
static struct {
@@ -12,25 +14,25 @@ static struct {
static u8 centaur_mcr_reserved;
static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
/*
* Report boot time MCR setups
/**
* centaur_get_free_region - Get a free MTRR.
*
* @base: The starting (base) address of the region.
* @size: The size (in bytes) of the region.
*
* Returns: the index of the region on success, else -1 on error.
*/
static int
centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
/* [SUMMARY] Get a free MTRR.
<base> The starting (base) address of the region.
<size> The size (in bytes) of the region.
[RETURNS] The index of the region on success, else -1 on error.
*/
{
int i, max;
mtrr_type ltype;
unsigned long lbase, lsize;
mtrr_type ltype;
int i, max;
max = num_var_ranges;
if (replace_reg >= 0 && replace_reg < max)
return replace_reg;
for (i = 0; i < max; ++i) {
if (centaur_mcr_reserved & (1 << i))
continue;
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
if (lsize == 0)
return i;
}
return -ENOSPC;
}
void
mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
/*
* Report boot time MCR setups
*/
void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
centaur_mcr[mcr].low = lo;
centaur_mcr[mcr].high = hi;
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base,
{
*base = centaur_mcr[reg].high >> PAGE_SHIFT;
*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
*type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */
*type = MTRR_TYPE_WRCOMB; /* write-combining */
if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
*type = MTRR_TYPE_UNCACHABLE;
if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
*type = MTRR_TYPE_WRBACK;
if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
*type = MTRR_TYPE_WRBACK;
}
static void centaur_set_mcr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
static void
centaur_set_mcr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
{
unsigned long low, high;
if (size == 0) {
/* Disable */
/* Disable */
high = low = 0;
} else {
high = base << PAGE_SHIFT;
if (centaur_mcr_type == 0)
low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */
else {
if (centaur_mcr_type == 0) {
/* Only support write-combining... */
low = -size << PAGE_SHIFT | 0x1f;
} else {
if (type == MTRR_TYPE_UNCACHABLE)
low = -size << PAGE_SHIFT | 0x02; /* NC */
low = -size << PAGE_SHIFT | 0x02; /* NC */
else
low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */
low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */
}
}
centaur_mcr[reg].high = high;
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base,
wrmsr(MSR_IDT_MCR0 + reg, low, high);
}
#if 0
/*
* Initialise the later (saner) Winchip MCR variant. In this version
* the BIOS can pass us the registers it has used (but not their values)
* and the control register is read/write
*/
static void __init
centaur_mcr1_init(void)
{
unsigned i;
u32 lo, hi;
/* Unfortunately, MCR's are read-only, so there is no way to
* find out what the bios might have done.
*/
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
lo &= ~0x1C0; /* clear key */
lo |= 0x040; /* set key to 1 */
wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
}
centaur_mcr_type = 1;
/*
* Clear any unconfigured MCR's.
*/
for (i = 0; i < 8; ++i) {
if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
if (!(lo & (1 << (9 + i))))
wrmsr(MSR_IDT_MCR0 + i, 0, 0);
else
/*
* If the BIOS set up an MCR we cannot see it
* but we don't wish to obliterate it
*/
centaur_mcr_reserved |= (1 << i);
}
}
/*
* Throw the main write-combining switch...
* However if OOSTORE is enabled then people have already done far
* cleverer things and we should behave.
*/
lo |= 15; /* Write combine enables */
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
/*
* Initialise the original winchip with read only MCR registers
* no used bitmask for the BIOS to pass on and write only control
*/
static void __init
centaur_mcr0_init(void)
{
unsigned i;
/* Unfortunately, MCR's are read-only, so there is no way to
* find out what the bios might have done.
*/
/* Clear any unconfigured MCR's.
* This way we are sure that the centaur_mcr array contains the actual
* values. The disadvantage is that any BIOS tweaks are thus undone.
*
*/
for (i = 0; i < 8; ++i) {
if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
wrmsr(MSR_IDT_MCR0 + i, 0, 0);
}
wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
}
/*
* Initialise Winchip series MCR registers
*/
static void __init
centaur_mcr_init(void)
{
struct set_mtrr_context ctxt;
set_mtrr_prepare_save(&ctxt);
set_mtrr_cache_disable(&ctxt);
if (boot_cpu_data.x86_model == 4)
centaur_mcr0_init();
else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
centaur_mcr1_init();
set_mtrr_done(&ctxt);
}
#endif
static int centaur_validate_add_page(unsigned long base,
unsigned long size, unsigned int type)
static int
centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
{
/*
* FIXME: Winchip2 supports uncached
* FIXME: Winchip2 supports uncached
*/
if (type != MTRR_TYPE_WRCOMB &&
if (type != MTRR_TYPE_WRCOMB &&
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
printk(KERN_WARNING
"mtrr: only write-combining%s supported\n",
centaur_mcr_type ? " and uncacheable are"
: " is");
pr_warning("mtrr: only write-combining%s supported\n",
centaur_mcr_type ? " and uncacheable are" : " is");
return -EINVAL;
}
return 0;
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base,
static struct mtrr_ops centaur_mtrr_ops = {
.vendor = X86_VENDOR_CENTAUR,
// .init = centaur_mcr_init,
.set = centaur_set_mcr,
.get = centaur_get_mcr,
.get_free_region = centaur_get_free_region,
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void)
set_mtrr_ops(&centaur_mtrr_ops);
return 0;
}
//arch_initcall(centaur_init_mtrr);

View File

@@ -1,51 +1,75 @@
/* MTRR (Memory Type Range Register) cleanup
Copyright (C) 2009 Yinghai Lu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MTRR (Memory Type Range Register) cleanup
*
* Copyright (C) 2009 Yinghai Lu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/kvm_para.h>
#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>
#include "mtrr.h"
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
struct res_range {
unsigned long start;
unsigned long end;
unsigned long start;
unsigned long end;
};
struct var_mtrr_range_state {
unsigned long base_pfn;
unsigned long size_pfn;
mtrr_type type;
};
struct var_mtrr_state {
unsigned long range_startk;
unsigned long range_sizek;
unsigned long chunk_sizek;
unsigned long gran_sizek;
unsigned int reg;
};
/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
static int __init
add_range(struct res_range *range, int nr_range, unsigned long start,
unsigned long end)
add_range(struct res_range *range, int nr_range,
unsigned long start, unsigned long end)
{
/* out of slots */
/* Out of slots: */
if (nr_range >= RANGE_NUM)
return nr_range;
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start,
}
static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
unsigned long end)
add_range_with_merge(struct res_range *range, int nr_range,
unsigned long start, unsigned long end)
{
int i;
/* try to merge it with old one */
/* Try to merge it with old one: */
for (i = 0; i < nr_range; i++) {
unsigned long final_start, final_end;
unsigned long common_start, common_end;
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
return nr_range;
}
/* need to add that */
/* Need to add it: */
return add_range(range, nr_range, start, end);
}
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end)
}
if (start > range[j].start && end < range[j].end) {
/* find the new spare */
/* Find the new spare: */
for (i = 0; i < RANGE_NUM; i++) {
if (range[i].end == 0)
break;
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2)
return start1 - start2;
}
struct var_mtrr_range_state {
unsigned long base_pfn;
unsigned long size_pfn;
mtrr_type type;
};
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
#define BIOS_BUG_MSG KERN_WARNING \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
range[i].start, range[i].end + 1);
}
/* take out UC ranges */
/* Take out UC ranges: */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_UNCACHABLE &&
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
(mtrr_state.enabled & 1)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
"contains strange UC entry under 1M, check "
"with your system vendor!\n", i);
printk(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))
continue;
size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
return nr_range;
}
static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;
#ifdef CONFIG_MTRR_SANITIZER
static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
unsigned long sum;
unsigned long sum = 0;
int i;
sum = 0;
for (i = 0; i < nr_range; i++)
sum += range[i].end + 1 - range[i].start;
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str)
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
struct var_mtrr_state {
unsigned long range_startk;
unsigned long range_sizek;
unsigned long chunk_sizek;
unsigned long gran_sizek;
unsigned int reg;
};
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
unsigned char type, unsigned int address_bits)
unsigned char type, unsigned int address_bits)
{
u32 base_lo, base_hi, mask_lo, mask_hi;
u64 base, mask;
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
mask = (1ULL << address_bits) - 1;
mask &= ~((((u64)sizek) << 10) - 1);
base = ((u64)basek) << 10;
base = ((u64)basek) << 10;
base |= type;
mask |= 0x800;
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
unsigned char type)
unsigned char type)
{
range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
range_state[reg].type = type;
}
static void __init
set_var_mtrr_all(unsigned int address_bits)
static void __init set_var_mtrr_all(unsigned int address_bits)
{
unsigned long basek, sizek;
unsigned char type;
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits)
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
char factor;
unsigned long base = sizek;
char factor;
if (base & ((1<<10) - 1)) {
/* not MB alignment */
/* Not MB-aligned: */
factor = 'K';
} else if (base & ((1<<20) - 1)) {
factor = 'M';
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
unsigned long max_align, align;
unsigned long sizek;
/* Compute the maximum size I can make a range */
/* Compute the maximum size with which we can make a range: */
if (range_startk)
max_align = ffs(range_startk) - 1;
else
max_align = 32;
align = fls(range_sizek) - 1;
if (align > max_align)
align = max_align;
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
start_base = to_size_factor(range_startk,
&start_factor),
size_base = to_size_factor(sizek, &size_factor),
start_base = to_size_factor(range_startk, &start_factor);
size_base = to_size_factor(sizek, &size_factor);
printk(KERN_DEBUG "Setting variable MTRR %d, "
Dprintk("Setting variable MTRR %d, "
"base: %ld%cB, range: %ld%cB, type %s\n",
reg, start_base, start_factor,
size_base, size_factor,
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
/* align with gran size, prevent small block used up MTRRs */
/* Align with gran size, prevent small block used up MTRRs: */
range_basek = ALIGN(state->range_startk, gran_sizek);
if ((range_basek > basek) && basek)
return second_sizek;
state->range_sizek -= (range_basek - state->range_startk);
range_sizek = ALIGN(state->range_sizek, gran_sizek);
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
}
state->range_sizek = range_sizek;
/* try to append some small hole */
/* Try to append some small hole: */
range0_basek = state->range_startk;
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
/* no increase */
/* No increase: */
if (range0_sizek == state->range_sizek) {
if (debug_print)
printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + state->range_sizek)<<10);
Dprintk("rangeX: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + state->range_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
state->range_sizek, MTRR_TYPE_WRBACK);
return 0;
}
/* only cut back, when it is not the last */
/* Only cut back when it is not the last: */
if (sizek) {
while (range0_basek + range0_sizek > (basek + sizek)) {
if (range0_sizek >= chunk_sizek)
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
second_try:
range_basek = range0_basek + range0_sizek;
/* one hole in the middle */
/* One hole in the middle: */
if (range_basek > basek && range_basek <= (basek + sizek))
second_sizek = range_basek - basek;
if (range0_sizek > state->range_sizek) {
/* one hole in middle or at end */
/* One hole in middle or at the end: */
hole_sizek = range0_sizek - state->range_sizek - second_sizek;
/* hole size should be less than half of range0 size */
/* Hole size should be less than half of range0 size: */
if (hole_sizek >= (range0_sizek >> 1) &&
range0_sizek >= chunk_sizek) {
range0_sizek -= chunk_sizek;
@@ -491,32 +494,30 @@ second_try:
}
if (range0_sizek) {
if (debug_print)
printk(KERN_DEBUG "range0: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + range0_sizek)<<10);
Dprintk("range0: %016lx - %016lx\n",
range0_basek<<10,
(range0_basek + range0_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
range0_sizek, MTRR_TYPE_WRBACK);
}
if (range0_sizek < state->range_sizek) {
/* need to handle left over */
/* Need to handle left over range: */
range_sizek = state->range_sizek - range0_sizek;
if (debug_print)
printk(KERN_DEBUG "range: %016lx - %016lx\n",
range_basek<<10,
(range_basek + range_sizek)<<10);
Dprintk("range: %016lx - %016lx\n",
range_basek<<10,
(range_basek + range_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range_basek,
range_sizek, MTRR_TYPE_WRBACK);
}
if (hole_sizek) {
hole_basek = range_basek - hole_sizek - second_sizek;
if (debug_print)
printk(KERN_DEBUG "hole: %016lx - %016lx\n",
hole_basek<<10,
(hole_basek + hole_sizek)<<10);
Dprintk("hole: %016lx - %016lx\n",
hole_basek<<10,
(hole_basek + hole_sizek)<<10);
state->reg = range_to_mtrr(state->reg, hole_basek,
hole_sizek, MTRR_TYPE_UNCACHABLE);
}
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
basek = base_pfn << (PAGE_SHIFT - 10);
sizek = size_pfn << (PAGE_SHIFT - 10);
/* See if I can merge with the last range */
/* See if I can merge with the last range: */
if ((basek <= 1024) ||
(state->range_startk + state->range_sizek == basek)) {
unsigned long endk = basek + sizek;
state->range_sizek = endk - state->range_startk;
return;
}
/* Write the range mtrrs */
/* Write the range mtrrs: */
if (state->range_sizek != 0)
second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
/* Allocate an msr */
/* Allocate an msr: */
state->range_startk = basek + second_sizek;
state->range_sizek = sizek - second_sizek;
}
/* mininum size of mtrr block that can take hole */
/* Mininum size of mtrr block that can take hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
static int __init parse_mtrr_chunk_size_opt(char *p)
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p)
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
/* granity of mtrr of block */
/* Granularity of mtrr of block: */
static u64 mtrr_gran_size __initdata;
static int __init parse_mtrr_gran_size_opt(char *p)
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p)
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
static int nr_mtrr_spare_reg __initdata =
static unsigned long nr_mtrr_spare_reg __initdata =
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
static int __init parse_mtrr_spare_reg(char *arg)
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg)
nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
static int __init
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
u64 chunk_size, u64 gran_size)
{
struct var_mtrr_state var_state;
int i;
int num_reg;
int i;
var_state.range_startk = 0;
var_state.range_sizek = 0;
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
memset(range_state, 0, sizeof(range_state));
/* Write the range etc */
for (i = 0; i < nr_range; i++)
/* Write the range: */
for (i = 0; i < nr_range; i++) {
set_var_mtrr_range(&var_state, range[i].start,
range[i].end - range[i].start + 1);
}
/* Write the last range */
/* Write the last range: */
if (var_state.range_sizek != 0)
range_to_mtrr_with_hole(&var_state, 0, 0);
num_reg = var_state.reg;
/* Clear out the extra MTRR's */
/* Clear out the extra MTRR's: */
while (var_state.reg < num_var_ranges) {
save_var_mtrr(var_state.reg, 0, 0, 0);
var_state.reg++;
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
}
struct mtrr_cleanup_result {
unsigned long gran_sizek;
unsigned long chunk_sizek;
unsigned long lose_cover_sizek;
unsigned int num_reg;
int bad;
unsigned long gran_sizek;
unsigned long chunk_sizek;
unsigned long lose_cover_sizek;
unsigned int num_reg;
int bad;
};
/*
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
static void __init print_out_mtrr_range_state(void)
{
int i;
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
mtrr_type type;
int i;
for (i = 0; i < num_var_ranges; i++) {
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void)
int i;
mtrr_type type;
unsigned long size;
/* extra one for all 0 */
/* Extra one for all 0: */
int num[MTRR_NUM_TYPES + 1];
/* check entries number */
/* Check entries number: */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void)
num[type]++;
}
/* check if we got UC entries */
/* Check if we got UC entries: */
if (!num[MTRR_TYPE_UNCACHABLE])
return 0;
/* check if we only had WB and UC */
/* Check if we only had WB and UC */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
num_var_ranges - num[MTRR_NUM_TYPES])
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
return 1;
}
static unsigned long __initdata range_sums;
static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long extra_remove_base,
unsigned long extra_remove_size,
int i)
static void __init
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base,
unsigned long x_remove_size, int i)
{
int num_reg;
static struct res_range range_new[RANGE_NUM];
static int nr_range_new;
unsigned long range_sums_new;
static int nr_range_new;
int num_reg;
/* convert ranges to var ranges state */
num_reg = x86_setup_var_mtrrs(range, nr_range,
chunk_size, gran_size);
/* Convert ranges to var ranges state: */
num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
/* we got new setting in range_state, check it */
/* We got new setting in range_state, check it: */
memset(range_new, 0, sizeof(range_new));
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
extra_remove_base, extra_remove_size);
x_remove_base, x_remove_size);
range_sums_new = sum_ranges(range_new, nr_range_new);
result[i].chunk_sizek = chunk_size >> 10;
result[i].gran_sizek = gran_size >> 10;
result[i].num_reg = num_reg;
if (range_sums < range_sums_new) {
result[i].lose_cover_sizek =
(range_sums_new - range_sums) << PSHIFT;
result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
result[i].bad = 1;
} else
result[i].lose_cover_sizek =
(range_sums - range_sums_new) << PSHIFT;
} else {
result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
}
/* double check it */
/* Double check it: */
if (!result[i].bad && !result[i].lose_cover_sizek) {
if (nr_range_new != nr_range ||
memcmp(range, range_new, sizeof(range)))
result[i].bad = 1;
if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
result[i].bad = 1;
}
if (!result[i].bad && (range_sums - range_sums_new <
min_loss_pfn[num_reg])) {
min_loss_pfn[num_reg] =
range_sums - range_sums_new;
}
if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
min_loss_pfn[num_reg] = range_sums - range_sums_new;
}
static void __init mtrr_print_out_one_result(int i)
{
char gran_factor, chunk_factor, lose_factor;
unsigned long gran_base, chunk_base, lose_base;
char gran_factor, chunk_factor, lose_factor;
gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
result[i].bad ? "*BAD*" : " ",
gran_base, gran_factor, chunk_base, chunk_factor);
printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
result[i].num_reg, result[i].bad ? "-" : "",
lose_base, lose_factor);
pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
result[i].bad ? "*BAD*" : " ",
gran_base, gran_factor, chunk_base, chunk_factor);
pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
result[i].num_reg, result[i].bad ? "-" : "",
lose_base, lose_factor);
}
static int __init mtrr_search_optimal_index(void)
{
int i;
int num_reg_good;
int index_good;
int i;
if (nr_mtrr_spare_reg >= num_var_ranges)
nr_mtrr_spare_reg = num_var_ranges - 1;
num_reg_good = -1;
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
if (!min_loss_pfn[i])
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void)
return index_good;
}
int __init mtrr_cleanup(unsigned address_bits)
{
unsigned long extra_remove_base, extra_remove_size;
unsigned long x_remove_base, x_remove_size;
unsigned long base, size, def, dummy;
mtrr_type type;
u64 chunk_size, gran_size;
mtrr_type type;
int index_good;
int i;
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
return 0;
rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
/* get it and store it aside */
/* Get it and store it aside: */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits)
range_state[i].type = type;
}
/* check if we need handle it and can handle it */
/* Check if we need handle it and can handle it: */
if (!mtrr_need_cleanup())
return 0;
/* print original var MTRRs at first, for debugging: */
/* Print original var MTRRs at first, for debugging: */
printk(KERN_DEBUG "original variable MTRRs\n");
print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
extra_remove_size = 0;
extra_remove_base = 1 << (32 - PAGE_SHIFT);
x_remove_size = 0;
x_remove_base = 1 << (32 - PAGE_SHIFT);
if (mtrr_tom2)
extra_remove_size =
(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
extra_remove_size);
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
* [0, 1M) should always be coverred by var mtrr with WB
* and fixed mtrrs should take effective before var mtrr for it
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
nr_range = add_range_with_merge(range, nr_range, 0,
(1ULL<<(20 - PAGE_SHIFT)) - 1);
/* sort the ranges */
/* Sort the ranges: */
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
range_sums = sum_ranges(range, nr_range);
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_chunk_size && mtrr_gran_size) {
i = 0;
mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
extra_remove_base, extra_remove_size, i);
x_remove_base, x_remove_size, i);
mtrr_print_out_one_result(i);
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits)
continue;
mtrr_calc_range_state(chunk_size, gran_size,
extra_remove_base, extra_remove_size, i);
x_remove_base, x_remove_size, i);
if (debug_print) {
mtrr_print_out_one_result(i);
printk(KERN_INFO "\n");
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits)
}
}
/* try to find the optimal index */
/* Try to find the optimal index: */
index_good = mtrr_search_optimal_index();
if (index_good != -1) {
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits)
i = index_good;
mtrr_print_out_one_result(i);
/* convert ranges to var ranges state */
/* Convert ranges to var ranges state: */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
* Note this won't check if the MTRRs < 4GB where the magic bit doesn't
* apply to are wrong, but so far we don't know of any such case in the wild.
*/
#define Tom2Enabled (1U << 21)
#define Tom2ForceMemTypeWB (1U << 22)
#define Tom2Enabled (1U << 21)
#define Tom2ForceMemTypeWB (1U << 22)
int __init amd_special_default_mtrr(void)
{
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void)
return 0;
if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
return 0;
/* In case some hypervisor doesn't pass SYSCFG through */
/* In case some hypervisor doesn't pass SYSCFG through: */
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
return 0;
/*
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void)
return 0;
}
static u64 __init real_trim_memory(unsigned long start_pfn,
unsigned long limit_pfn)
static u64 __init
real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
{
u64 trim_start, trim_size;
trim_start = start_pfn;
trim_start <<= PAGE_SHIFT;
trim_size = limit_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
return e820_update_range(trim_start, trim_size, E820_RAM,
E820_RESERVED);
return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
}
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
* @end_pfn: ending page frame number
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn,
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
* memory configurations. This routine checks that the highest MTRR matches
* the end of memory, to make sure the MTRRs having a write back type cover
* all of the memory the kernel is intending to use. If not, it'll trim any
* all of the memory the kernel is intending to use. If not, it'll trim any
* memory off the end by adjusting end_pfn, removing it from the kernel's
* allocation pools, warning the user with an obnoxious message.
*/
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 total_trim_size;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
/*
* Make sure we only trim uncachable memory on machines that
* support the Intel MTRR architecture:
*/
if (!is_cpu(INTEL) || disable_mtrr_trim)
return 0;
rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
/* get it and store it aside */
/* Get it and store it aside: */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
range_state[i].type = type;
}
/* Find highest cached pfn */
/* Find highest cached pfn: */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_WRBACK)
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
if (!highest_pfn) {
printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
return 0;
}
/* check entries number */
/* Check entries number: */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
num[type]++;
}
/* no entry for WB? */
/* No entry for WB? */
if (!num[MTRR_TYPE_WRBACK])
return 0;
/* check if we only had WB and UC */
/* Check if we only had WB and UC: */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
}
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
/* Check the head: */
total_trim_size = 0;
/* check the head */
if (range[0].start)
total_trim_size += real_trim_memory(0, range[0].start);
/* check the holes */
/* Check the holes: */
for (i = 0; i < nr_range - 1; i++) {
if (range[i].end + 1 < range[i+1].start)
total_trim_size += real_trim_memory(range[i].end + 1,
range[i+1].start);
}
/* check the top */
/* Check the top: */
i = nr_range - 1;
if (range[i].end + 1 < end_pfn)
total_trim_size += real_trim_memory(range[i].end + 1,
end_pfn);
if (total_trim_size) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
" all of memory, losing %lluMB of RAM.\n",
total_trim_size >> 20);
pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
if (!changed_by_mtrr_cleanup)
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
pr_info("update e820 for mtrr\n");
update_e820();
return 1;
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
return 0;
}

View File

@@ -1,38 +1,40 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include "mtrr.h"
static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
{
unsigned long flags;
unsigned char arr, ccr3, rcr, shift;
unsigned long flags;
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
/* Save flags and disable interrupts */
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
((unsigned char *) base)[3] = getCx86(arr);
((unsigned char *) base)[2] = getCx86(arr + 1);
((unsigned char *) base)[1] = getCx86(arr + 2);
((unsigned char *)base)[3] = getCx86(arr);
((unsigned char *)base)[2] = getCx86(arr + 1);
((unsigned char *)base)[1] = getCx86(arr + 2);
rcr = getCx86(CX86_RCR_BASE + reg);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
/* Enable interrupts if it was enabled previously */
local_irq_restore(flags);
shift = ((unsigned char *) base)[1] & 0x0f;
*base >>= PAGE_SHIFT;
/* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
/*
* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
* Note: shift==0xf means 4G, this is unsupported.
*/
if (shift)
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
}
}
/*
* cyrix_get_free_region - get a free ARR.
*
* @base: the starting (base) address of the region.
* @size: the size (in bytes) of the region.
*
* Returns: the index of the region on success, else -1 on error.
*/
static int
cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
/* [SUMMARY] Get a free ARR.
<base> The starting (base) address of the region.
<size> The size (in bytes) of the region.
[RETURNS] The index of the region on success, else -1 on error.
*/
{
int i;
mtrr_type ltype;
unsigned long lbase, lsize;
mtrr_type ltype;
int i;
switch (replace_reg) {
case 7:
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
cyrix_get_arr(7, &lbase, &lsize, &ltype);
if (lsize == 0)
return 7;
/* Else try ARR0-ARR6 first */
/* Else try ARR0-ARR6 first */
} else {
for (i = 0; i < 7; i++) {
cyrix_get_arr(i, &lbase, &lsize, &ltype);
if (lsize == 0)
return i;
}
/* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
/*
* ARR0-ARR6 isn't free
* try ARR7 but its size must be at least 256K
*/
cyrix_get_arr(i, &lbase, &lsize, &ltype);
if ((lsize == 0) && (size >= 0x40))
return i;
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
return -ENOSPC;
}
static u32 cr4 = 0;
static u32 ccr3;
static u32 cr4, ccr3;
static void prepare_set(void)
{
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if ( cpu_has_pge ) {
if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
}
/* Disable and flush caches. Note that wbinvd flushes the TLBs as
a side-effect */
/*
* Disable and flush caches.
* Note that wbinvd flushes the TLBs as a side-effect
*/
cr0 = read_cr0() | X86_CR0_CD;
wbinvd();
write_cr0(cr0);
@@ -147,22 +156,21 @@ static void prepare_set(void)
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
}
static void post_set(void)
{
/* Flush caches and TLBs */
/* Flush caches and TLBs */
wbinvd();
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, ccr3);
/* Enable caches */
/* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
/* Restore value of CR4 */
if ( cpu_has_pge )
/* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(cr4);
}
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
size >>= 6;
size &= 0x7fff; /* make sure arr_size <= 14 */
for (arr_size = 0; size; arr_size++, size >>= 1) ;
for (arr_size = 0; size; arr_size++, size >>= 1)
;
if (reg < 7) {
switch (type) {
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
prepare_set();
base <<= PAGE_SHIFT;
setCx86(arr, ((unsigned char *) &base)[3]);
setCx86(arr + 1, ((unsigned char *) &base)[2]);
setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
setCx86(arr + 0, ((unsigned char *)&base)[3]);
setCx86(arr + 1, ((unsigned char *)&base)[2]);
setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size);
setCx86(CX86_RCR_BASE + reg, arr_type);
post_set();
}
typedef struct {
unsigned long base;
unsigned long size;
mtrr_type type;
unsigned long base;
unsigned long size;
mtrr_type type;
} arr_state_t;
static arr_state_t arr_state[8] = {
@@ -247,16 +256,17 @@ static void cyrix_set_all(void)
setCx86(CX86_CCR0 + i, ccr_state[i]);
for (; i < 7; i++)
setCx86(CX86_CCR4 + i, ccr_state[i]);
for (i = 0; i < 8; i++)
cyrix_set_arr(i, arr_state[i].base,
for (i = 0; i < 8; i++) {
cyrix_set_arr(i, arr_state[i].base,
arr_state[i].size, arr_state[i].type);
}
post_set();
}
static struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
// .init = cyrix_arr_init,
.set_all = cyrix_set_all,
.set = cyrix_set_arr,
.get = cyrix_get_arr,
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void)
set_mtrr_ops(&cyrix_mtrr_ops);
return 0;
}
//arch_initcall(cyrix_init_mtrr);

View File

@@ -1,28 +1,34 @@
/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
because MTRRs can span upto 40 bits (36bits on most modern x86) */
/*
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
* because MTRRs can span upto 40 bits (36bits on most modern x86)
*/
#define DEBUG
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/processor-flags.h>
#include <asm/cpufeature.h>
#include <asm/tlbflush.h>
#include <asm/system.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/system.h>
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
#include <asm/tlbflush.h>
#include <asm/pat.h>
#include "mtrr.h"
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
};
static struct fixed_range_block fixed_range_blocks[] = {
{ MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
{ MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
{ MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
{ MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
{ MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
{ MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
{}
};
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask;
static int mtrr_state_set;
u64 mtrr_tom2;
struct mtrr_state_type mtrr_state = {};
struct mtrr_state_type mtrr_state;
EXPORT_SYMBOL_GPL(mtrr_state);
/**
/*
* BIOS is expected to clear MtrrFixDramModEn bit, see for example
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
* Opteron Processors" (26094 Rev. 3.30 February 2006), section
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
* Look of multiple ranges matching this address and pick type
* as per MTRR precedence
*/
if (!(mtrr_state.enabled & 2)) {
if (!(mtrr_state.enabled & 2))
return mtrr_state.def_type;
}
prev_match = 0xFF;
for (i = 0; i < num_var_ranges; ++i) {
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
if (start_state != end_state)
return 0xFE;
if ((start & mask) != (base & mask)) {
if ((start & mask) != (base & mask))
continue;
}
curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
if (prev_match == 0xFF) {
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
curr_match = MTRR_TYPE_WRTHROUGH;
}
if (prev_match != curr_match) {
if (prev_match != curr_match)
return MTRR_TYPE_UNCACHABLE;
}
}
if (mtrr_tom2) {
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end)
return mtrr_state.def_type;
}
/* Get the MSR pair relating to a var range */
/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
{
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
/* fill the MSR pair relating to a var range */
/* Fill the MSR pair relating to a var range */
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
{
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index,
vr[index].mask_hi = mask_hi;
}
static void
get_fixed_ranges(mtrr_type * frs)
static void get_fixed_ranges(mtrr_type *frs)
{
unsigned int *p = (unsigned int *) frs;
unsigned int *p = (unsigned int *)frs;
int i;
k8_check_syscfg_dram_mod_en();
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void)
if (!last_fixed_end)
return;
printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start,
last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
pr_debug(" %05X-%05X %s\n", last_fixed_start,
last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
last_fixed_end = 0;
}
static void __init update_fixed_last(unsigned base, unsigned end,
mtrr_type type)
mtrr_type type)
{
last_fixed_start = base;
last_fixed_end = end;
last_fixed_type = type;
}
static void __init print_fixed(unsigned base, unsigned step,
const mtrr_type *types)
static void __init
print_fixed(unsigned base, unsigned step, const mtrr_type *types)
{
unsigned i;
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void)
unsigned int i;
int high_width;
printk(KERN_DEBUG "MTRR default type: %s\n",
mtrr_attrib_to_str(mtrr_state.def_type));
pr_debug("MTRR default type: %s\n",
mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
mtrr_state.enabled & 1 ? "en" : "dis");
pr_debug("MTRR fixed ranges %sabled:\n",
mtrr_state.enabled & 1 ? "en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
print_fixed(0x80000 + i * 0x20000, 0x04000,
mtrr_state.fixed_ranges + (i + 1) * 8);
for (i = 0; i < 8; ++i)
print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
print_fixed(0xC0000 + i * 0x08000, 0x01000,
mtrr_state.fixed_ranges + (i + 3) * 8);
/* tail */
print_fixed_last();
}
printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
pr_debug("MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
if (size_or_mask & 0xffffffffUL)
high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
else
high_width = ffs(size_or_mask>>32) + 32 - 1;
high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
i,
high_width,
mtrr_state.var_ranges[i].base_hi,
mtrr_state.var_ranges[i].base_lo >> 12,
high_width,
mtrr_state.var_ranges[i].mask_hi,
mtrr_state.var_ranges[i].mask_lo >> 12,
mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
i,
high_width,
mtrr_state.var_ranges[i].base_hi,
mtrr_state.var_ranges[i].base_lo >> 12,
high_width,
mtrr_state.var_ranges[i].mask_hi,
mtrr_state.var_ranges[i].mask_lo >> 12,
mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
else
printk(KERN_DEBUG " %u disabled\n", i);
}
if (mtrr_tom2) {
printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
mtrr_tom2, mtrr_tom2>>20);
pr_debug(" %u disabled\n", i);
}
if (mtrr_tom2)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
/* Grab all of the MTRR state for this CPU into *state */
/* Grab all of the MTRR state for this CPU into *state */
void __init get_mtrr_state(void)
{
unsigned int i;
struct mtrr_var_range *vrs;
unsigned lo, dummy;
unsigned long flags;
unsigned lo, dummy;
unsigned int i;
vrs = mtrr_state.var_ranges;
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void)
if (amd_special_default_mtrr()) {
unsigned low, high;
/* TOP_MEM2 */
rdmsr(MSR_K8_TOP_MEM2, low, high);
mtrr_tom2 = high;
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void)
post_set();
local_irq_restore(flags);
}
/* Some BIOS's are fucked and don't set all MTRRs the same! */
/* Some BIOS's are messed up and don't set all MTRRs the same! */
void __init mtrr_state_warn(void)
{
unsigned long mask = smp_changes_mask;
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void)
if (!mask)
return;
if (mask & MTRR_CHANGE_MASK_FIXED)
printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_VARIABLE)
printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
printk(KERN_INFO "mtrr: corrected configuration.\n");
}
/* Doesn't attempt to pass an error out to MTRR users
because it's quite complicated in some cases and probably not
worth it because the best error handling is to ignore it. */
/*
* Doesn't attempt to pass an error out to MTRR users
* because it's quite complicated in some cases and probably not
* worth it because the best error handling is to ignore it.
*/
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
{
if (wrmsr_safe(msr, a, b) < 0)
if (wrmsr_safe(msr, a, b) < 0) {
printk(KERN_ERR
"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
smp_processor_id(), msr, a, b);
}
}
/**
* set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
* set_fixed_range - checks & updates a fixed-range MTRR if it
* differs from the value it should have
* @msr: MSR address of the MTTR which should be checked and updated
* @changed: pointer which indicates whether the MTRR needed to be changed
* @msrwords: pointer to the MSR values which the MSR should have
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
*
* Returns: The index of the region on success, else negative on error.
*/
int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
int
generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
{
int i, max;
mtrr_type ltype;
unsigned long lbase, lsize;
mtrr_type ltype;
int i, max;
max = num_var_ranges;
if (replace_reg >= 0 && replace_reg < max)
return replace_reg;
for (i = 0; i < max; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
if (lsize == 0)
return i;
}
return -ENOSPC;
}
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
/* Invalid (i.e. free) range */
/* Invalid (i.e. free) range */
*base = 0;
*size = 0;
*type = 0;
@@ -471,27 +482,31 @@ out_put_cpu:
}
/**
* set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
* set_fixed_ranges - checks & updates the fixed-range MTRRs if they
* differ from the saved set
* @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
*/
static int set_fixed_ranges(mtrr_type * frs)
static int set_fixed_ranges(mtrr_type *frs)
{
unsigned long long *saved = (unsigned long long *) frs;
unsigned long long *saved = (unsigned long long *)frs;
bool changed = false;
int block=-1, range;
int block = -1, range;
k8_check_syscfg_dram_mod_en();
while (fixed_range_blocks[++block].ranges)
for (range=0; range < fixed_range_blocks[block].ranges; range++)
set_fixed_range(fixed_range_blocks[block].base_msr + range,
&changed, (unsigned int *) saved++);
while (fixed_range_blocks[++block].ranges) {
for (range = 0; range < fixed_range_blocks[block].ranges; range++)
set_fixed_range(fixed_range_blocks[block].base_msr + range,
&changed, (unsigned int *)saved++);
}
return changed;
}
/* Set the MSR pair relating to a var range. Returns TRUE if
changes are made */
/*
* Set the MSR pair relating to a var range.
* Returns true if changes are made.
*/
static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
{
unsigned int lo, hi;
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
|| (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
changed = true;
}
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi;
*/
static unsigned long set_mtrr_state(void)
{
unsigned int i;
unsigned long change_mask = 0;
unsigned int i;
for (i = 0; i < num_var_ranges; i++)
for (i = 0; i < num_var_ranges; i++) {
if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
change_mask |= MTRR_CHANGE_MASK_VARIABLE;
}
if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
change_mask |= MTRR_CHANGE_MASK_FIXED;
/* Set_mtrr_restore restores the old value of MTRRdefType,
so to set it we fiddle with the saved value */
/*
* Set_mtrr_restore restores the old value of MTRRdefType,
* so to set it we fiddle with the saved value:
*/
if ((deftype_lo & 0xff) != mtrr_state.def_type
|| ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10);
deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
(mtrr_state.enabled << 10);
change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
}
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void)
}
static unsigned long cr4 = 0;
static unsigned long cr4;
static DEFINE_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache don't allow any interrupts - they
* would run extremely slow and would only increase the pain. The caller must
* ensure that local interrupts are disabled and are reenabled after post_set()
* has been called.
* Since we are disabling the cache don't allow any interrupts,
* they would run extremely slow and would only increase the pain.
*
* The caller must ensure that local interrupts are disabled and
* are reenabled after post_set() has been called.
*/
static void prepare_set(void) __acquires(set_atomicity_lock)
{
unsigned long cr0;
/* Note that this is not ideal, since the cache is only flushed/disabled
for this CPU while the MTRRs are changed, but changing this requires
more invasive changes to the way the kernel boots */
/*
* Note that this is not ideal
* since the cache is only flushed/disabled for this CPU while the
* MTRRs are changed, but changing this requires more invasive
* changes to the way the kernel boots
*/
spin_lock(&set_atomicity_lock);
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if ( cpu_has_pge ) {
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
__flush_tlb();
/* Save MTRR state */
/* Save MTRR state */
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Disable MTRRs, and set the default type to uncached */
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
}
static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
/* Flush TLBs (no need to flush caches - they are disabled) */
__flush_tlb();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
/* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
/* Restore value of CR4 */
if ( cpu_has_pge )
/* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(cr4);
spin_unlock(&set_atomicity_lock);
}
@@ -623,24 +647,27 @@ static void generic_set_all(void)
post_set();
local_irq_restore(flags);
/* Use the atomic bitops to update the global mask */
/* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof mask * 8; ++count) {
if (mask & 0x01)
set_bit(count, &smp_changes_mask);
mask >>= 1;
}
}
/**
* generic_set_mtrr - set variable MTRR register on the local CPU.
*
* @reg: The register to set.
* @base: The base address of the region.
* @size: The size of the region. If this is 0 the region is disabled.
* @type: The type of the region.
*
* Returns nothing.
*/
static void generic_set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
/* [SUMMARY] Set variable MTRR register on the local CPU.
<reg> The register to set.
<base> The base address of the region.
<size> The size of the region. If this is 0 the region is disabled.
<type> The type of the region.
[RETURNS] Nothing.
*/
{
unsigned long flags;
struct mtrr_var_range *vr;
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
prepare_set();
if (size == 0) {
/* The invalid bit is kept in the mask, so we simply clear the
relevant mask register to disable a range. */
/*
* The invalid bit is kept in the mask, so we simply
* clear the relevant mask register to disable a range.
*/
mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
memset(vr, 0, sizeof(struct mtrr_var_range));
} else {
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
local_irq_restore(flags);
}
int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
int generic_validate_add_page(unsigned long base, unsigned long size,
unsigned int type)
{
unsigned long lbase, last;
/* For Intel PPro stepping <= 7, must be 4 MiB aligned
and not touch 0x70000000->0x7003FFFF */
/*
* For Intel PPro stepping <= 7
* must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
}
if (!(base + size < 0x70000 || base > 0x7003F) &&
(type == MTRR_TYPE_WRCOMB
|| type == MTRR_TYPE_WRBACK)) {
printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
return -EINVAL;
}
}
/* Check upper bits of base and last are equal and lower bits are 0
for base and 1 for last */
/*
* Check upper bits of base and last are equal and lower bits are 0
* for base and 1 for last
*/
last = base + size - 1;
for (lbase = base; !(lbase & 1) && (last & 1);
lbase = lbase >> 1, last = last >> 1) ;
lbase = lbase >> 1, last = last >> 1)
;
if (lbase != last) {
printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
base, size);
pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
return -EINVAL;
}
return 0;
}
static int generic_have_wrcomb(void)
{
unsigned long config, dummy;
rdmsr(MSR_MTRRcap, config, dummy);
return (config & (1 << 10));
return config & (1 << 10);
}
int positive_have_wrcomb(void)
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void)
return 1;
}
/* generic structure...
/*
* Generic structure...
*/
struct mtrr_ops generic_mtrr_ops = {
.use_intel_if = 1,
.set_all = generic_set_all,
.get = generic_get_mtrr,
.get_free_region = generic_get_free_region,
.set = generic_set_mtrr,
.validate_add_page = generic_validate_add_page,
.have_wrcomb = generic_have_wrcomb,
.use_intel_if = 1,
.set_all = generic_set_all,
.get = generic_get_mtrr,
.get_free_region = generic_get_free_region,
.set = generic_set_mtrr,
.validate_add_page = generic_validate_add_page,
.have_wrcomb = generic_have_wrcomb,
};

View File

@@ -1,27 +1,28 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/init.h>
#define LINE_SIZE 80
#include <asm/mtrr.h>
#include "mtrr.h"
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
static const char *const mtrr_strings[MTRR_NUM_TYPES] =
{
"uncachable", /* 0 */
"write-combining", /* 1 */
"?", /* 2 */
"?", /* 3 */
"write-through", /* 4 */
"write-protect", /* 5 */
"write-back", /* 6 */
"uncachable", /* 0 */
"write-combining", /* 1 */
"?", /* 2 */
"?", /* 3 */
"write-through", /* 4 */
"write-protect", /* 5 */
"write-back", /* 6 */
};
const char *mtrr_attrib_to_str(int x)
@@ -35,8 +36,8 @@ static int
mtrr_file_add(unsigned long base, unsigned long size,
unsigned int type, bool increment, struct file *file, int page)
{
unsigned int *fcount = FILE_FCOUNT(file);
int reg, max;
unsigned int *fcount = FILE_FCOUNT(file);
max = num_var_ranges;
if (fcount == NULL) {
@@ -61,8 +62,8 @@ static int
mtrr_file_del(unsigned long base, unsigned long size,
struct file *file, int page)
{
int reg;
unsigned int *fcount = FILE_FCOUNT(file);
int reg;
if (!page) {
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size,
return reg;
}
/* RED-PEN: seq_file can seek now. this is ignored. */
/*
* seq_file can seek but we ignore it.
*
* Format of control line:
* "base=%Lx size=%Lx type=%s" or "disable=%d"
*/
static ssize_t
mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
/* Format of control line:
"base=%Lx size=%Lx type=%s" OR:
"disable=%d"
*/
{
int i, err;
unsigned long reg;
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EPERM;
if (!len)
return -EINVAL;
memset(line, 0, LINE_SIZE);
if (len > LINE_SIZE)
len = LINE_SIZE;
if (copy_from_user(line, buf, len - 1))
return -EFAULT;
linelen = strlen(line);
ptr = line + linelen - 1;
if (linelen && *ptr == '\n')
*ptr = '\0';
if (!strncmp(line, "disable=", 8)) {
reg = simple_strtoul(line + 8, &ptr, 0);
err = mtrr_del_page(reg, 0, 0);
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return err;
return len;
}
if (strncmp(line, "base=", 5))
return -EINVAL;
base = simple_strtoull(line + 5, &ptr, 0);
for (; isspace(*ptr); ++ptr) ;
for (; isspace(*ptr); ++ptr)
;
if (strncmp(ptr, "size=", 5))
return -EINVAL;
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
for (; isspace(*ptr); ++ptr) ;
for (; isspace(*ptr); ++ptr)
;
if (strncmp(ptr, "type=", 5))
return -EINVAL;
ptr += 5;
for (; isspace(*ptr); ++ptr) ;
for (; isspace(*ptr); ++ptr)
;
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
continue;
base >>= PAGE_SHIFT;
size >>= PAGE_SHIFT;
err =
mtrr_add_page((unsigned long) base, (unsigned long) size, i,
true);
err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
if (err < 0)
return err;
return len;
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
case MTRRIOC32_SET_PAGE_ENTRY:
case MTRRIOC32_DEL_PAGE_ENTRY:
case MTRRIOC32_KILL_PAGE_ENTRY: {
struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg;
struct mtrr_sentry32 __user *s32;
s32 = (struct mtrr_sentry32 __user *)__arg;
err = get_user(sentry.base, &s32->base);
err |= get_user(sentry.size, &s32->size);
err |= get_user(sentry.type, &s32->type);
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
}
case MTRRIOC32_GET_ENTRY:
case MTRRIOC32_GET_PAGE_ENTRY: {
struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
struct mtrr_gentry32 __user *g32;
g32 = (struct mtrr_gentry32 __user *)__arg;
err = get_user(gentry.regnum, &g32->regnum);
err |= get_user(gentry.base, &g32->base);
err |= get_user(gentry.size, &g32->size);
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
if (err)
return err;
switch(cmd) {
switch (cmd) {
case MTRRIOC_GET_ENTRY:
case MTRRIOC_GET_PAGE_ENTRY:
if (copy_to_user(arg, &gentry, sizeof gentry))
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#ifdef CONFIG_COMPAT
case MTRRIOC32_GET_ENTRY:
case MTRRIOC32_GET_PAGE_ENTRY: {
struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
struct mtrr_gentry32 __user *g32;
g32 = (struct mtrr_gentry32 __user *)__arg;
err = put_user(gentry.base, &g32->base);
err |= put_user(gentry.size, &g32->size);
err |= put_user(gentry.regnum, &g32->regnum);
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
return err;
}
static int
mtrr_close(struct inode *ino, struct file *file)
static int mtrr_close(struct inode *ino, struct file *file)
{
int i, max;
unsigned int *fcount = FILE_FCOUNT(file);
int i, max;
if (fcount != NULL) {
max = num_var_ranges;
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset);
static int mtrr_open(struct inode *inode, struct file *file)
{
if (!mtrr_if)
if (!mtrr_if)
return -EIO;
if (!mtrr_if->get)
return -ENXIO;
if (!mtrr_if->get)
return -ENXIO;
return single_open(file, mtrr_seq_show, NULL);
}
static const struct file_operations mtrr_fops = {
.owner = THIS_MODULE,
.open = mtrr_open,
.read = seq_read,
.llseek = seq_lseek,
.write = mtrr_write,
.unlocked_ioctl = mtrr_ioctl,
.compat_ioctl = mtrr_ioctl,
.release = mtrr_close,
.owner = THIS_MODULE,
.open = mtrr_open,
.read = seq_read,
.llseek = seq_lseek,
.write = mtrr_write,
.unlocked_ioctl = mtrr_ioctl,
.compat_ioctl = mtrr_ioctl,
.release = mtrr_close,
};
static int mtrr_seq_show(struct seq_file *seq, void *offset)
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
max = num_var_ranges;
for (i = 0; i < max; i++) {
mtrr_if->get(i, &base, &size, &type);
if (size == 0)
if (size == 0) {
mtrr_usage_table[i] = 0;
else {
if (size < (0x100000 >> PAGE_SHIFT)) {
/* less than 1MB */
factor = 'K';
size <<= PAGE_SHIFT - 10;
} else {
factor = 'M';
size >>= 20 - PAGE_SHIFT;
}
/* RED-PEN: base can be > 32bit */
len += seq_printf(seq,
"reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
mtrr_usage_table[i], mtrr_attrib_to_str(type));
continue;
}
if (size < (0x100000 >> PAGE_SHIFT)) {
/* less than 1MB */
factor = 'K';
size <<= PAGE_SHIFT - 10;
} else {
factor = 'M';
size >>= 20 - PAGE_SHIFT;
}
/* Base can be > 32bit */
len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
"(%5luMB), size=%5lu%cB, count=%d: %s\n",
i, base, base >> (20 - PAGE_SHIFT), size,
factor, mtrr_usage_table[i],
mtrr_attrib_to_str(type));
}
return 0;
}
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void)
proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
return 0;
}
arch_initcall(mtrr_if_init);
#endif /* CONFIG_PROC_FS */

View File

@@ -25,43 +25,48 @@
Operating System Writer's Guide" (Intel document number 242692),
section 11.11.7
This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
on 6-7 March 2002.
Source: Intel Architecture Software Developers Manual, Volume 3:
This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
on 6-7 March 2002.
Source: Intel Architecture Software Developers Manual, Volume 3:
System Programming Guide; Section 9.11. (1997 edition - PPro).
*/
#define DEBUG
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>
#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>
#include "mtrr.h"
u32 num_var_ranges = 0;
u32 num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
struct mtrr_ops * mtrr_if = NULL;
struct mtrr_ops *mtrr_if;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void set_mtrr_ops(struct mtrr_ops * ops)
void set_mtrr_ops(struct mtrr_ops *ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
mtrr_ops[ops->vendor] = ops;
@@ -72,30 +77,36 @@ static int have_wrcomb(void)
{
struct pci_dev *dev;
u8 rev;
if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
/* ServerWorks LE chipsets < rev 6 have problems with write-combining
Don't allow it and leave room for other chipsets to be tagged */
dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
if (dev != NULL) {
/*
* ServerWorks LE chipsets < rev 6 have problems with
* write-combining. Don't allow it and leave room for other
* chipsets to be tagged
*/
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
if (rev <= 5) {
printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
}
/* Intel 450NX errata # 23. Non ascending cacheline evictions to
write combining memory may resulting in data corruption */
/*
* Intel 450NX errata # 23. Non ascending cacheline evictions to
* write combining memory may resulting in data corruption
*/
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
pci_dev_put(dev);
}
return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
}
return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}
/* This function returns the number of variable MTRRs */
@@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void)
{
unsigned long config = 0, dummy;
if (use_intel()) {
if (use_intel())
rdmsr(MSR_MTRRcap, config, dummy);
} else if (is_cpu(AMD))
else if (is_cpu(AMD))
config = 2;
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
config = 8;
num_var_ranges = config & 0xff;
}
@@ -130,10 +142,12 @@ struct set_mtrr_data {
mtrr_type smp_type;
};
/**
* ipi_handler - Synchronisation handler. Executed by "other" CPUs.
*
* Returns nothing.
*/
static void ipi_handler(void *info)
/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
[RETURNS] Nothing.
*/
{
#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
@@ -142,18 +156,19 @@ static void ipi_handler(void *info)
local_irq_save(flags);
atomic_dec(&data->count);
while(!atomic_read(&data->gate))
while (!atomic_read(&data->gate))
cpu_relax();
/* The master has cleared me to execute */
if (data->smp_reg != ~0U)
mtrr_if->set(data->smp_reg, data->smp_base,
if (data->smp_reg != ~0U) {
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
else
} else {
mtrr_if->set_all();
}
atomic_dec(&data->count);
while(atomic_read(&data->gate))
while (atomic_read(&data->gate))
cpu_relax();
atomic_dec(&data->count);
@@ -161,7 +176,8 @@ static void ipi_handler(void *info)
#endif
}
static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
return type1 == MTRR_TYPE_UNCACHABLE ||
type2 == MTRR_TYPE_UNCACHABLE ||
(type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
@@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
* @type: mtrr type
*
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
*
*
* 1. Send IPI to do the following:
* 2. Disable Interrupts
* 3. Wait for all procs to do so
* 3. Wait for all procs to do so
* 4. Enter no-fill cache mode
* 5. Flush caches
* 6. Clear PGE bit
@@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
* 10. Enable all range registers
* 11. Flush all TLBs and caches again
* 12. Enter normal cache mode and reenable caching
* 13. Set PGE
* 13. Set PGE
* 14. Wait for buddies to catch up
* 15. Enable interrupts.
*
*
* What does that mean for us? Well, first we set data.count to the number
* of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
* until it hits 0 and proceed. We set the data.gate flag and reset data.count.
* Meanwhile, they are waiting for that flag to be set. Once it's set, each
* CPU goes through the transition of updating MTRRs. The CPU vendors may each do it
* differently, so we call mtrr_if->set() callback and let them take care of it.
* When they're done, they again decrement data->count and wait for data.gate to
* be reset.
* When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
* Meanwhile, they are waiting for that flag to be set. Once it's set, each
* CPU goes through the transition of updating MTRRs.
* The CPU vendors may each do it differently,
* so we call mtrr_if->set() callback and let them take care of it.
* When they're done, they again decrement data->count and wait for data.gate
* to be reset.
* When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
* Everyone then enables interrupts and we all continue on.
*
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
* becomes nops.
*/
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
static void
set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
struct set_mtrr_data data;
unsigned long flags;
@@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base,
data.smp_size = size;
data.smp_type = type;
atomic_set(&data.count, num_booting_cpus() - 1);
/* make sure data.count is visible before unleashing other CPUs */
smp_wmb();
atomic_set(&data.gate,0);
/* Start the ball rolling on other CPUs */
/* Make sure data.count is visible before unleashing other CPUs */
smp_wmb();
atomic_set(&data.gate, 0);
/* Start the ball rolling on other CPUs */
if (smp_call_function(ipi_handler, &data, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
local_irq_save(flags);
while(atomic_read(&data.count))
while (atomic_read(&data.count))
cpu_relax();
/* ok, reset count and toggle gate */
/* Ok, reset count and toggle gate */
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
atomic_set(&data.gate,1);
atomic_set(&data.gate, 1);
/* do our MTRR business */
/* Do our MTRR business */
/* HACK!
/*
* HACK!
* We use this same function to initialize the mtrrs on boot.
* The state of the boot cpu's mtrrs has been saved, and we want
* to replicate across all the APs.
* to replicate across all the APs.
* If we're doing that @reg is set to something special...
*/
if (reg != ~0U)
mtrr_if->set(reg,base,size,type);
if (reg != ~0U)
mtrr_if->set(reg, base, size, type);
/* wait for the others */
while(atomic_read(&data.count))
/* Wait for the others */
while (atomic_read(&data.count))
cpu_relax();
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
atomic_set(&data.gate,0);
atomic_set(&data.gate, 0);
/*
* Wait here for everyone to have seen the gate change
* So we're the last ones to touch 'data'
*/
while(atomic_read(&data.count))
while (atomic_read(&data.count))
cpu_relax();
local_irq_restore(flags);
}
/**
* mtrr_add_page - Add a memory type region
* @base: Physical base address of region in pages (in units of 4 kB!)
* @size: Physical size of region in pages (4 kB)
* @type: Type of MTRR desired
* @increment: If this is true do usage counting on the region
* mtrr_add_page - Add a memory type region
* @base: Physical base address of region in pages (in units of 4 kB!)
* @size: Physical size of region in pages (4 kB)
* @type: Type of MTRR desired
* @increment: If this is true do usage counting on the region
*
* Memory type region registers control the caching on newer Intel and
* non Intel processors. This function allows drivers to request an
* MTRR is added. The details and hardware specifics of each processor's
* implementation are hidden from the caller, but nevertheless the
* caller should expect to need to provide a power of two size on an
* equivalent power of two boundary.
* Memory type region registers control the caching on newer Intel and
* non Intel processors. This function allows drivers to request an
* MTRR is added. The details and hardware specifics of each processor's
* implementation are hidden from the caller, but nevertheless the
* caller should expect to need to provide a power of two size on an
* equivalent power of two boundary.
*
* If the region cannot be added either because all regions are in use
* or the CPU cannot support it a negative value is returned. On success
* the register number for this entry is returned, but should be treated
* as a cookie only.
* If the region cannot be added either because all regions are in use
* or the CPU cannot support it a negative value is returned. On success
* the register number for this entry is returned, but should be treated
* as a cookie only.
*
* On a multiprocessor machine the changes are made to all processors.
* This is required on x86 by the Intel processors.
* On a multiprocessor machine the changes are made to all processors.
* This is required on x86 by the Intel processors.
*
* The available types are
* The available types are
*
* %MTRR_TYPE_UNCACHABLE - No caching
* %MTRR_TYPE_UNCACHABLE - No caching
*
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
*
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
*
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
*
* BUGS: Needs a quiet flag for the cases where drivers do not mind
* failures and do not wish system log messages to be sent.
* BUGS: Needs a quiet flag for the cases where drivers do not mind
* failures and do not wish system log messages to be sent.
*/
int mtrr_add_page(unsigned long base, unsigned long size,
int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment)
{
unsigned long lbase, lsize;
int i, replace, error;
mtrr_type ltype;
unsigned long lbase, lsize;
if (!mtrr_if)
return -ENXIO;
if ((error = mtrr_if->validate_add_page(base,size,type)))
error = mtrr_if->validate_add_page(base, size, type);
if (error)
return error;
if (type >= MTRR_NUM_TYPES) {
printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
pr_warning("mtrr: type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
printk(KERN_WARNING
"mtrr: your processor doesn't support write-combining\n");
pr_warning("mtrr: your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
printk(KERN_WARNING "mtrr: zero sized request\n");
pr_warning("mtrr: zero sized request\n");
return -EINVAL;
}
if (base & size_or_mask || size & size_or_mask) {
printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* No CPU hotplug when we change MTRR entries */
get_online_cpus();
/* Search for existing MTRR */
/* Search for existing MTRR */
mutex_lock(&mtrr_mutex);
for (i = 0; i < num_var_ranges; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
if (!lsize || base > lbase + lsize - 1 ||
base + size - 1 < lbase)
continue;
/* At this point we know there is some kind of overlap/enclosure */
/*
* At this point we know there is some kind of
* overlap/enclosure
*/
if (base < lbase || base + size - 1 > lbase + lsize - 1) {
if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
if (base <= lbase &&
base + size - 1 >= lbase + lsize - 1) {
/* New region encloses an existing region */
if (type == ltype) {
replace = replace == -1 ? i : -2;
continue;
}
else if (types_compatible(type, ltype))
} else if (types_compatible(type, ltype))
continue;
}
printk(KERN_WARNING
"mtrr: 0x%lx000,0x%lx000 overlaps existing"
" 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
" 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
}
/* New region is enclosed by an existing region */
/* New region is enclosed by an existing region */
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
}
if (increment)
@@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
goto out;
}
/* Search for an empty MTRR */
/* Search for an empty MTRR */
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
set_mtrr(i, base, size, type);
@@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size,
mtrr_usage_table[replace] = 0;
}
}
} else
printk(KERN_INFO "mtrr: no more MTRRs available\n");
} else {
pr_info("mtrr: no more MTRRs available\n");
}
error = i;
out:
mutex_unlock(&mtrr_mutex);
@@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
printk(KERN_WARNING
"mtrr: size and base must be multiples of 4 kiB\n");
printk(KERN_DEBUG
"mtrr: size: 0x%lx base: 0x%lx\n", size, base);
pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size)
}
/**
* mtrr_add - Add a memory type region
* @base: Physical base address of region
* @size: Physical size of region
* @type: Type of MTRR desired
* @increment: If this is true do usage counting on the region
* mtrr_add - Add a memory type region
* @base: Physical base address of region
* @size: Physical size of region
* @type: Type of MTRR desired
* @increment: If this is true do usage counting on the region
*
* Memory type region registers control the caching on newer Intel and
* non Intel processors. This function allows drivers to request an
* MTRR is added. The details and hardware specifics of each processor's
* implementation are hidden from the caller, but nevertheless the
* caller should expect to need to provide a power of two size on an
* equivalent power of two boundary.
* Memory type region registers control the caching on newer Intel and
* non Intel processors. This function allows drivers to request an
* MTRR is added. The details and hardware specifics of each processor's
* implementation are hidden from the caller, but nevertheless the
* caller should expect to need to provide a power of two size on an
* equivalent power of two boundary.
*
* If the region cannot be added either because all regions are in use
* or the CPU cannot support it a negative value is returned. On success
* the register number for this entry is returned, but should be treated
* as a cookie only.
* If the region cannot be added either because all regions are in use
* or the CPU cannot support it a negative value is returned. On success
* the register number for this entry is returned, but should be treated
* as a cookie only.
*
* On a multiprocessor machine the changes are made to all processors.
* This is required on x86 by the Intel processors.
* On a multiprocessor machine the changes are made to all processors.
* This is required on x86 by the Intel processors.
*
* The available types are
* The available types are
*
* %MTRR_TYPE_UNCACHABLE - No caching
* %MTRR_TYPE_UNCACHABLE - No caching
*
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
* %MTRR_TYPE_WRBACK - Write data back in bursts whenever
*
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
* %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
*
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
* %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
*
* BUGS: Needs a quiet flag for the cases where drivers do not mind
* failures and do not wish system log messages to be sent.
* BUGS: Needs a quiet flag for the cases where drivers do not mind
* failures and do not wish system log messages to be sent.
*/
int
mtrr_add(unsigned long base, unsigned long size, unsigned int type,
bool increment)
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
bool increment)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
increment);
}
EXPORT_SYMBOL(mtrr_add);
/**
* mtrr_del_page - delete a memory type region
* @reg: Register returned by mtrr_add
* @base: Physical base address
* @size: Size of region
* mtrr_del_page - delete a memory type region
* @reg: Register returned by mtrr_add
* @base: Physical base address
* @size: Size of region
*
* If register is supplied then base and size are ignored. This is
* how drivers should call it.
* If register is supplied then base and size are ignored. This is
* how drivers should call it.
*
* Releases an MTRR region. If the usage count drops to zero the
* register is freed and the region returns to default state.
* On success the register is returned, on failure a negative error
* code.
* Releases an MTRR region. If the usage count drops to zero the
* register is freed and the region returns to default state.
* On success the register is returned, on failure a negative error
* code.
*/
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
int i, max;
@@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
size);
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
base, size);
goto out;
}
}
if (reg >= max) {
printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
pr_warning("mtrr: register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
pr_warning("mtrr: MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
pr_warning("mtrr: reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
put_online_cpus();
return error;
}
/**
* mtrr_del - delete a memory type region
* @reg: Register returned by mtrr_add
* @base: Physical base address
* @size: Size of region
*
* If register is supplied then base and size are ignored. This is
* how drivers should call it.
*
* Releases an MTRR region. If the usage count drops to zero the
* register is freed and the region returns to default state.
* On success the register is returned, on failure a negative error
* code.
*/
int
mtrr_del(int reg, unsigned long base, unsigned long size)
/**
* mtrr_del - delete a memory type region
* @reg: Register returned by mtrr_add
* @base: Physical base address
* @size: Size of region
*
* If register is supplied then base and size are ignored. This is
* how drivers should call it.
*
* Releases an MTRR region. If the usage count drops to zero the
* register is freed and the region returns to default state.
* On success the register is returned, on failure a negative error
* code.
*/
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
EXPORT_SYMBOL(mtrr_add);
EXPORT_SYMBOL(mtrr_del);
/* HACK ALERT!
/*
* HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
@@ -576,29 +593,28 @@ struct mtrr_value {
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
{
int i;
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i,
&mtrr_value[i].lbase,
&mtrr_value[i].lsize,
&mtrr_value[i].ltype);
mtrr_if->get(i, &mtrr_value[i].lbase,
&mtrr_value[i].lsize,
&mtrr_value[i].ltype);
}
return 0;
}
static int mtrr_restore(struct sys_device * sysdev)
static int mtrr_restore(struct sys_device *sysdev)
{
int i;
for (i = 0; i < num_var_ranges; i++) {
if (mtrr_value[i].lsize)
set_mtrr(i,
mtrr_value[i].lbase,
mtrr_value[i].lsize,
mtrr_value[i].ltype);
if (mtrr_value[i].lsize) {
set_mtrr(i, mtrr_value[i].lbase,
mtrr_value[i].lsize,
mtrr_value[i].ltype);
}
}
return 0;
}
@@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup;
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
* This needs to be called early; before any of the other CPUs are
* This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
*
*
*/
void __init mtrr_bp_init(void)
{
u32 phys_addr;
init_ifs();
phys_addr = 32;
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = 0xff000000; /* 36 bits */
size_or_mask = 0xff000000; /* 36 bits */
size_and_mask = 0x00f00000;
phys_addr = 36;
/* This is an AMD specific MSR, but we assume(hope?) that
Intel will implement it to when they extend the address
bus of the Xeon. */
/*
* This is an AMD specific MSR, but we assume(hope?) that
* Intel will implement it to when they extend the address
* bus of the Xeon.
*/
if (cpuid_eax(0x80000000) >= 0x80000008) {
phys_addr = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
@@ -649,9 +668,11 @@ void __init mtrr_bp_init(void)
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
/* VIA C* family have Intel style MTRRs, but
don't support PAE */
size_or_mask = 0xfff00000; /* 32 bits */
/*
* VIA C* family have Intel style MTRRs,
* but don't support PAE
*/
size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
phys_addr = 32;
}
@@ -694,7 +715,6 @@ void __init mtrr_bp_init(void)
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
}
}
}
}
@@ -706,12 +726,17 @@ void mtrr_ap_init(void)
if (!mtrr_if || !use_intel())
return;
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
* but this routine will be called in cpu boot time, holding the lock
* breaks it. This routine is called in two cases: 1.very earily time
* of software resume, when there absolutely isn't mtrr entry changes;
* 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
* prevent mtrr entry changes
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
* holding the lock breaks it.
*
* This routine is called in two cases:
*
* 1. very earily time of software resume, when there absolutely
* isn't mtrr entry changes;
*
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
* lock to prevent mtrr entry changes
*/
local_irq_save(flags);
@@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
return 0;
if (use_intel()) {
if (!changed_by_mtrr_cleanup)
mtrr_state_warn();
} else {
/* The CPUs haven't MTRR and seem to not support SMP. They have
* specific drivers, we use a tricky method to support
* suspend/resume for them.
* TBD: is there any system with such CPU which supports
* suspend/resume? if no, we should remove the code.
*/
sysdev_driver_register(&cpu_sysdev_class,
&mtrr_sysdev_driver);
return 0;
}
/*
* The CPU has no MTRR and seems to not support SMP. They have
* specific drivers, we use a tricky method to support
* suspend/resume for them.
*
* TBD: is there any system with such CPU which supports
* suspend/resume? If no, we should remove the code.
*/
sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
return 0;
}
subsys_initcall(mtrr_init_finialize);

View File

@@ -1,5 +1,5 @@
/*
* local mtrr defines.
* local MTRR defines.
*/
#include <linux/types.h>
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
// void (*init)(void);
void (*set)(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void (*set_all)(void);
void (*get)(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type);
unsigned long *size, mtrr_type *type);
int (*get_free_region)(unsigned long base, unsigned long size,
int replace_reg);
int (*validate_add_page)(unsigned long base, unsigned long size,
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void);
/* library functions for processor-specific routines */
struct set_mtrr_context {
unsigned long flags;
unsigned long cr4val;
u32 deftype_lo;
u32 deftype_hi;
u32 ccr3;
unsigned long flags;
unsigned long cr4val;
u32 deftype_lo;
u32 deftype_hi;
u32 ccr3;
};
void set_mtrr_done(struct set_mtrr_context *ctxt);
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);
extern void set_mtrr_ops(struct mtrr_ops * ops);
extern void set_mtrr_ops(struct mtrr_ops *ops);
extern u64 size_or_mask, size_and_mask;
extern struct mtrr_ops * mtrr_if;
extern struct mtrr_ops *mtrr_if;
#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)

View File

@@ -1,24 +1,25 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include "mtrr.h"
/* Put the processor into a state where MTRRs can be safely set */
/* Put the processor into a state where MTRRs can be safely set */
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
{
unsigned int cr0;
/* Disable interrupts locally */
/* Disable interrupts locally */
local_irq_save(ctxt->flags);
if (use_intel() || is_cpu(CYRIX)) {
/* Save value of CR4 and clear Page Global Enable (bit 7) */
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
ctxt->cr4val = read_cr4();
write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
write_cr0(cr0);
wbinvd();
if (use_intel())
/* Save MTRR state */
if (use_intel()) {
/* Save MTRR state */
rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
else
/* Cyrix ARRs - everything else were excluded at the top */
} else {
/*
* Cyrix ARRs -
* everything else were excluded at the top
*/
ctxt->ccr3 = getCx86(CX86_CCR3);
}
}
}
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
{
if (use_intel())
/* Disable MTRRs, and set the default type to uncached */
if (use_intel()) {
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
ctxt->deftype_hi);
else if (is_cpu(CYRIX))
/* Cyrix ARRs - everything else were excluded at the top */
setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
} else {
if (is_cpu(CYRIX)) {
/* Cyrix ARRs - everything else were excluded at the top */
setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
}
}
}
/* Restore the processor after a set_mtrr_prepare */
/* Restore the processor after a set_mtrr_prepare */
void set_mtrr_done(struct set_mtrr_context *ctxt)
{
if (use_intel() || is_cpu(CYRIX)) {
/* Flush caches and TLBs */
/* Flush caches and TLBs */
wbinvd();
/* Restore MTRRdefType */
if (use_intel())
/* Restore MTRRdefType */
if (use_intel()) {
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
else
/* Cyrix ARRs - everything else was excluded at the top */
mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
ctxt->deftype_hi);
} else {
/*
* Cyrix ARRs -
* everything else was excluded at the top
*/
setCx86(CX86_CCR3, ctxt->ccr3);
}
/* Enable caches */
/* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
/* Restore value of CR4 */
/* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(ctxt->cr4val);
}
/* Re-enable interrupts locally (if enabled previously) */
/* Re-enable interrupts locally (if enabled previously) */
local_irq_restore(ctxt->flags);
}

View File

@@ -6,6 +6,7 @@
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
*
* For licencing details see kernel-base/COPYING
*/
@@ -20,6 +21,7 @@
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -27,12 +29,52 @@
static u64 perf_counter_mask __read_mostly;
/* The maximal number of PEBS counters: */
#define MAX_PEBS_COUNTERS 4
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
/* The size of a per-cpu BTS buffer in bytes: */
#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024)
/* The BTS overflow threshold in bytes from the end of the buffer: */
#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64)
/*
* Bits in the debugctlmsr controlling branch tracing.
*/
#define X86_DEBUGCTL_TR (1 << 6)
#define X86_DEBUGCTL_BTS (1 << 7)
#define X86_DEBUGCTL_BTINT (1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
*/
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base;
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
u64 pebs_counter_reset[MAX_PEBS_COUNTERS];
};
struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
struct debug_store *ds;
};
/*
@@ -58,6 +100,8 @@ struct x86_pmu {
int apic;
u64 max_period;
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter,
u64 prev_raw_count, new_raw_count;
s64 delta;
if (idx == X86_PMC_IDX_FIXED_BTS)
return 0;
/*
* Careful: an NMI might modify the previous counter value.
*
@@ -666,10 +713,110 @@ static void release_pmc_hardware(void)
#endif
}
static inline bool bts_available(void)
{
return x86_pmu.enable_bts != NULL;
}
static inline void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
if (!ds)
return;
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
(u32)((u64)(unsigned long)ds),
(u32)((u64)(unsigned long)ds >> 32));
}
static inline void fini_debug_store_on_cpu(int cpu)
{
if (!per_cpu(cpu_hw_counters, cpu).ds)
return;
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
static void release_bts_hardware(void)
{
int cpu;
if (!bts_available())
return;
get_online_cpus();
for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
if (!ds)
continue;
per_cpu(cpu_hw_counters, cpu).ds = NULL;
kfree((void *)(unsigned long)ds->bts_buffer_base);
kfree(ds);
}
put_online_cpus();
}
static int reserve_bts_hardware(void)
{
int cpu, err = 0;
if (!bts_available())
return 0;
get_online_cpus();
for_each_possible_cpu(cpu) {
struct debug_store *ds;
void *buffer;
err = -ENOMEM;
buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer))
break;
ds = kzalloc(sizeof(*ds), GFP_KERNEL);
if (unlikely(!ds)) {
kfree(buffer);
break;
}
ds->bts_buffer_base = (u64)(unsigned long)buffer;
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum =
ds->bts_buffer_base + BTS_BUFFER_SIZE;
ds->bts_interrupt_threshold =
ds->bts_absolute_maximum - BTS_OVFL_TH;
per_cpu(cpu_hw_counters, cpu).ds = ds;
err = 0;
}
if (err)
release_bts_hardware();
else {
for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu);
}
put_online_cpus();
return err;
}
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_bts_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
return 0;
}
static void intel_pmu_enable_bts(u64 config)
{
unsigned long debugctlmsr;
debugctlmsr = get_debugctlmsr();
debugctlmsr |= X86_DEBUGCTL_TR;
debugctlmsr |= X86_DEBUGCTL_BTS;
debugctlmsr |= X86_DEBUGCTL_BTINT;
if (!(config & ARCH_PERFMON_EVENTSEL_OS))
debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
if (!(config & ARCH_PERFMON_EVENTSEL_USR))
debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
update_debugctlmsr(debugctlmsr);
}
static void intel_pmu_disable_bts(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
unsigned long debugctlmsr;
if (!cpuc->ds)
return;
debugctlmsr = get_debugctlmsr();
debugctlmsr &=
~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
update_debugctlmsr(debugctlmsr);
}
/*
* Setup the hardware configuration for a given attr_type
*/
@@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
err = 0;
if (!atomic_inc_not_zero(&active_counters)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
err = -EBUSY;
else
if (atomic_read(&active_counters) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
err = reserve_bts_hardware();
}
if (!err)
atomic_inc(&active_counters);
mutex_unlock(&pmc_reserve_mutex);
}
@@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
if (config == -1LL)
return -EINVAL;
/*
* Branch tracing:
*/
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */
if (!bts_available())
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
return -EOPNOTSUPP;
}
hwc->config |= config;
return 0;
@@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void)
static void intel_pmu_disable_all(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
if (!cpuc->enabled)
return;
cpuc->enabled = 0;
barrier();
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
}
static void amd_pmu_disable_all(void)
@@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void)
static void intel_pmu_enable_all(void)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
if (cpuc->enabled)
return;
cpuc->enabled = 1;
barrier();
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_counter *counter =
cpuc->counters[X86_PMC_IDX_FIXED_BTS];
if (WARN_ON_ONCE(!counter))
return;
intel_pmu_enable_bts(counter->hw.config);
}
}
static void amd_pmu_enable_all(void)
@@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
static inline void
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
return;
}
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc, idx);
return;
@@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter,
s64 period = hwc->sample_period;
int err, ret = 0;
if (idx == X86_PMC_IDX_FIXED_BTS)
return 0;
/*
* If we are way outside a reasoable range then just skip forward:
*/
@@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
if (!__get_cpu_var(cpu_hw_counters).enabled)
return;
intel_pmu_enable_bts(hwc->config);
return;
}
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc, idx);
return;
@@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
unsigned int event;
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
if (unlikely((event ==
x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
(hwc->sample_period == 1)))
return X86_PMC_IDX_FIXED_BTS;
if (!x86_pmu.num_counters_fixed)
return -1;
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter)
int idx;
idx = fixed_mode_idx(counter, hwc);
if (idx >= 0) {
if (idx == X86_PMC_IDX_FIXED_BTS) {
/* BTS is already occupied. */
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
hwc->config_base = 0;
hwc->counter_base = 0;
hwc->idx = idx;
} else if (idx >= 0) {
/*
* Try to get the fixed counter, if that is already taken
* then try to get a generic counter:
@@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void)
local_irq_restore(flags);
}
static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
struct perf_sample_data *data)
{
struct debug_store *ds = cpuc->ds;
struct bts_record {
u64 from;
u64 to;
u64 flags;
};
struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
unsigned long orig_ip = data->regs->ip;
struct bts_record *at, *top;
if (!counter)
return;
if (!ds)
return;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index;
ds->bts_index = ds->bts_buffer_base;
for (; at < top; at++) {
data->regs->ip = at->from;
data->addr = at->to;
perf_counter_output(counter, 1, data);
}
data->regs->ip = orig_ip;
data->addr = 0;
/* There's new data available. */
counter->pending_kill = POLL_IN;
}
static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter)
* that we are disabling:
*/
x86_perf_counter_update(counter, hwc, idx);
/* Drain the remaining BTS records. */
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
struct perf_sample_data data;
struct pt_regs regs;
data.regs = &regs;
intel_pmu_drain_bts_buffer(cpuc, &data);
}
cpuc->counters[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
@@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter)
static void intel_pmu_reset(void)
{
struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
unsigned long flags;
int idx;
@@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void)
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
if (ds)
ds->bts_index = ds->bts_buffer_base;
local_irq_restore(flags);
}
@@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_counters);
perf_disable();
intel_pmu_drain_bts_buffer(cpuc, &data);
status = intel_pmu_get_status();
if (!status) {
perf_enable();
@@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = {
* the generic counter period:
*/
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
};
static struct x86_pmu amd_pmu = {
@@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
void hw_perf_counter_setup_online(int cpu)
{
init_debug_store_on_cpu(cpu);
}

View File

@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
return (msr - MSR_K7_PERFCTR0);
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return (msr - MSR_ARCH_PERFMON_PERFCTR0);
return msr - MSR_ARCH_PERFMON_PERFCTR0;
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_PERFCTR0);
return msr - MSR_P6_PERFCTR0;
case 15:
return (msr - MSR_P4_BPU_PERFCTR0);
return msr - MSR_P4_BPU_PERFCTR0;
}
}
return 0;
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
return (msr - MSR_K7_EVNTSEL0);
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_EVNTSEL0);
return msr - MSR_P6_EVNTSEL0;
case 15:
return (msr - MSR_P4_BSU_ESCR0);
return msr - MSR_P4_BSU_ESCR0;
}
}
return 0;
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
return (!test_bit(counter, perfctr_nmi_owner));
return !test_bit(counter, perfctr_nmi_owner);
}
/* checks the an msr for availability */
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
counter = nmi_perfctr_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
return (!test_bit(counter, perfctr_nmi_owner));
return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
*/
counter_val = (u64)cpu_khz * 1000;
do_div(counter_val, retval);
if (counter_val > 0x7fffffffULL) {
if (counter_val > 0x7fffffffULL) {
u64 count = (u64)cpu_khz * 1000;
do_div(count, 0x7fffffffUL);
retval = count + 1;
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
if(descr)
if (descr)
pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsrl(perfctr_msr, 0 - count);
}
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
if(descr)
if (descr)
pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsr(perfctr_msr, (u32)(-count), 0);
}
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
/* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
/* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
apic_write(APIC_LVTPC, APIC_DM_NMI);
/* P6/ARCH_PERFMON has 32 bit counter write */
write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}
static const struct wd_ops p6_wd_ops = {
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz)
if (smp_num_siblings == 2) {
unsigned int ebx, apicid;
ebx = cpuid_ebx(1);
apicid = (ebx >> 24) & 0xff;
ht_num = apicid & 1;
ebx = cpuid_ebx(1);
apicid = (ebx >> 24) & 0xff;
ht_num = apicid & 1;
} else
#endif
ht_num = 0;
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz)
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
| P4_ESCR_OS
| P4_ESCR_OS
| P4_ESCR_USR;
cccr_val |= P4_CCCR_THRESHOLD(15)
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
unsigned dummy;
/*
* P4 quirks:
* P4 quirks:
* - An overflown perfctr will assert its interrupt
* until the OVF flag in its CCCR is cleared.
* - LVTPC is masked on interrupt and must be
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
* NOTE: Corresponding bit = 0 in ebx indicates event present.
*/
cpuid(10, &(eax.full), &ebx, &unused, &unused);
if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
if ((eax.split.mask_length <
(ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
return 0;

View File

@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
#endif
seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
#ifdef CONFIG_X86_64
seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
c->x86_phys_bits, c->x86_virt_bits);
#endif
seq_printf(m, "power management:");
for (i = 0; i < 32; i++) {
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (i < ARRAY_SIZE(x86_power_flags) &&
x86_power_flags[i])
seq_printf(m, "%s%s",
x86_power_flags[i][0]?" ":"",
x86_power_flags[i][0] ? " " : "",
x86_power_flags[i]);
else
seq_printf(m, " [%d]", i);

View File

@@ -49,17 +49,17 @@ static inline int __vmware_platform(void)
static unsigned long __vmware_get_tsc_khz(void)
{
uint64_t tsc_hz;
uint32_t eax, ebx, ecx, edx;
uint64_t tsc_hz;
uint32_t eax, ebx, ecx, edx;
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
if (ebx == UINT_MAX)
return 0;
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
return tsc_hz;
if (ebx == UINT_MAX)
return 0;
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
return tsc_hz;
}
/*

View File

@@ -27,9 +27,7 @@ static void doublefault_fn(void)
if (ptr_ok(gdt)) {
gdt += GDT_ENTRY_TSS << 3;
tss = *(u16 *)(gdt+2);
tss += *(u8 *)(gdt+4) << 16;
tss += *(u8 *)(gdt+7) << 24;
tss = get_desc_base((struct desc_struct *)gdt);
printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {

View File

@@ -509,15 +509,15 @@ enum bts_field {
bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
};
static inline unsigned long bts_get(const char *base, enum bts_field field)
static inline unsigned long bts_get(const char *base, unsigned long field)
{
base += (ds_cfg.sizeof_ptr_field * field);
return *(unsigned long *)base;
}
static inline void bts_set(char *base, enum bts_field field, unsigned long val)
static inline void bts_set(char *base, unsigned long field, unsigned long val)
{
base += (ds_cfg.sizeof_ptr_field * field);;
base += (ds_cfg.sizeof_ptr_field * field);
(*(unsigned long *)base) = val;
}

View File

@@ -15,7 +15,6 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
#include <linux/ftrace.h>
#include <asm/stacktrace.h>

View File

@@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
{
int x = e820x->nr_map;
if (x == ARRAY_SIZE(e820x->map)) {
if (x >= ARRAY_SIZE(e820x->map)) {
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
return;
}

View File

@@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP
jne 1f
movl $per_cpu__gdt_page,%eax
movl $per_cpu__stack_canary,%ecx
subl $20, %ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)

View File

@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
@@ -236,8 +235,8 @@ void fixup_irqs(void)
}
if (desc->chip->set_affinity)
desc->chip->set_affinity(irq, affinity);
else if (desc->action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
else if (desc->action)
printk_once("Cannot set affinity for irq %i\n", irq);
}
#if 0

View File

@@ -34,7 +34,6 @@
struct kvm_para_state {
u8 mmu_queue[MMU_QUEUE_SIZE];
int mmu_queue_len;
enum paravirt_lazy_mode mode;
};
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
{
struct kvm_para_state *state = kvm_para_state();
if (state->mode != PARAVIRT_LAZY_MMU) {
if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
kvm_mmu_op(buffer, len);
return;
}
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
static void kvm_enter_lazy_mmu(void)
{
struct kvm_para_state *state = kvm_para_state();
paravirt_enter_lazy_mmu();
state->mode = paravirt_get_lazy_mode();
}
static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
mmu_queue_flush(state);
paravirt_leave_lazy_mmu();
state->mode = paravirt_get_lazy_mode();
}
static void __init paravirt_ops_setup(void)

View File

@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
struct timespec ts;
int low, high;
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
vcpu_time = &get_cpu_var(hv_clock);

View File

@@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type)
MP_bus_info(&bus);
}
ioapic.type = MP_IOAPIC;
ioapic.apicid = 2;
ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
ioapic.flags = MPC_APIC_USABLE;
ioapic.apicaddr = 0xFEC00000;
ioapic.type = MP_IOAPIC;
ioapic.apicid = 2;
ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
ioapic.flags = MPC_APIC_USABLE;
ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE;
MP_ioapic_info(&ioapic);
/*

View File

@@ -1,6 +1,7 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
* Copyright 2009 Intel Corporation; author: H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf,
for (; count; count -= 8) {
err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
if (err) {
if (err == -EFAULT) /* Fix idiotic error code */
err = -EIO;
if (err)
break;
}
if (copy_to_user(tmp, &data, 8)) {
err = -EFAULT;
break;
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
break;
}
err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
if (err) {
if (err == -EFAULT) /* Fix idiotic error code */
err = -EIO;
if (err)
break;
}
tmp += 2;
bytes += 8;
}
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
return bytes ? bytes : err;
}
static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
{
u32 __user *uregs = (u32 __user *)arg;
u32 regs[8];
int cpu = iminor(file->f_path.dentry->d_inode);
int err;
switch (ioc) {
case X86_IOC_RDMSR_REGS:
if (!(file->f_mode & FMODE_READ)) {
err = -EBADF;
break;
}
if (copy_from_user(&regs, uregs, sizeof regs)) {
err = -EFAULT;
break;
}
err = rdmsr_safe_regs_on_cpu(cpu, regs);
if (err)
break;
if (copy_to_user(uregs, &regs, sizeof regs))
err = -EFAULT;
break;
case X86_IOC_WRMSR_REGS:
if (!(file->f_mode & FMODE_WRITE)) {
err = -EBADF;
break;
}
if (copy_from_user(&regs, uregs, sizeof regs)) {
err = -EFAULT;
break;
}
err = wrmsr_safe_regs_on_cpu(cpu, regs);
if (err)
break;
if (copy_to_user(uregs, &regs, sizeof regs))
err = -EFAULT;
break;
default:
err = -ENOTTY;
break;
}
return err;
}
static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = {
.read = msr_read,
.write = msr_write,
.open = msr_open,
.unlocked_ioctl = msr_ioctl,
.compat_ioctl = msr_ioctl,
};
static int __cpuinit msr_device_create(int cpu)

View File

@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = {
#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.read_msr_amd = native_read_msr_amd_safe,
.rdmsr_regs = native_rdmsr_safe_regs,
.write_msr = native_write_msr_safe,
.wrmsr_regs = native_wrmsr_safe_regs,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.read_tscp = native_read_tscp,

View File

@@ -3,6 +3,7 @@
#include <linux/dmar.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
@@ -32,7 +33,14 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
int iommu_pass_through;
/*
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
* If this variable is 1, IOMMU implementations do no DMA ranslation for
* devices and allow every device to access to whole physical memory. This is
* useful if a user want to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
*/
int iommu_pass_through __read_mostly;
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
@@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void)
size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
kmemleak_ignore(dma32_bootmem_ptr);
if (dma32_bootmem_ptr)
dma32_bootmem_size = size;
else
@@ -147,7 +160,7 @@ again:
return NULL;
addr = page_to_phys(page);
if (!is_buffer_dma_capable(dma_mask, addr, size)) {
if (addr + size > dma_mask) {
__free_pages(page, get_order(size));
if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {

View File

@@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir)
static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
return force_iommu ||
!is_buffer_dma_capable(*dev->dma_mask, addr, size);
return force_iommu || !dma_capable(dev, addr, size);
}
static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
return !dma_capable(dev, addr, size);
}
/* Map a single continuous physical area into the IOMMU.

View File

@@ -14,7 +14,7 @@
static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
if (hwdev && !dma_capable(hwdev, bus, size)) {
if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, get_order(size));
}
static void nommu_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
flush_write_buffers();
}
static void nommu_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems,
enum dma_data_direction dir)
{
flush_write_buffers();
}
struct dma_map_ops nommu_dma_ops = {
.alloc_coherent = dma_generic_alloc_coherent,
.free_coherent = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.is_phys = 1,
.alloc_coherent = dma_generic_alloc_coherent,
.free_coherent = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
};
void __init no_iommu_init(void)

View File

@@ -13,31 +13,6 @@
int swiotlb __read_mostly;
void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
{
return alloc_bootmem_low_pages(size);
}
void *swiotlb_alloc(unsigned order, unsigned long nslabs)
{
return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}
dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
{
return paddr;
}
phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
{
return baddr;
}
int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
{
return 0;
}
static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{

View File

@@ -63,9 +63,6 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
/*
* Return saved PC of a blocked thread.
*/
@@ -361,14 +358,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
bool preload_fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
__unlazy_fpu(prev_p);
/* we're going to use this soon, after a few expensive things */
if (next_p->fpu_counter > 5)
if (preload_fpu)
prefetch(next->xstate);
/*
@@ -409,6 +413,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
/* If we're going to preload the fpu context, make sure clts
is run while we're batching the cpu state updates. */
if (preload_fpu)
clts();
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -418,15 +427,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*
* tsk_used_math() checks prevent calling math_state_restore(),
* which can sleep in the case of !tsk_used_math()
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
if (preload_fpu)
__math_state_restore();
/*
* Restore %gs if needed (which is common)

View File

@@ -57,9 +57,6 @@
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -399,9 +396,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
bool preload_fpu;
/*
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*/
preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
/* we're going to use this soon, after a few expensive things */
if (next_p->fpu_counter > 5)
if (preload_fpu)
prefetch(next->xstate);
/*
@@ -432,6 +437,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
/* Must be after DS reload */
unlazy_fpu(prev_p);
/* Make sure cpu is ready for new context */
if (preload_fpu)
clts();
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -472,9 +484,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
/* Must be after DS reload */
unlazy_fpu(prev_p);
/*
* Switch the PDA and FPU contexts.
*/
@@ -493,15 +502,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
*
* tsk_used_math() checks prevent calling math_state_restore(),
* which can sleep in the case of !tsk_used_math()
/*
* Preload the FPU context, now that we've determined that the
* task is likely to be using it.
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
if (preload_fpu)
__math_state_restore();
/*
* There's a problem with moving the arch_install_thread_hw_breakpoint()
* call before current is updated. Suppose a kernel breakpoint is

View File

@@ -860,6 +860,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
#ifdef CONFIG_X86_32

View File

@@ -436,7 +436,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
if (sched_mc_power_savings || sched_smt_power_savings)
if ((sched_mc_power_savings || sched_smt_power_savings) &&
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
return c->llc_shared_map;

View File

@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <asm/desc.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
* and APM bios ones we just ignore here.
*/
if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
u32 *desc;
struct desc_struct *desc;
unsigned long base;
seg &= ~7UL;
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
addr = -1L; /* bogus selector, access would fault */
else {
desc = child->mm->context.ldt + seg;
base = ((desc[0] >> 16) |
((desc[1] & 0xff) << 16) |
(desc[1] & 0xff000000));
base = get_desc_base(desc);
/* 16-bit code segment? */
if (!((desc[1] >> 22) & 1))
if (!desc->d)
addr &= 0xffff;
addr += base;
}

View File

@@ -640,13 +640,13 @@ static int __init uv_ptc_init(void)
if (!is_uv_system())
return 0;
proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
&proc_uv_ptc_operations);
if (!proc_uv_ptc) {
printk(KERN_ERR "unable to create %s proc entry\n",
UV_PTC_BASENAME);
return -EINVAL;
}
proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
return 0;
}

View File

@@ -76,7 +76,7 @@ char ignore_fpu_irq;
* F0 0F bug workaround.. We have a special link segment
* for this.
*/
gate_desc idt_table[256]
gate_desc idt_table[NR_VECTORS]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
#endif
@@ -765,27 +765,6 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
#endif
}
#ifdef CONFIG_X86_32
unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
{
struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
unsigned long new_kesp = kesp - base;
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
/* Set up base for espfix segment */
desc &= 0x00f0ff0000000000ULL;
desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
((((__u64)base) << 32) & 0xff00000000000000ULL) |
((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
(lim_pages & 0xffff);
*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
return new_kesp;
}
#endif
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
@@ -794,6 +773,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
/*
* __math_state_restore assumes that cr0.TS is already clear and the
* fpu state is all ready for use. Used during context switch.
*/
void __math_state_restore(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
stts();
force_sig(SIGSEGV, tsk);
return;
}
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
}
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
@@ -825,17 +826,8 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
stts();
force_sig(SIGSEGV, tsk);
return;
}
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
__math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);