Merge branch 'x86/mpparse' into x86/devel

Conflicts:

	arch/x86/Kconfig
	arch/x86/kernel/io_apic_32.c
	arch/x86/kernel/setup_64.c
	arch/x86/mm/init_32.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar
2008-07-08 11:14:58 +02:00
90 changed files with 3899 additions and 2993 deletions

View File

@@ -38,6 +38,7 @@
#include <asm/setup.h>
#include <asm/mmzone.h>
#include <asm/bios_ebda.h>
#include <asm/proto.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
/*
* 4) physnode_map - the mapping between a pfn and owning node
* physnode_map keeps track of the physical memory layout of a generic
* numa node on a 256Mb break (each element of the array will
* represent 256Mb of memory and will be marked by the node id. so,
* numa node on a 64Mb break (each element of the array will
* represent 64Mb of memory and will be marked by the node id. so,
* if the first gig is on node 0, and the second gig is on node 1
* physnode_map will contain:
*
* physnode_map[0-3] = 0;
* physnode_map[4-7] = 1;
* physnode_map[8- ] = -1;
* physnode_map[0-15] = 0;
* physnode_map[16-31] = 1;
* physnode_map[32- ] = -1;
*/
s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
EXPORT_SYMBOL(physnode_map);
@@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
printk(KERN_DEBUG " ");
for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
printk("%ld ", pfn);
printk(KERN_CONT "%ld ", pfn);
}
printk("\n");
printk(KERN_CONT "\n");
}
unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
@@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
#endif
extern unsigned long find_max_low_pfn(void);
extern void add_one_highpage_init(struct page *, int, int);
extern unsigned long highend_pfn, highstart_pfn;
#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -119,11 +119,11 @@ int __init get_memcfg_numa_flat(void)
{
printk("NUMA - single node, flat memory mode\n");
/* Run the memory configuration and find the top of memory. */
propagate_e820_map();
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
e820_register_active_regions(0, 0, max_pfn);
memory_present(0, 0, max_pfn);
node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
/* Indicate there is one node available. */
nodes_clear(node_online_map);
@@ -159,9 +159,17 @@ static void __init allocate_pgdat(int nid)
if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid])
NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
else {
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
min_low_pfn += PFN_UP(sizeof(pg_data_t));
unsigned long pgdat_phys;
pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
(nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
sizeof(pg_data_t),
PAGE_SIZE);
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
"NODE_DATA");
}
printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
nid, (unsigned long)NODE_DATA(nid));
}
/*
@@ -199,8 +207,12 @@ void __init remap_numa_kva(void)
int node;
for_each_online_node(node) {
printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
(unsigned long)vaddr,
node_remap_start_pfn[node] + pfn);
set_pmd_pfn((ulong) vaddr,
node_remap_start_pfn[node] + pfn,
PAGE_KERNEL_LARGE);
@@ -212,17 +224,21 @@ static unsigned long calculate_numa_remap_pages(void)
{
int nid;
unsigned long size, reserve_pages = 0;
unsigned long pfn;
for_each_online_node(nid) {
unsigned old_end_pfn = node_end_pfn[nid];
u64 node_kva_target;
u64 node_kva_final;
/*
* The acpi/srat node info can show hot-add memory zones
* where memory could be added but not currently present.
*/
printk("node %d pfn: [%lx - %lx]\n",
nid, node_start_pfn[nid], node_end_pfn[nid]);
if (node_start_pfn[nid] > max_pfn)
continue;
if (!node_end_pfn[nid])
continue;
if (node_end_pfn[nid] > max_pfn)
node_end_pfn[nid] = max_pfn;
@@ -234,39 +250,45 @@ static unsigned long calculate_numa_remap_pages(void)
/* now the roundup is correct, convert to PAGE_SIZE pages */
size = size * PTRS_PER_PTE;
/*
* Validate the region we are allocating only contains valid
* pages.
*/
for (pfn = node_end_pfn[nid] - size;
pfn < node_end_pfn[nid]; pfn++)
if (!page_is_ram(pfn))
break;
node_kva_target = round_down(node_end_pfn[nid] - size,
PTRS_PER_PTE);
node_kva_target <<= PAGE_SHIFT;
do {
node_kva_final = find_e820_area(node_kva_target,
((u64)node_end_pfn[nid])<<PAGE_SHIFT,
((u64)size)<<PAGE_SHIFT,
LARGE_PAGE_BYTES);
node_kva_target -= LARGE_PAGE_BYTES;
} while (node_kva_final == -1ULL &&
(node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
if (pfn != node_end_pfn[nid])
size = 0;
if (node_kva_final == -1ULL)
panic("Can not get kva ram\n");
printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
size, nid);
node_remap_size[nid] = size;
node_remap_offset[nid] = reserve_pages;
reserve_pages += size;
printk("Shrinking node %d from %ld pages to %ld pages\n",
nid, node_end_pfn[nid], node_end_pfn[nid] - size);
printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
size, nid, node_kva_final>>PAGE_SHIFT);
if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
/*
* Align node_end_pfn[] and node_remap_start_pfn[] to
* pmd boundary. remap_numa_kva will barf otherwise.
*/
printk("Shrinking node %d further by %ld pages for proper alignment\n",
nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
size += node_end_pfn[nid] & (PTRS_PER_PTE-1);
}
/*
* prevent kva address below max_low_pfn want it on system
* with less memory later.
* layout will be: KVA address , KVA RAM
*
* we are supposed to only record the one less than max_low_pfn
* but we could have some hole in high memory, and it will only
* check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
* to use it as free.
* So reserve_early here, hope we don't run out of that array
*/
reserve_early(node_kva_final,
node_kva_final+(((u64)size)<<PAGE_SHIFT),
"KVA RAM");
node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
remove_active_range(nid, node_remap_start_pfn[nid],
node_remap_start_pfn[nid] + size);
}
printk("Reserving total of %ld pages for numa KVA remap\n",
reserve_pages);
@@ -284,8 +306,7 @@ static void init_remap_allocator(int nid)
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid],
(ulong) pfn_to_kaddr(highstart_pfn
+ node_remap_offset[nid] + node_remap_size[nid]));
(ulong) node_remap_end_vaddr[nid]);
}
extern void setup_bootmem_allocator(void);
@@ -293,7 +314,7 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
unsigned long wasted_pages;
long kva_target_pfn;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -302,34 +323,38 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundary between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
/* call find_max_low_pfn at first, it could update max_pfn */
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
remove_all_active_ranges();
get_memcfg_numa();
kva_pages = calculate_numa_remap_pages();
kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
/* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
kva_start_pfn = find_max_low_pfn() - kva_pages;
kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
do {
kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
max_low_pfn<<PAGE_SHIFT,
kva_pages<<PAGE_SHIFT,
PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
kva_target_pfn -= PTRS_PER_PTE;
} while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
#ifdef CONFIG_BLK_DEV_INITRD
/* Numa kva area is below the initrd */
if (initrd_start)
kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
- kva_pages;
#endif
if (kva_start_pfn == -1UL)
panic("Can not get kva space\n");
/*
* We waste pages past at the end of the KVA for no good reason other
* than how it is located. This is bad.
*/
wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
kva_start_pfn -= wasted_pages;
kva_pages += wasted_pages;
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
/* avoid clash with initrd */
reserve_early(kva_start_pfn<<PAGE_SHIFT,
(kva_start_pfn + kva_pages)<<PAGE_SHIFT,
"KVA PG");
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > system_max_low_pfn)
@@ -365,16 +390,8 @@ unsigned long __init setup_memory(void)
return max_low_pfn;
}
void __init numa_kva_reserve(void)
{
if (kva_pages)
reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
BOOTMEM_DEFAULT);
}
void __init zone_sizes_init(void)
{
int nid;
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] =
@@ -384,27 +401,18 @@ void __init zone_sizes_init(void)
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
#endif
/* If SRAT has not registered memory, register it now */
if (find_max_pfn_with_active_regions() == 0) {
for_each_online_node(nid) {
if (node_has_online_mem(nid))
add_active_range(nid, node_start_pfn[nid],
node_end_pfn[nid]);
}
}
free_area_init_nodes(max_zone_pfns);
return;
}
void __init set_highmem_pages_init(int bad_ppro)
void __init set_highmem_pages_init(void)
{
#ifdef CONFIG_HIGHMEM
struct zone *zone;
struct page *page;
int nid;
for_each_zone(zone) {
unsigned long node_pfn, zone_start_pfn, zone_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
if (!is_highmem(zone))
continue;
@@ -412,16 +420,12 @@ void __init set_highmem_pages_init(int bad_ppro)
zone_start_pfn = zone->zone_start_pfn;
zone_end_pfn = zone_start_pfn + zone->spanned_pages;
nid = zone_to_nid(zone);
printk("Initializing %s for node %d (%08lx:%08lx)\n",
zone->name, zone_to_nid(zone),
zone_start_pfn, zone_end_pfn);
zone->name, nid, zone_start_pfn, zone_end_pfn);
for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn, bad_ppro);
}
add_highpages_with_active_regions(nid, zone_start_pfn,
zone_end_pfn);
}
totalram_pages += totalhigh_pages;
#endif
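
The recurring pattern in the hunks above is placement by probing: allocate_pgdat(), calculate_numa_remap_pages() and setup_memory() now ask find_e820_area() for a free range instead of assuming one is available, and the latter two retry top-down, stepping one large page lower after each failed probe and panicking only if nothing fits above the node start (or min_low_pfn). A minimal userspace sketch of that probe loop — find_free_area() and the busy[] list below are hypothetical stand-ins for find_e820_area() and the e820 map:

	/*
	 * Userspace sketch (not kernel code): top-down placement by probing.
	 * find_free_area() and the busy[] list are hypothetical stand-ins for
	 * find_e820_area() and the e820 map.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define LARGE_PAGE_BYTES (512ULL * 4096ULL)	/* 2 MB; PTRS_PER_PTE * PAGE_SIZE with PAE */

	struct range { uint64_t start, end; };		/* [start, end) already in use */

	static const struct range busy[] = {
		{ 0x00000000ULL, 0x00100000ULL },	/* low 1 MB */
		{ 0x1f000000ULL, 0x20000000ULL },	/* something parked at the top of the node */
	};

	/* Hypothetical: is an aligned block of 'size' at the start of [start, end) free? */
	static uint64_t find_free_area(uint64_t start, uint64_t end, uint64_t size, uint64_t align)
	{
		uint64_t addr = (start + align - 1) & ~(align - 1);

		if (addr + size > end)
			return ~0ULL;
		for (size_t i = 0; i < sizeof(busy) / sizeof(busy[0]); i++)
			if (addr < busy[i].end && addr + size > busy[i].start)
				return ~0ULL;		/* overlaps; caller retries lower */
		return addr;
	}

	int main(void)
	{
		uint64_t node_start = 0x00000000ULL;
		uint64_t node_end   = 0x20000000ULL;	/* a 512 MB node */
		uint64_t size = 4 * LARGE_PAGE_BYTES;	/* area we want to place */
		uint64_t target = (node_end - size) & ~(LARGE_PAGE_BYTES - 1);
		uint64_t final;

		/* Same shape as the new kernel loops: drop one large page per failed probe. */
		do {
			final = find_free_area(target, node_end, size, LARGE_PAGE_BYTES);
			target -= LARGE_PAGE_BYTES;
		} while (final == ~0ULL && target > node_start);

		if (final == ~0ULL)
			printf("no room below %#llx\n", (unsigned long long)node_end);
		else
			printf("placed area at %#llx\n", (unsigned long long)final);
		return 0;
	}

Starting at the highest aligned address that can hold the area and only walking down on failure keeps the reservation as high as the memory map allows, which is what lets the remap space sit just under the end of each node.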

View File

@@ -225,13 +225,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
update_page_count(PG_LEVEL_4K, pages_4k);
}
static inline int page_kills_ppro(unsigned long pagenr)
{
if (pagenr >= 0x70000 && pagenr <= 0x7003F)
return 1;
return 0;
}
/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
@@ -292,29 +285,60 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
static void __init add_one_highpage_init(struct page *page, int pfn)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
totalhigh_pages++;
} else
SetPageReserved(page);
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
struct add_highpages_data {
unsigned long start_pfn;
unsigned long end_pfn;
};
static void __init add_highpages_work_fn(unsigned long start_pfn,
unsigned long end_pfn, void *datax)
{
int node_pfn;
struct page *page;
unsigned long final_start_pfn, final_end_pfn;
struct add_highpages_data *data;
data = (struct add_highpages_data *)datax;
final_start_pfn = max(start_pfn, data->start_pfn);
final_end_pfn = min(end_pfn, data->end_pfn);
if (final_start_pfn >= final_end_pfn)
return;
for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
node_pfn++) {
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn);
}
}
void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
struct add_highpages_data data;
data.start_pfn = start_pfn;
data.end_pfn = end_pfn;
work_with_active_regions(nid, add_highpages_work_fn, &data);
}
#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
static void __init set_highmem_pages_init(void)
{
int pfn;
add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
/*
* Holes under sparsemem might not have no mem_map[]:
*/
if (pfn_valid(pfn))
add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
}
totalram_pages += totalhigh_pages;
}
#endif /* !CONFIG_NUMA */
@@ -322,7 +346,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
#else
# define kmap_init() do { } while (0)
# define permanent_kmaps_init(pgd_base) do { } while (0)
# define set_highmem_pages_init(bad_ppro) do { } while (0)
# define set_highmem_pages_init() do { } while (0)
#endif /* CONFIG_HIGHMEM */
pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
@@ -569,13 +593,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc;
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
int tmp, bad_ppro;
int tmp;
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
bad_ppro = ppro_with_ram_bug();
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
@@ -587,7 +609,7 @@ void __init mem_init(void)
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
set_highmem_pages_init(bad_ppro);
set_highmem_pages_init();
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
@@ -776,3 +798,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
free_init_pages("initrd memory", start, end);
}
#endif
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
return reserve_bootmem(phys, len, flags);
}
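
In the hunks above, set_highmem_pages_init() stops walking raw pfn ranges and instead goes through add_highpages_with_active_regions(), which packs the caller's window into a small struct and lets work_with_active_regions() invoke a callback per active region; the callback clamps each region against the window before freeing pages. A minimal sketch of that clamp-and-iterate shape — for_each_region() and the toy region list are hypothetical stand-ins for work_with_active_regions() and the kernel's active-region table:

	/*
	 * Userspace sketch (not kernel code): clamp-and-iterate over regions via a
	 * callback.  for_each_region() is a hypothetical stand-in for
	 * work_with_active_regions(); the region list is made up.
	 */
	#include <stdio.h>

	struct window {
		unsigned long start_pfn, end_pfn;	/* caller's [start, end) window */
	};

	typedef void (*region_fn)(unsigned long start, unsigned long end, void *data);

	/* Hypothetical: hand every "active region" to the callback. */
	static void for_each_region(region_fn fn, void *data)
	{
		static const unsigned long regions[][2] = {
			{ 0x00000, 0x0009f },		/* toy region list, in pfns */
			{ 0x00100, 0x40000 },
		};

		for (unsigned int i = 0; i < sizeof(regions) / sizeof(regions[0]); i++)
			fn(regions[i][0], regions[i][1], data);
	}

	static void highpages_fn(unsigned long start, unsigned long end, void *data)
	{
		struct window *w = data;
		unsigned long s = start > w->start_pfn ? start : w->start_pfn;
		unsigned long e = end < w->end_pfn ? end : w->end_pfn;

		if (s >= e)
			return;				/* region is outside the window */
		printf("would free highmem pfns [%#lx, %#lx)\n", s, e);
	}

	int main(void)
	{
		/* e.g. highstart_pfn..highend_pfn on a box with 960 MB RAM, 896 MB lowmem */
		struct window w = { .start_pfn = 0x38000, .end_pfn = 0x3c000 };

		for_each_region(highpages_fn, &w);
		return 0;
	}

Because the clamp happens inside the callback, only pfns that fall in an active region, inside the requested window, and (in the kernel version) pass pfn_valid() ever reach add_one_highpage_init().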

View File

@@ -48,6 +48,18 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
/*
* PFN of last memory page.
*/
unsigned long end_pfn;
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
*/
unsigned long max_pfn_mapped;
static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -808,12 +820,14 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
#ifdef CONFIG_NUMA
int nid, next_nid;
#endif
unsigned long pfn = phys >> PAGE_SHIFT;
int ret;
if (pfn >= end_pfn) {
/*
@@ -821,11 +835,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
* firmware tables:
*/
if (pfn < max_pfn_mapped)
return;
return -EFAULT;
printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
phys, len);
return;
return -EFAULT;
}
/* Should check here against the e820 map to avoid double free */
@@ -833,9 +847,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
nid = phys_to_nid(phys);
next_nid = phys_to_nid(phys + len - 1);
if (nid == next_nid)
reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
else
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
ret = reserve_bootmem(phys, len, flags);
if (ret != 0)
return ret;
#else
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif
@@ -844,6 +862,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
dma_reserve += len / PAGE_SIZE;
set_dma_reserve(dma_reserve);
}
return 0;
}
int kern_addr_valid(unsigned long addr)

View File

@@ -57,18 +57,22 @@ static __init void early_get_boot_cpu_id(void)
/*
* Find possible boot-time SMP configuration:
*/
#ifdef CONFIG_X86_MPPARSE
early_find_smp_config();
#endif
#ifdef CONFIG_ACPI
/*
* Read APIC information from ACPI tables.
*/
early_acpi_boot_init();
#endif
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
early_get_smp_config();
#endif
early_init_lapic_mapping();
}

View File

@@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
else
bootmap_start = round_up(start, PAGE_SIZE);
/*
* SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
*/
bootmap = early_node_mem(nodeid, bootmap_start, end,