Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (36 commits)
  x86, mm: Correct the implementation of is_untracked_pat_range()
  x86/pat: Trivial: don't create debugfs for memtype if pat is disabled
  x86, mtrr: Fix sorting of mtrr after subtracting
  x86: Move find_smp_config() earlier and avoid bootmem usage
  x86, platform: Change is_untracked_pat_range() to bool; cleanup init
  x86: Change is_ISA_range() into an inline function
  x86, mm: is_untracked_pat_range() takes a normal semiclosed range
  x86, mm: Call is_untracked_pat_range() rather than is_ISA_range()
  x86: UV SGI: Don't track GRU space in PAT
  x86: SGI UV: Fix BAU initialization
  x86, numa: Use near(er) online node instead of roundrobin for NUMA
  x86, numa, bootmem: Only free bootmem on NUMA failure path
  x86: Change crash kernel to reserve via reserve_early()
  x86: Eliminate redundant/contradicting cache line size config options
  x86: When cleaning MTRRs, do not fold WP into UC
  x86: remove "extern" from function prototypes in <asm/proto.h>
  x86, mm: Report state of NX protections during boot
  x86, mm: Clean up and simplify NX enablement
  x86, pageattr: Make set_memory_(x|nx) aware of NX support
  x86, sleep: Always save the value of EFER
  ...

Fix up conflicts (added both iommu_shutdown and is_untracked_pat_range to 'struct x86_platform_ops') in arch/x86/include/asm/x86_init.h and arch/x86/kernel/x86_init.c
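Note on the conflict: the iommu tree and this x86-mm tree each added a callback to the same platform-ops structure, so the resolution keeps both members. Below is a minimal, illustrative C sketch of the merged structure and of the semiclosed-range semantics described by the PAT commits above; the other members, their order, and the helper name are assumptions for illustration, not code copied from the tree.

	/* Hedged sketch only: member set and ordering are assumed, not verbatim. */
	#include <linux/types.h>

	struct x86_platform_ops {
		unsigned long (*calibrate_tsc)(void);		/* assumed pre-existing member */
		unsigned long (*get_wallclock)(void);		/* assumed pre-existing member */
		int (*set_wallclock)(unsigned long nowtime);	/* assumed pre-existing member */
		void (*iommu_shutdown)(void);			/* added by the iommu tree */
		bool (*is_untracked_pat_range)(u64 start, u64 end); /* added here; range is [start, end) */
	};

	/*
	 * Illustrative default: the legacy ISA hole (0xA0000..0x100000) is always
	 * mapped WB, so PAT need not track a range that lies entirely inside it.
	 * 'end' is exclusive, matching the "normal semiclosed range" commit above.
	 */
	static bool example_is_untracked_pat_range(u64 start, u64 end)
	{
		return start >= 0xA0000 && end <= 0x100000;
	}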
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
use_gbpages = direct_gbpages;
#endif

set_nx();
if (nx_enabled)
printk(KERN_INFO "NX (Execute Disable) protection: active\n");

/* Enable PSE if available */
if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}

static void __init add_one_highpage_init(struct page *page, int pfn)
static void __init add_one_highpage_init(struct page *page)
{
ClearPageReserved(page);
init_page_count(page);

@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
add_one_highpage_init(page, node_pfn);
add_one_highpage_init(page);
}

return 0;

@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void)
}

#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init initmem_init(unsigned long start_pfn,
unsigned long end_pfn)
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;

@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

static int kernel_set_to_readonly;
int kernel_set_to_readonly __read_mostly;

void set_kernel_text_rw(void)
{
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start,
}

#ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
unsigned long bootmap_size, bootmap;

@@ -694,12 +695,12 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

static int kernel_set_to_readonly;
int kernel_set_to_readonly;

void set_kernel_text_rw(void)
{
unsigned long start = PFN_ALIGN(_stext);
unsigned long end = PFN_ALIGN(__start_rodata);
unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(__stop___ex_table);

if (!kernel_set_to_readonly)
return;

@@ -707,13 +708,18 @@ void set_kernel_text_rw(void)
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end);

/*
* Make the kernel identity mapping for text RW. Kernel text
* mapping will always be RO. Refer to the comment in
* static_protections() in pageattr.c
*/
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
}

void set_kernel_text_ro(void)
{
unsigned long start = PFN_ALIGN(_stext);
unsigned long end = PFN_ALIGN(__start_rodata);
unsigned long start = PFN_ALIGN(_text);
unsigned long end = PFN_ALIGN(__stop___ex_table);

if (!kernel_set_to_readonly)
return;

@@ -721,14 +727,21 @@ void set_kernel_text_ro(void)
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end);

/*
* Set the kernel identity mapping for text RO.
*/
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
}

void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
unsigned long end = (unsigned long) &__end_rodata_hpage_align;
unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
unsigned long data_start = (unsigned long) &_sdata;

printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);

@@ -751,6 +764,14 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: again\n");
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif

free_init_pages("unused kernel memory",
(unsigned long) page_address(virt_to_page(text_end)),
(unsigned long)
page_address(virt_to_page(rodata_start)));
free_init_pages("unused kernel memory",
(unsigned long) page_address(virt_to_page(rodata_end)),
(unsigned long) page_address(virt_to_page(data_start)));
}

#endif
@@ -24,6 +24,9 @@
#include <asm/apic.h>
#include <asm/k8.h>

static struct bootnode __initdata nodes[8];
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;

static __init int find_northbridge(void)
{
int num;

@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
* need to get boot_cpu_id so can use that to create apicid_to_node
* in k8_scan_nodes()
*/
/*
* Find possible boot-time SMP configuration:
*/
#ifdef CONFIG_X86_MPPARSE
early_find_smp_config();
#endif
#ifdef CONFIG_ACPI
/*
* Read APIC information from ACPI tables.
*/
early_acpi_boot_init();
#endif
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:

@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
early_init_lapic_mapping();
}

int __init k8_scan_nodes(unsigned long start, unsigned long end)
int __init k8_get_nodes(struct bootnode *physnodes)
{
unsigned numnodes, cores, bits, apicid_base;
int i;
int ret = 0;

for_each_node_mask(i, nodes_parsed) {
physnodes[ret].start = nodes[i].start;
physnodes[ret].end = nodes[i].end;
ret++;
}
return ret;
}

int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long start = PFN_PHYS(start_pfn);
unsigned long end = PFN_PHYS(end_pfn);
unsigned numnodes;
unsigned long prevbase;
struct bootnode nodes[8];
int i, j, nb, found = 0;
int i, nb, found = 0;
u32 nodeid, reg;

if (!early_pci_allowed())

@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (nb < 0)
return nb;

printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
pr_info("Scanning NUMA topology in Northbridge %d\n", nb);

reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
return -1;

printk(KERN_INFO "Number of nodes %d\n", numnodes);
pr_info("Number of physical nodes %d\n", numnodes);

memset(&nodes, 0, sizeof(nodes));
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base, limit;
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 7;
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
pr_info("Skipping disabled node %d\n", i);
continue;
}
if (nodeid >= numnodes) {
printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
base, limit);
pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
base, limit);
continue;
}

if (!limit) {
printk(KERN_INFO "Skipping node entry %d (base %lx)\n",
i, base);
pr_info("Skipping node entry %d (base %lx)\n",
i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
nodeid, (base>>8)&3, (limit>>8) & 3);
pr_err("Node %d using interleaving mode %lx/%lx\n",
nodeid, (base >> 8) & 3, (limit >> 8) & 3);
return -1;
}
if (node_isset(nodeid, node_possible_map)) {
printk(KERN_INFO "Node %d already present. Skipping\n",
nodeid);
if (node_isset(nodeid, nodes_parsed)) {
pr_info("Node %d already present, skipping\n",
nodeid);
continue;
}

@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
limit |= (1<<24)-1;
limit++;

if (limit > max_pfn << PAGE_SHIFT)
limit = max_pfn << PAGE_SHIFT;
if (limit > end)
limit = end;
if (limit <= base)
continue;

@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit > end)
limit = end;
if (limit == base) {
printk(KERN_ERR "Empty node %d\n", nodeid);
pr_err("Empty node %d\n", nodeid);
continue;
}
if (limit < base) {
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
pr_err("Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
continue;
}

/* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) {
printk(KERN_ERR "Node map not sorted %lx,%lx\n",
pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
return -1;
}

printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);

found++;

@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)

prevbase = base;

node_set(nodeid, node_possible_map);
node_set(nodeid, nodes_parsed);
}

if (!found)
return -1;
return 0;
}

int __init k8_scan_nodes(void)
{
unsigned int bits;
unsigned int cores;
unsigned int apicid_base;
int i;

BUG_ON(nodes_empty(nodes_parsed));
node_possible_map = nodes_parsed;
memnode_shift = compute_hash_shift(nodes, 8, NULL);
if (memnode_shift < 0) {
printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
pr_err("No NUMA node hash function found. Contact maintainer\n");
return -1;
}
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
pr_info("Using node hash shift of %d\n", memnode_shift);

/* use the coreid bits from early_identify_cpu */
bits = boot_cpu_data.x86_coreid_bits;

@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
/* need to get boot_cpu_id early for system with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
printk(KERN_INFO "BSP APIC ID: %02x\n",
boot_cpu_physical_apicid);
pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
apicid_base = boot_cpu_physical_apicid;
}

for (i = 0; i < 8; i++) {
if (nodes[i].start == nodes[i].end)
continue;
for_each_node_mask(i, node_possible_map) {
int j;

e820_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT,
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}

void __init initmem_init(unsigned long start_pfn,
unsigned long end_pfn)
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
int nid;
long kva_target_pfn;
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
bootmap = early_node_mem(nodeid, bootmap_start, end,
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
if (nodedata_phys < start || nodedata_phys >= end)
free_bootmem(nodedata_phys, pgdat_size);
if (nodedata_phys < start || nodedata_phys >= end) {
/*
* only need to free it if it is from other node
* bootmem
*/
if (nid != nodeid)
free_bootmem(nodedata_phys, pgdat_size);
}
node_data[nodeid] = NULL;
return;
}

@@ -306,8 +312,71 @@ void __init numa_init_array(void)

#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __initdata;
static char *cmdline __initdata;

static int __init setup_physnodes(unsigned long start, unsigned long end,
int acpi, int k8)
{
int nr_nodes = 0;
int ret = 0;
int i;

#ifdef CONFIG_ACPI_NUMA
if (acpi)
nr_nodes = acpi_get_nodes(physnodes);
#endif
#ifdef CONFIG_K8_NUMA
if (k8)
nr_nodes = k8_get_nodes(physnodes);
#endif
/*
* Basic sanity checking on the physical node map: there may be errors
* if the SRAT or K8 incorrectly reported the topology or the mem=
* kernel parameter is used.
*/
for (i = 0; i < nr_nodes; i++) {
if (physnodes[i].start == physnodes[i].end)
continue;
if (physnodes[i].start > end) {
physnodes[i].end = physnodes[i].start;
continue;
}
if (physnodes[i].end < start) {
physnodes[i].start = physnodes[i].end;
continue;
}
if (physnodes[i].start < start)
physnodes[i].start = start;
if (physnodes[i].end > end)
physnodes[i].end = end;
}

/*
* Remove all nodes that have no memory or were truncated because of the
* limited address range.
*/
for (i = 0; i < nr_nodes; i++) {
if (physnodes[i].start == physnodes[i].end)
continue;
physnodes[ret].start = physnodes[i].start;
physnodes[ret].end = physnodes[i].end;
ret++;
}

/*
* If no physical topology was detected, a single node is faked to cover
* the entire address space.
*/
if (!ret) {
physnodes[ret].start = start;
physnodes[ret].end = end;
ret = 1;
}
return ret;
}

/*
* Setups up nid to range from addr to addr + size. If the end
* boundary is greater than max_addr, then max_addr is used instead.
@@ -315,11 +384,9 @@ static char *cmdline __initdata;
* allocation past addr and -1 otherwise. addr is adjusted to be at
* the end of the node.
*/
static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
u64 size, u64 max_addr)
static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
{
int ret = 0;

nodes[nid].start = *addr;
*addr += size;
if (*addr >= max_addr) {

@@ -334,13 +401,112 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
return ret;
}

/*
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
* to max_addr. The return value is the number of nodes allocated.
*/
static int __init split_nodes_interleave(u64 addr, u64 max_addr,
int nr_phys_nodes, int nr_nodes)
{
nodemask_t physnode_mask = NODE_MASK_NONE;
u64 size;
int big;
int ret = 0;
int i;

if (nr_nodes <= 0)
return -1;
if (nr_nodes > MAX_NUMNODES) {
pr_info("numa=fake=%d too large, reducing to %d\n",
nr_nodes, MAX_NUMNODES);
nr_nodes = MAX_NUMNODES;
}

size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes;
/*
* Calculate the number of big nodes that can be allocated as a result
* of consolidating the remainder.
*/
big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) /
FAKE_NODE_MIN_SIZE;

size &= FAKE_NODE_MIN_HASH_MASK;
if (!size) {
pr_err("Not enough memory for each node. "
"NUMA emulation disabled.\n");
return -1;
}

for (i = 0; i < nr_phys_nodes; i++)
if (physnodes[i].start != physnodes[i].end)
node_set(i, physnode_mask);

/*
* Continue to fill physical nodes with fake nodes until there is no
* memory left on any of them.
*/
while (nodes_weight(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
u64 end = physnodes[i].start + size;
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);

if (ret < big)
end += FAKE_NODE_MIN_SIZE;

/*
* Continue to add memory to this fake node if its
* non-reserved memory is less than the per-node size.
*/
while (end - physnodes[i].start -
e820_hole_size(physnodes[i].start, end) < size) {
end += FAKE_NODE_MIN_SIZE;
if (end > physnodes[i].end) {
end = physnodes[i].end;
break;
}
}

/*
* If there won't be at least FAKE_NODE_MIN_SIZE of
* non-reserved memory in ZONE_DMA32 for the next node,
* this one must extend to the boundary.
*/
if (end < dma32_end && dma32_end - end -
e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
end = dma32_end;

/*
* If there won't be enough non-reserved memory for the
* next node, this one must extend to the end of the
* physical node.
*/
if (physnodes[i].end - end -
e820_hole_size(end, physnodes[i].end) < size)
end = physnodes[i].end;

/*
* Avoid allocating more nodes than requested, which can
* happen as a result of rounding down each node's size
* to FAKE_NODE_MIN_SIZE.
*/
if (nodes_weight(physnode_mask) + ret >= nr_nodes)
end = physnodes[i].end;

if (setup_node_range(ret++, &physnodes[i].start,
end - physnodes[i].start,
physnodes[i].end) < 0)
node_clear(i, physnode_mask);
}
}
return ret;
}

/*
* Splits num_nodes nodes up equally starting at node_start. The return value
* is the number of nodes split up and addr is adjusted to be at the end of the
* last node allocated.
*/
static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
u64 max_addr, int node_start,
static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,
int num_nodes)
{
unsigned int big;
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
break;
}
}
if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
break;
}
return i - node_start + 1;

@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
* always assigned to a final node and can be asymmetric. Returns the number of
* nodes split.
*/
static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
u64 max_addr, int node_start, u64 size)
static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
u64 size)
{
int i = node_start;
size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
while (!setup_node_range(i++, nodes, addr, size, max_addr))
while (!setup_node_range(i++, addr, size, max_addr))
;
return i - node_start;
}

@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
* Sets up the system RAM area from start_pfn to last_pfn according to the
* numa=fake command-line option.
*/
static struct bootnode nodes[MAX_NUMNODES] __initdata;

static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
static int __init numa_emulation(unsigned long start_pfn,
unsigned long last_pfn, int acpi, int k8)
{
u64 size, addr = start_pfn << PAGE_SHIFT;
u64 max_addr = last_pfn << PAGE_SHIFT;
int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
int num_phys_nodes;

memset(&nodes, 0, sizeof(nodes));
num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
/*
* If the numa=fake command-line is just a single number N, split the
* system RAM into N fake nodes.

@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn
if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
long n = simple_strtol(cmdline, NULL, 0);

num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
num_nodes = split_nodes_interleave(addr, max_addr,
num_phys_nodes, n);
if (num_nodes < 0)
return num_nodes;
goto out;

@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn
size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
if (size)
for (i = 0; i < coeff; i++, num_nodes++)
if (setup_node_range(num_nodes, nodes,
&addr, size, max_addr) < 0)
if (setup_node_range(num_nodes, &addr,
size, max_addr) < 0)
goto done;
if (!*cmdline)
break;

@@ -473,7 +640,7 @@ done:
if (addr < max_addr) {
if (coeff_flag && coeff < 0) {
/* Split remaining nodes into num-sized chunks */
num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
num_nodes += split_nodes_by_size(&addr, max_addr,
num_nodes, num);
goto out;
}

@@ -482,7 +649,7 @@ done:
/* Split remaining nodes into coeff chunks */
if (coeff <= 0)
break;
num_nodes += split_nodes_equally(nodes, &addr, max_addr,
num_nodes += split_nodes_equally(&addr, max_addr,
num_nodes, coeff);
break;
case ',':

@@ -490,8 +657,8 @@ done:
break;
default:
/* Give one final node */
setup_node_range(num_nodes, nodes, &addr,
max_addr - addr, max_addr);
setup_node_range(num_nodes, &addr, max_addr - addr,
max_addr);
num_nodes++;
}
}
@@ -505,14 +672,10 @@ out:
}

/*
* We need to vacate all active ranges that may have been registered by
* SRAT and set acpi_numa to -1 so that srat_disabled() always returns
* true. NUMA emulation has succeeded so we will not scan ACPI nodes.
* We need to vacate all active ranges that may have been registered for
* the e820 memory map.
*/
remove_all_active_ranges();
#ifdef CONFIG_ACPI_NUMA
acpi_numa = -1;
#endif
for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);

@@ -524,7 +687,8 @@ out:
}
#endif /* CONFIG_NUMA_EMU */

void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
int acpi, int k8)
{
int i;

@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
nodes_clear(node_online_map);

#ifdef CONFIG_NUMA_EMU
if (cmdline && !numa_emulation(start_pfn, last_pfn))
if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif

#ifdef CONFIG_ACPI_NUMA
if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
last_pfn << PAGE_SHIFT))
if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
last_pfn << PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif

#ifdef CONFIG_K8_NUMA
if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
last_pfn<<PAGE_SHIFT))
if (!numa_off && k8 && !k8_scan_nodes())
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);

@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);

#ifdef CONFIG_NUMA

static __init int find_near_online_node(int node)
{
int n, val;
int min_val = INT_MAX;
int best_node = -1;

for_each_online_node(n) {
val = node_distance(node, n);

if (val < min_val) {
min_val = val;
best_node = n;
}
}

return best_node;
}

/*
* Setup early cpu_to_node.
*

@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void)
if (node == NUMA_NO_NODE)
continue;
if (!node_online(node))
continue;
node = find_near_online_node(node);
numa_set_node(cpu, node);
}
}
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;

#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
* will be always read-only. For the kernel identity mappings covering
* the holes caused by this alignment can be anything that user asks.
*
* This will preserve the large page mappings for kernel text/data
* at no extra cost.
*/
if (kernel_set_to_readonly &&
within(address, (unsigned long)_text,
(unsigned long)__end_rodata_hpage_align))
pgprot_val(forbidden) |= _PAGE_RW;
#endif

prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

return prot;

@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb);

int set_memory_x(unsigned long addr, int numpages)
{
if (!(__supported_pte_mask & _PAGE_NX))
return 0;

return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
if (!(__supported_pte_mask & _PAGE_NX))
return 0;

return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);
@@ -20,6 +20,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>

@@ -388,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
}

/* Low ISA region is always mapped WB in page table. No need to track */
if (is_ISA_range(start, end - 1)) {
if (x86_platform.is_untracked_pat_range(start, end)) {
if (new_type)
*new_type = _PAGE_CACHE_WB;
return 0;

@@ -499,7 +500,7 @@ int free_memtype(u64 start, u64 end)
return 0;

/* Low ISA region is always mapped WB. No need to track */
if (is_ISA_range(start, end - 1))
if (x86_platform.is_untracked_pat_range(start, end))
return 0;

is_range_ram = pat_pagerange_is_ram(start, end);

@@ -582,7 +583,7 @@ static unsigned long lookup_memtype(u64 paddr)
int rettype = _PAGE_CACHE_WB;
struct memtype *entry;

if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
return rettype;

if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {

@@ -1018,8 +1019,10 @@ static const struct file_operations memtype_fops = {

static int __init pat_memtype_list_init(void)
{
debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
NULL, &memtype_fops);
if (pat_enabled) {
debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}
return 0;
}
@@ -3,10 +3,8 @@
#include <linux/init.h>

#include <asm/pgtable.h>
#include <asm/proto.h>

int nx_enabled;

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static int disable_nx __cpuinitdata;

/*

@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str)
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2)) {
__supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
} else if (!strncmp(str, "off", 3)) {
disable_nx = 1;
__supported_pte_mask &= ~_PAGE_NX;
}
x86_configure_nx();
return 0;
}
early_param("noexec", noexec_setup);
#endif

#ifdef CONFIG_X86_PAE
void __init set_nx(void)
void __cpuinit x86_configure_nx(void)
{
unsigned int v[4], l, h;

if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);

if ((v[3] & (1 << 20)) && !disable_nx) {
rdmsr(MSR_EFER, l, h);
l |= EFER_NX;
wrmsr(MSR_EFER, l, h);
nx_enabled = 1;
__supported_pte_mask |= _PAGE_NX;
}
}
}
#else
void set_nx(void)
{
}
#endif

#ifdef CONFIG_X86_64
void __cpuinit check_efer(void)
{
unsigned long efer;

rdmsrl(MSR_EFER, efer);
if (!(efer & EFER_NX) || disable_nx)
if (cpu_has_nx && !disable_nx)
__supported_pte_mask |= _PAGE_NX;
else
__supported_pte_mask &= ~_PAGE_NX;
}
#endif

void __init x86_report_nx(void)
{
if (!cpu_has_nx) {
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU or disabled in BIOS!\n");
} else {
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
if (disable_nx) {
printk(KERN_INFO "NX (Execute Disable) protection: "
"disabled by kernel command line option\n");
} else {
printk(KERN_INFO "NX (Execute Disable) protection: "
"active\n");
}
#else
/* 32bit non-PAE kernel, NX cannot be used */
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"cannot be enabled: non-PAE kernel!\n");
#endif
}
}
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)

printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
e820_register_active_regions(node, start >> PAGE_SHIFT,
end >> PAGE_SHIFT);

if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
update_nodes_add(node, start, end);

@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)

void __init acpi_numa_arch_fixup(void) {}

int __init acpi_get_nodes(struct bootnode *physnodes)
{
int i;
int ret = 0;

for_each_node_mask(i, nodes_parsed) {
physnodes[ret].start = nodes[i].start;
physnodes[ret].end = nodes[i].end;
ret++;
}
return ret;
}

/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{

@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end);

if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}

memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
memblk_nodeid);
if (memnode_shift < 0) {

@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}

for_each_node_mask(i, nodes_parsed)
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}

/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);

@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
for (i = 0; i < num_nodes; i++)
if (fake_nodes[i].start != fake_nodes[i].end)
node_set(i, nodes_parsed);
WARN_ON(!nodes_cover_memory(fake_nodes));
}

static int null_slit_node_compare(int a, int b)
@@ -8,6 +8,7 @@

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

@@ -43,7 +44,7 @@ union smp_flush_state {
spinlock_t tlbstate_lock;
DECLARE_BITMAP(flush_cpumask, NR_CPUS);
};
char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
char pad[INTERNODE_CACHE_BYTES];
} ____cacheline_internodealigned_in_smp;

/* State is put into the per CPU data section, but padded