Merge branch 'akpm' (more incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
 - A little DM fix
 - the MM queue

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (154 commits)
  ksm: allocate roots when needed
  mm: cleanup "swapcache" in do_swap_page
  mm,ksm: swapoff might need to copy
  mm,ksm: FOLL_MIGRATION do migration_entry_wait
  ksm: shrink 32-bit rmap_item back to 32 bytes
  ksm: treat unstable nid like in stable tree
  ksm: add some comments
  tmpfs: fix mempolicy object leaks
  tmpfs: fix use-after-free of mempolicy object
  mm/fadvise.c: drain all pagevecs if POSIX_FADV_DONTNEED fails to discard all pages
  mm: export mmu notifier invalidates
  mm: accelerate mm_populate() treatment of THP pages
  mm: use long type for page counts in mm_populate() and get_user_pages()
  mm: accurately document nr_free_*_pages functions with code comments
  HWPOISON: change order of error_states[]'s elements
  HWPOISON: fix misjudgement of page_action() for errors on mlocked pages
  memcg: stop warning on memcg_propagate_kmem
  net: change type of virtio_chan->p9_max_pages
  vmscan: change type of vm_total_pages to unsigned long
  fs/nfsd: change type of max_delegations, nfsd_drc_max_mem and nfsd_drc_mem_used
  ...
@@ -57,8 +57,8 @@ static inline int numa_cpu_node(int cpu)
 #endif
 
 #ifdef CONFIG_NUMA
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
+extern void numa_set_node(int cpu, int node);
+extern void numa_clear_node(int cpu);
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
@@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
  * as a pte too.
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 
 #endif /* !__ASSEMBLY__ */
@@ -696,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
+#ifdef CONFIG_ACPI_NUMA
+        set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
+#endif
+
         per_cpu(x86_cpu_to_apicid, cpu) = -1;
         set_cpu_present(cpu, false);
         num_processors--;
@@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p)
         setup_bios_corruption_check();
 #endif
 
+        /*
+         * In the memory hotplug case, the kernel needs info from SRAT to
+         * determine which memory is hotpluggable before allocating memory
+         * using memblock.
+         */
+        acpi_boot_table_init();
+        early_acpi_boot_init();
+        early_parse_srat();
+
 #ifdef CONFIG_X86_32
         printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
                         (max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1101,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
         /*
          * Parse the ACPI tables for possible boot-time SMP configuration.
          */
-        acpi_boot_table_init();
-
-        early_acpi_boot_init();
-
         initmem_init();
         memblock_find_dma_reserve();
 
@@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
         return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
 #endif
 
 /*
@@ -707,6 +707,343 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+        struct zone *zone;
+        bool bootmem = false;
+        unsigned long magic;
+        unsigned int nr_pages = 1 << order;
+
+        /* bootmem page has reserved flag */
+        if (PageReserved(page)) {
+                __ClearPageReserved(page);
+                bootmem = true;
+
+                magic = (unsigned long)page->lru.next;
+                if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+                        while (nr_pages--)
+                                put_page_bootmem(page++);
+                } else
+                        __free_pages_bootmem(page, order);
+        } else
+                free_pages((unsigned long)page_address(page), order);
+
+        /*
+         * SECTION_INFO pages and MIX_SECTION_INFO pages
+         * are all allocated by bootmem.
+         */
+        if (bootmem) {
+                zone = page_zone(page);
+                zone_span_writelock(zone);
+                zone->present_pages += nr_pages;
+                zone_span_writeunlock(zone);
+                totalram_pages += nr_pages;
+        }
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+        pte_t *pte;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                pte = pte_start + i;
+                if (pte_val(*pte))
+                        return;
+        }
+
+        /* free a pte table */
+        free_pagetable(pmd_page(*pmd), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pmd_clear(pmd);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+        pmd_t *pmd;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PMD; i++) {
+                pmd = pmd_start + i;
+                if (pmd_val(*pmd))
+                        return;
+        }
+
+        /* free a pmd table */
+        free_pagetable(pud_page(*pud), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pud_clear(pud);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+        pud_t *pud;
+        int i;
+
+        for (i = 0; i < PTRS_PER_PUD; i++) {
+                pud = pud_start + i;
+                if (pud_val(*pud))
+                        return false;
+        }
+
+        /* free a pud table */
+        free_pagetable(pgd_page(*pgd), 0);
+        spin_lock(&init_mm.page_table_lock);
+        pgd_clear(pgd);
+        spin_unlock(&init_mm.page_table_lock);
+
+        return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pte_t *pte;
+        void *page_addr;
+        phys_addr_t phys_addr;
+
+        pte = pte_start + pte_index(addr);
+        for (; addr < end; addr = next, pte++) {
+                next = (addr + PAGE_SIZE) & PAGE_MASK;
+                if (next > end)
+                        next = end;
+
+                if (!pte_present(*pte))
+                        continue;
+
+                /*
+                 * We mapped [0,1G) memory as identity mapping when
+                 * initializing, in arch/x86/kernel/head_64.S. These
+                 * pagetables cannot be removed.
+                 */
+                phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+                if (phys_addr < (phys_addr_t)0x40000000)
+                        return;
+
+                if (IS_ALIGNED(addr, PAGE_SIZE) &&
+                    IS_ALIGNED(next, PAGE_SIZE)) {
+                        /*
+                         * Do not free direct mapping pages since they were
+                         * freed when offlining, or simply not in use.
+                         */
+                        if (!direct)
+                                free_pagetable(pte_page(*pte), 0);
+
+                        spin_lock(&init_mm.page_table_lock);
+                        pte_clear(&init_mm, addr, pte);
+                        spin_unlock(&init_mm.page_table_lock);
+
+                        /* For non-direct mapping, pages means nothing. */
+                        pages++;
+                } else {
+                        /*
+                         * If we are here, we are freeing vmemmap pages since
+                         * direct mapped memory ranges to be freed are aligned.
+                         *
+                         * If we are not removing the whole page, it means
+                         * other page structs in this page are being used and
+                         * we cannot remove them. So fill the unused page_structs
+                         * with 0xFD, and remove the page when it is wholly
+                         * filled with 0xFD.
+                         */
+                        memset((void *)addr, PAGE_INUSE, next - addr);
+
+                        page_addr = page_address(pte_page(*pte));
+                        if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+                                free_pagetable(pte_page(*pte), 0);
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pte_clear(&init_mm, addr, pte);
+                                spin_unlock(&init_mm.page_table_lock);
+                        }
+                }
+        }
+
+        /* Call free_pte_table() in remove_pmd_table(). */
+        flush_tlb_all();
+        if (direct)
+                update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pte_t *pte_base;
+        pmd_t *pmd;
+        void *page_addr;
+
+        pmd = pmd_start + pmd_index(addr);
+        for (; addr < end; addr = next, pmd++) {
+                next = pmd_addr_end(addr, end);
+
+                if (!pmd_present(*pmd))
+                        continue;
+
+                if (pmd_large(*pmd)) {
+                        if (IS_ALIGNED(addr, PMD_SIZE) &&
+                            IS_ALIGNED(next, PMD_SIZE)) {
+                                if (!direct)
+                                        free_pagetable(pmd_page(*pmd),
+                                                       get_order(PMD_SIZE));
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pmd_clear(pmd);
+                                spin_unlock(&init_mm.page_table_lock);
+                                pages++;
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pmd_page(*pmd));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PMD_SIZE)) {
+                                        free_pagetable(pmd_page(*pmd),
+                                                       get_order(PMD_SIZE));
+
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pmd_clear(pmd);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+
+                        continue;
+                }
+
+                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+                remove_pte_table(pte_base, addr, next, direct);
+                free_pte_table(pte_base, pmd);
+        }
+
+        /* Call free_pmd_table() in remove_pud_table(). */
+        if (direct)
+                update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+                 bool direct)
+{
+        unsigned long next, pages = 0;
+        pmd_t *pmd_base;
+        pud_t *pud;
+        void *page_addr;
+
+        pud = pud_start + pud_index(addr);
+        for (; addr < end; addr = next, pud++) {
+                next = pud_addr_end(addr, end);
+
+                if (!pud_present(*pud))
+                        continue;
+
+                if (pud_large(*pud)) {
+                        if (IS_ALIGNED(addr, PUD_SIZE) &&
+                            IS_ALIGNED(next, PUD_SIZE)) {
+                                if (!direct)
+                                        free_pagetable(pud_page(*pud),
+                                                       get_order(PUD_SIZE));
+
+                                spin_lock(&init_mm.page_table_lock);
+                                pud_clear(pud);
+                                spin_unlock(&init_mm.page_table_lock);
+                                pages++;
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pud_page(*pud));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PUD_SIZE)) {
+                                        free_pagetable(pud_page(*pud),
+                                                       get_order(PUD_SIZE));
+
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pud_clear(pud);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+
+                        continue;
+                }
+
+                pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+                remove_pmd_table(pmd_base, addr, next, direct);
+                free_pmd_table(pmd_base, pud);
+        }
+
+        if (direct)
+                update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual addresses. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+        unsigned long next;
+        pgd_t *pgd;
+        pud_t *pud;
+        bool pgd_changed = false;
+
+        for (; start < end; start = next) {
+                next = pgd_addr_end(start, end);
+
+                pgd = pgd_offset_k(start);
+                if (!pgd_present(*pgd))
+                        continue;
+
+                pud = (pud_t *)pgd_page_vaddr(*pgd);
+                remove_pud_table(pud, start, next, direct);
+                if (free_pud_table(pud, pgd))
+                        pgd_changed = true;
+        }
+
+        if (pgd_changed)
+                sync_global_pgds(start, end - 1);
+
+        flush_tlb_all();
+}
+
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+        unsigned long start = (unsigned long)memmap;
+        unsigned long end = (unsigned long)(memmap + nr_pages);
+
+        remove_pagetable(start, end, false);
+}
+
+static void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+        start = (unsigned long)__va(start);
+        end = (unsigned long)__va(end);
+
+        remove_pagetable(start, end, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+        unsigned long start_pfn = start >> PAGE_SHIFT;
+        unsigned long nr_pages = size >> PAGE_SHIFT;
+        struct zone *zone;
+        int ret;
+
+        zone = page_zone(pfn_to_page(start_pfn));
+        kernel_physical_mapping_remove(start, start + size);
+        ret = __remove_pages(zone, start_pfn, nr_pages);
+        WARN_ON_ONCE(ret);
+
+        return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
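A minimal standalone sketch of the PAGE_INUSE bookkeeping that remove_pte_table()/remove_pmd_table() apply to partially used vmemmap pages above (illustrative only: the function name and the 4096-byte page size are assumptions, and plain C has no memchr_inv(), so the scan is done by hand):

        #include <stdbool.h>
        #include <string.h>

        #define PAGE_INUSE 0xFD
        #define SKETCH_PAGE_SIZE 4096   /* stand-in for the kernel's PAGE_SIZE */

        /* Poison the sub-range [addr, next) of one backing page and report
         * whether the whole page is now poisoned, i.e. no live page structs
         * remain and the caller may free the backing page. */
        bool vmemmap_range_now_unused(unsigned char *page_start,
                                      unsigned char *addr, unsigned char *next)
        {
                size_t i;

                memset(addr, PAGE_INUSE, (size_t)(next - addr));
                for (i = 0; i < SKETCH_PAGE_SIZE; i++)
                        if (page_start[i] != PAGE_INUSE)
                                return false;   /* some page structs still in use */
                return true;                    /* wholly 0xFD: safe to free */
        }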
@@ -1019,6 +1356,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
         return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+                                  struct page *start_page, unsigned long size)
+{
+        unsigned long addr = (unsigned long)start_page;
+        unsigned long end = (unsigned long)(start_page + size);
+        unsigned long next;
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        unsigned int nr_pages;
+        struct page *page;
+
+        for (; addr < end; addr = next) {
+                pte_t *pte = NULL;
+
+                pgd = pgd_offset_k(addr);
+                if (pgd_none(*pgd)) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        continue;
+                }
+                get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+                pud = pud_offset(pgd, addr);
+                if (pud_none(*pud)) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        continue;
+                }
+                get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+                if (!cpu_has_pse) {
+                        next = (addr + PAGE_SIZE) & PAGE_MASK;
+                        pmd = pmd_offset(pud, addr);
+                        if (pmd_none(*pmd))
+                                continue;
+                        get_page_bootmem(section_nr, pmd_page(*pmd),
+                                         MIX_SECTION_INFO);
+
+                        pte = pte_offset_kernel(pmd, addr);
+                        if (pte_none(*pte))
+                                continue;
+                        get_page_bootmem(section_nr, pte_page(*pte),
+                                         SECTION_INFO);
+                } else {
+                        next = pmd_addr_end(addr, end);
+
+                        pmd = pmd_offset(pud, addr);
+                        if (pmd_none(*pmd))
+                                continue;
+
+                        nr_pages = 1 << (get_order(PMD_SIZE));
+                        page = pmd_page(*pmd);
+                        while (nr_pages--)
+                                get_page_bootmem(section_nr, page++,
+                                                 SECTION_INFO);
+                }
+        }
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
         if (p_start) {
@@ -56,7 +56,7 @@ early_param("numa", numa_setup);
 /*
  * apicid, cpu, node mappings
  */
-s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 __apicid_to_node[MAX_LOCAL_APIC] = {
         [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
 {
         int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
 
@@ -101,7 +101,7 @@ void __cpuinit numa_set_node(int cpu, int node)
         set_cpu_numa_node(cpu, node);
 }
 
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
 {
         numa_set_node(cpu, NUMA_NO_NODE);
 }
@@ -213,10 +213,9 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
          * Allocate node data. Try node-local memory and then any node.
          * Never allocate in DMA zone.
          */
-        nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
         if (!nd_pa) {
-                pr_err("Cannot find %zu bytes in node %d\n",
-                       nd_size, nid);
+                pr_err("Cannot find %zu bytes in any node\n", nd_size);
                 return;
         }
         nd = __va(nd_pa);
@@ -561,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
         for (i = 0; i < MAX_LOCAL_APIC; i++)
                 set_apicid_to_node(i, NUMA_NO_NODE);
 
-        nodes_clear(numa_nodes_parsed);
+        /*
+         * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
+         * SRAT was parsed earlier in early_parse_srat().
+         */
         nodes_clear(node_possible_map);
         nodes_clear(node_online_map);
-        memset(&numa_meminfo, 0, sizeof(numa_meminfo));
         WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
         numa_reset_distance();
 
@@ -529,21 +529,13 @@ out_unlock:
         return do_split;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
 {
         unsigned long pfn, pfninc = 1;
         unsigned int i, level;
-        pte_t *pbase, *tmp;
+        pte_t *tmp;
         pgprot_t ref_prot;
-        struct page *base;
-
-        if (!debug_pagealloc)
-                spin_unlock(&cpa_lock);
-        base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-        if (!debug_pagealloc)
-                spin_lock(&cpa_lock);
-        if (!base)
-                return -ENOMEM;
+        struct page *base = virt_to_page(pbase);
 
         spin_lock(&pgd_lock);
         /*
@@ -551,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
          * up for us already:
          */
         tmp = lookup_address(address, &level);
-        if (tmp != kpte)
-                goto out_unlock;
+        if (tmp != kpte) {
+                spin_unlock(&pgd_lock);
+                return 1;
+        }
 
-        pbase = (pte_t *)page_address(base);
         paravirt_alloc_pte(&init_mm, page_to_pfn(base));
         ref_prot = pte_pgprot(pte_clrhuge(*kpte));
         /*
@@ -601,21 +594,31 @@ static int split_large_page(pte_t *kpte, unsigned long address)
          * going on.
          */
         __flush_tlb_all();
-
-        base = NULL;
-
-out_unlock:
-        /*
-         * If we dropped out via the lookup_address check under
-         * pgd_lock then stick the page back into the pool:
-         */
-        if (base)
-                __free_page(base);
         spin_unlock(&pgd_lock);
 
         return 0;
 }
 
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+        pte_t *pbase;
+        struct page *base;
+
+        if (!debug_pagealloc)
+                spin_unlock(&cpa_lock);
+        base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+        if (!debug_pagealloc)
+                spin_lock(&cpa_lock);
+        if (!base)
+                return -ENOMEM;
+
+        pbase = (pte_t *)page_address(base);
+        if (__split_large_page(kpte, address, pbase))
+                __free_page(base);
+
+        return 0;
+}
+
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
                                int primary)
 {
@@ -141,11 +141,126 @@ static inline int save_add_info(void) {return 1;}
 static inline int save_add_info(void) {return 0;}
 #endif
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static void __init
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+        int overlap, i;
+        unsigned long start_pfn, end_pfn;
+
+        start_pfn = PFN_DOWN(start);
+        end_pfn = PFN_UP(end);
+
+        /*
+         * For movablemem_map=acpi:
+         *
+         * SRAT:            |_____| |_____| |_________| |_________| ......
+         * node id:         0       1       1           2
+         * hotpluggable:    n       y       y           n
+         * movablemem_map:          |_____| |_________|
+         *
+         * Using movablemem_map, we can prevent memblock from allocating memory
+         * on ZONE_MOVABLE at boot time.
+         *
+         * Before parsing SRAT, memblock has already reserved some memory ranges
+         * for other purposes, such as for the kernel image. We cannot prevent
+         * the kernel from using this memory, so we need to exclude it even if
+         * it is hotpluggable.
+         * Furthermore, to ensure the kernel has enough memory to boot, we make
+         * all the memory on the node which the kernel resides in
+         * un-hotpluggable.
+         */
+        if (hotpluggable && movablemem_map.acpi) {
+                /* Exclude ranges reserved by memblock. */
+                struct memblock_type *rgn = &memblock.reserved;
+
+                for (i = 0; i < rgn->cnt; i++) {
+                        if (end <= rgn->regions[i].base ||
+                            start >= rgn->regions[i].base +
+                            rgn->regions[i].size)
+                                continue;
+
+                        /*
+                         * If the memory range overlaps the memory reserved by
+                         * memblock, then the kernel resides in this node.
+                         */
+                        node_set(node, movablemem_map.numa_nodes_kernel);
+
+                        goto out;
+                }
+
+                /*
+                 * If the kernel resides in this node, then the whole node
+                 * should not be hotpluggable.
+                 */
+                if (node_isset(node, movablemem_map.numa_nodes_kernel))
+                        goto out;
+
+                insert_movablemem_map(start_pfn, end_pfn);
+
+                /*
+                 * numa_nodes_hotplug nodemask represents which nodes are put
+                 * into movablemem_map.map[].
+                 */
+                node_set(node, movablemem_map.numa_nodes_hotplug);
+                goto out;
+        }
+
+        /*
+         * For movablemem_map=nn[KMG]@ss[KMG]:
+         *
+         * SRAT:            |_____| |_____| |_________| |_________| ......
+         * node id:         0       1       1           2
+         * user specified:          |__|    |___|
+         * movablemem_map:          |___| |_________|   |______| ......
+         *
+         * Using movablemem_map, we can prevent memblock from allocating memory
+         * on ZONE_MOVABLE at boot time.
+         *
+         * NOTE: In this case, SRAT info will be ignored.
+         */
+        overlap = movablemem_map_overlap(start_pfn, end_pfn);
+        if (overlap >= 0) {
+                /*
+                 * If part of this range is in movablemem_map, we need to
+                 * add the range after it to extend the range to the end
+                 * of the node, because everything from the min address
+                 * specified to the end of the node will be ZONE_MOVABLE.
+                 */
+                start_pfn = max(start_pfn,
+                                movablemem_map.map[overlap].start_pfn);
+                insert_movablemem_map(start_pfn, end_pfn);
+
+                /*
+                 * Set the nodemask, so that if the address range on one node
+                 * is not continuous, we can add the subsequent ranges on the
+                 * same node into movablemem_map.
+                 */
+                node_set(node, movablemem_map.numa_nodes_hotplug);
+        } else {
+                if (node_isset(node, movablemem_map.numa_nodes_hotplug))
+                        /*
+                         * Insert the range if we already have movable ranges
+                         * on the same node.
+                         */
+                        insert_movablemem_map(start_pfn, end_pfn);
+        }
+out:
+        return;
+}
+#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+static inline void
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+}
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
         u64 start, end;
+        u32 hotpluggable;
         int node, pxm;
 
         if (srat_disabled())
@@ -154,7 +269,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                 goto out_err_bad_srat;
         if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                 goto out_err;
-        if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
+        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
+        if (hotpluggable && !save_add_info())
                 goto out_err;
 
         start = ma->base_address;
@@ -174,9 +290,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
         node_set(node, numa_nodes_parsed);
 
-        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
                node, pxm,
-               (unsigned long long) start, (unsigned long long) end - 1);
+               (unsigned long long) start, (unsigned long long) end - 1,
+               hotpluggable ? "Hot Pluggable": "");
+
+        handle_movablemem(node, start, end, hotpluggable);
 
         return 0;
 out_err_bad_srat:
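For reference, the two movablemem_map formats that the new handle_movablemem() above distinguishes, written as kernel command-line examples (the size and address values are purely illustrative):

        movablemem_map=acpi        trust the SRAT hot-pluggable flag when choosing movable nodes
        movablemem_map=2G@64G      nn[KMG]@ss[KMG]: memory from the lowest specified address (64G
                                   here) to the end of that node is treated as ZONE_MOVABLE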