x86, mm: introduce vmem_altmap to augment vmemmap_populate()
In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given the size of persistent memory capacities relative to DRAM, it may not be feasible to store the memmap in 'System Memory'. Instead, vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Esse commit está contido em:
@@ -17,6 +17,7 @@
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/memory_hotplug.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/vmalloc.h>
|
||||
@@ -506,10 +507,25 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
|
||||
unsigned long i;
|
||||
int err = 0;
|
||||
int start_sec, end_sec;
|
||||
struct vmem_altmap *altmap;
|
||||
|
||||
/* during initialize mem_map, align hot-added range to section */
|
||||
start_sec = pfn_to_section_nr(phys_start_pfn);
|
||||
end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
|
||||
|
||||
altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
|
||||
if (altmap) {
|
||||
/*
|
||||
* Validate altmap is within bounds of the total request
|
||||
*/
|
||||
if (altmap->base_pfn != phys_start_pfn
|
||||
|| vmem_altmap_offset(altmap) > nr_pages) {
|
||||
pr_warn_once("memory add fail, invalid altmap\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
altmap->alloc = 0;
|
||||
}
|
||||
|
||||
for (i = start_sec; i <= end_sec; i++) {
|
||||
err = __add_section(nid, zone, section_nr_to_pfn(i));
|
||||
|
||||
@@ -731,7 +747,8 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
|
||||
pgdat_resize_unlock(zone->zone_pgdat, &flags);
|
||||
}
|
||||
|
||||
static int __remove_section(struct zone *zone, struct mem_section *ms)
|
||||
static int __remove_section(struct zone *zone, struct mem_section *ms,
|
||||
unsigned long map_offset)
|
||||
{
|
||||
unsigned long start_pfn;
|
||||
int scn_nr;
|
||||
@@ -748,7 +765,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
|
||||
start_pfn = section_nr_to_pfn(scn_nr);
|
||||
__remove_zone(zone, start_pfn);
|
||||
|
||||
sparse_remove_one_section(zone, ms);
|
||||
sparse_remove_one_section(zone, ms, map_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -767,9 +784,32 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
unsigned long i;
|
||||
int sections_to_remove;
|
||||
resource_size_t start, size;
|
||||
int ret = 0;
|
||||
unsigned long map_offset = 0;
|
||||
int sections_to_remove, ret = 0;
|
||||
|
||||
/* In the ZONE_DEVICE case device driver owns the memory region */
|
||||
if (is_dev_zone(zone)) {
|
||||
struct page *page = pfn_to_page(phys_start_pfn);
|
||||
struct vmem_altmap *altmap;
|
||||
|
||||
altmap = to_vmem_altmap((unsigned long) page);
|
||||
if (altmap)
|
||||
map_offset = vmem_altmap_offset(altmap);
|
||||
} else {
|
||||
resource_size_t start, size;
|
||||
|
||||
start = phys_start_pfn << PAGE_SHIFT;
|
||||
size = nr_pages * PAGE_SIZE;
|
||||
|
||||
ret = release_mem_region_adjustable(&iomem_resource, start,
|
||||
size);
|
||||
if (ret) {
|
||||
resource_size_t endres = start + size - 1;
|
||||
|
||||
pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
|
||||
&start, &endres, ret);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only remove entire sections
|
||||
@@ -777,23 +817,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
|
||||
BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
|
||||
BUG_ON(nr_pages % PAGES_PER_SECTION);
|
||||
|
||||
start = phys_start_pfn << PAGE_SHIFT;
|
||||
size = nr_pages * PAGE_SIZE;
|
||||
|
||||
/* in the ZONE_DEVICE case device driver owns the memory region */
|
||||
if (!is_dev_zone(zone))
|
||||
ret = release_mem_region_adjustable(&iomem_resource, start, size);
|
||||
if (ret) {
|
||||
resource_size_t endres = start + size - 1;
|
||||
|
||||
pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
|
||||
&start, &endres, ret);
|
||||
}
|
||||
|
||||
sections_to_remove = nr_pages / PAGES_PER_SECTION;
|
||||
for (i = 0; i < sections_to_remove; i++) {
|
||||
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
|
||||
ret = __remove_section(zone, __pfn_to_section(pfn));
|
||||
|
||||
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
|
||||
map_offset = 0;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
@@ -43,6 +43,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/pfn.h>
|
||||
@@ -4485,8 +4486,9 @@ static inline unsigned long wait_table_bits(unsigned long size)
|
||||
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
|
||||
unsigned long start_pfn, enum memmap_context context)
|
||||
{
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
|
||||
unsigned long end_pfn = start_pfn + size;
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
unsigned long pfn;
|
||||
struct zone *z;
|
||||
unsigned long nr_initialised = 0;
|
||||
@@ -4494,6 +4496,13 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
|
||||
if (highest_memmap_pfn < end_pfn - 1)
|
||||
highest_memmap_pfn = end_pfn - 1;
|
||||
|
||||
/*
|
||||
* Honor reservation requested by the driver for this ZONE_DEVICE
|
||||
* memory
|
||||
*/
|
||||
if (altmap && start_pfn == altmap->base_pfn)
|
||||
start_pfn += altmap->reserve;
|
||||
|
||||
z = &pgdat->node_zones[zone];
|
||||
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
|
||||
/*
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
@@ -70,7 +71,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
|
||||
}
|
||||
|
||||
/* need to make sure size is all the same during early stage */
|
||||
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
|
||||
static void * __meminit alloc_block_buf(unsigned long size, int node)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
@@ -87,6 +88,77 @@ void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compute the pfn at which the next altmap allocation would start:
 * base_pfn advanced by the reserve, alloc and align page counters.
 */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	unsigned long next = altmap->base_pfn + altmap->reserve;

	next += altmap->alloc;
	next += altmap->align;

	return next;
}
|
||||
|
||||
/*
 * Number of pages still available in the altmap: 'free' minus the pages
 * already accounted for in 'alloc' and 'align', clamped at zero so the
 * subtraction can never wrap.
 */
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long used = altmap->alloc + altmap->align;

	return altmap->free > used ? altmap->free - used : 0;
}
|
||||
|
||||
/**
|
||||
* vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
|
||||
* @altmap - reserved page pool for the allocation
|
||||
* @nr_pfns - size (in pages) of the allocation
|
||||
*
|
||||
* Allocations are aligned to the size of the request
|
||||
*/
|
||||
static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
|
||||
unsigned long nr_pfns)
|
||||
{
|
||||
unsigned long pfn = vmem_altmap_next_pfn(altmap);
|
||||
unsigned long nr_align;
|
||||
|
||||
nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
|
||||
nr_align = ALIGN(pfn, nr_align) - pfn;
|
||||
|
||||
if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
|
||||
return ULONG_MAX;
|
||||
altmap->alloc += nr_pfns;
|
||||
altmap->align += nr_align;
|
||||
return pfn + nr_align;
|
||||
}
|
||||
|
||||
/*
 * Allocate a buffer of @size bytes from the @altmap reservation.
 *
 * @size must be a multiple of PAGE_SIZE; otherwise a one-time warning is
 * logged and NULL is returned. On success the kernel virtual address
 * (via __va()) of the first allocated pfn is returned; NULL is returned
 * when vmem_altmap_alloc() cannot satisfy the request.
 */
static void * __meminit altmap_alloc_block_buf(unsigned long size,
		struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns;
	void *ptr;

	if (size & ~PAGE_MASK) {
		/* size is unsigned long: print it with %lu, not %ld */
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%lu)\n",
				__func__, size);
		return NULL;
	}

	nr_pfns = size >> PAGE_SHIFT;
	pfn = vmem_altmap_alloc(altmap, nr_pfns);

	/* ULONG_MAX is the failure value returned by vmem_altmap_alloc() */
	if (pfn < ULONG_MAX)
		ptr = __va(__pfn_to_phys(pfn));
	else
		ptr = NULL;

	/* The alloc/align counters are page counts, printed as unsigned */
	pr_debug("%s: pfn: %#lx alloc: %lu align: %lu nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);

	return ptr;
}
|
||||
|
||||
/* need to make sure size is all the same during early stage */
|
||||
void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
|
||||
struct vmem_altmap *altmap)
|
||||
{
|
||||
if (altmap)
|
||||
return altmap_alloc_block_buf(size, altmap);
|
||||
return alloc_block_buf(size, node);
|
||||
}
|
||||
|
||||
void __meminit vmemmap_verify(pte_t *pte, int node,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
@@ -103,7 +175,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
|
||||
pte_t *pte = pte_offset_kernel(pmd, addr);
|
||||
if (pte_none(*pte)) {
|
||||
pte_t entry;
|
||||
void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
|
||||
void *p = alloc_block_buf(PAGE_SIZE, node);
|
||||
if (!p)
|
||||
return NULL;
|
||||
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
|
||||
|
@@ -748,7 +748,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
|
||||
if (!memmap)
|
||||
return;
|
||||
|
||||
for (i = 0; i < PAGES_PER_SECTION; i++) {
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if (PageHWPoison(&memmap[i])) {
|
||||
atomic_long_sub(1, &num_poisoned_pages);
|
||||
ClearPageHWPoison(&memmap[i]);
|
||||
@@ -788,7 +788,8 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
|
||||
free_map_bootmem(memmap);
|
||||
}
|
||||
|
||||
void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
|
||||
void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
|
||||
unsigned long map_offset)
|
||||
{
|
||||
struct page *memmap = NULL;
|
||||
unsigned long *usemap = NULL, flags;
|
||||
@@ -804,7 +805,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
|
||||
}
|
||||
pgdat_resize_unlock(pgdat, &flags);
|
||||
|
||||
clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION);
|
||||
clear_hwpoisoned_pages(memmap + map_offset,
|
||||
PAGES_PER_SECTION - map_offset);
|
||||
free_section_usemap(memmap, usemap);
|
||||
}
|
||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||
|
Referência em uma nova issue
Block a user