Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (46 commits)
  powerpc64: convert to dynamic percpu allocator
  sparc64: use embedding percpu first chunk allocator
  percpu: kill lpage first chunk allocator
  x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA
  percpu: update embedding first chunk allocator to handle sparse units
  percpu: use group information to allocate vmap areas sparsely
  vmalloc: implement pcpu_get_vm_areas()
  vmalloc: separate out insert_vmalloc_vm()
  percpu: add chunk->base_addr
  percpu: add pcpu_unit_offsets[]
  percpu: introduce pcpu_alloc_info and pcpu_group_info
  percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward
  percpu: add @align to pcpu_fc_alloc_fn_t
  percpu: make @dyn_size mandatory for pcpu_setup_first_chunk()
  percpu: drop @static_size from first chunk allocators
  percpu: generalize first chunk allocator selection
  percpu: build first chunk allocators selectively
  percpu: rename 4k first chunk allocator to page
  percpu: improve boot messages
  percpu: fix pcpu_reclaim() locking
  ...

Fix trivial conflict as per Tejun Heo in kernel/sched.c
mm/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
 obj-$(CONFIG_SMP) += percpu.o
 else
 obj-$(CONFIG_SMP) += allocpercpu.o
mm/allocpercpu.c
@@ -5,6 +5,8 @@
  */
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
+#include <asm/sections.h>
 
 #ifndef cache_line_size
 #define cache_line_size() L1_CACHE_BYTES
@@ -147,3 +149,29 @@ void free_percpu(void *__pdata)
         kfree(__percpu_disguise(__pdata));
 }
 EXPORT_SYMBOL_GPL(free_percpu);
+
+/*
+ * Generic percpu area setup.
+ */
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+        unsigned long size, i;
+        char *ptr;
+        unsigned long nr_possible_cpus = num_possible_cpus();
+
+        /* Copy section for each CPU (we discard the original) */
+        size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+        ptr = alloc_bootmem_pages(size * nr_possible_cpus);
+
+        for_each_possible_cpu(i) {
+                __per_cpu_offset[i] = ptr - __per_cpu_start;
+                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+                ptr += size;
+        }
+}
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
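The legacy path added above keeps exactly one number per CPU: a per-CPU variable is reached by adding __per_cpu_offset[cpu] to its link-time address inside the copied .data.percpu section. The following stand-alone sketch models that addressing scheme in plain user-space C; all names (fake_offset, replicas, template_section) are made up for illustration and this is not the kernel's per_cpu() implementation:

#include <stdio.h>
#include <string.h>

#define NR_CPUS 4

static char template_section[32] = "percpu template"; /* models .data.percpu */
static unsigned long fake_offset[NR_CPUS];            /* models __per_cpu_offset[] */
static char replicas[NR_CPUS][32];                    /* models the bootmem copies */

int main(void)
{
        int cpu;

        /* "setup_per_cpu_areas": copy the template once per CPU and record
         * how far each copy sits from the original section (this mirrors the
         * kernel's own pointer arithmetic between the copies and the section). */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                memcpy(replicas[cpu], template_section, sizeof(template_section));
                fake_offset[cpu] = (unsigned long)(replicas[cpu] - template_section);
        }

        /* "per_cpu(var, cpu)": link-time address plus that CPU's offset. */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d: %s\n", cpu, template_section + fake_offset[cpu]);

        return 0;
}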
mm/kmemleak-test.c
@@ -36,7 +36,7 @@ struct test_node {
 };
 
 static LIST_HEAD(test_list);
-static DEFINE_PER_CPU(void *, test_pointer);
+static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
 
 /*
  * Some very simple testing. This function needs to be extended for
@@ -86,9 +86,9 @@ static int __init kmemleak_test_init(void)
         }
 
         for_each_possible_cpu(i) {
-                per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL);
+                per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
                 pr_info("kmemleak: kmalloc(129) = %p\n",
-                        per_cpu(test_pointer, i));
+                        per_cpu(kmemleak_test_pointer, i));
         }
 
         return 0;
mm/page-writeback.c
@@ -604,6 +604,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
         }
 }
 
+static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
@@ -621,7 +623,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
                                         unsigned long nr_pages_dirtied)
 {
-        static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
         unsigned long ratelimit;
         unsigned long *p;
 
@@ -634,7 +635,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
          * tasks in balance_dirty_pages(). Period.
          */
         preempt_disable();
-        p = &__get_cpu_var(ratelimits);
+        p = &__get_cpu_var(bdp_ratelimits);
         *p += nr_pages_dirtied;
         if (unlikely(*p >= ratelimit)) {
                 *p = 0;
mm/percpu.c (1416 changed lines): file diff suppressed because it is too large.
mm/quicklist.c
@@ -19,7 +19,7 @@
 #include <linux/module.h>
 #include <linux/quicklist.h>
 
-DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
+DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
 
 #define FRACTION_OF_NODE_MEM 16
 

mm/slub.c
@@ -2111,8 +2111,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
  */
 #define NR_KMEM_CACHE_CPU 100
 
-static DEFINE_PER_CPU(struct kmem_cache_cpu,
-                      kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
+                      kmem_cache_cpu);
 
 static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
 static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
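Both hunks above apply the same mechanical conversion: with the unified percpu allocator, a per-CPU array carries its bound inside the type argument of DEFINE_PER_CPU(), so the name argument stays a bare identifier. A minimal kernel-style sketch of the resulting declaration and access pattern (not from this patch; demo_counts and NR_DEMO are made-up names used only for illustration):

#include <linux/percpu.h>

#define NR_DEMO 8

/* previously seen form:
 *     static DEFINE_PER_CPU(int, demo_counts)[NR_DEMO];
 * preferred form after this series: the array bound moves into the type argument. */
static DEFINE_PER_CPU(int [NR_DEMO], demo_counts);

static void demo_fill(void)
{
        int cpu;

        /* each CPU's copy is still reached through per_cpu(name, cpu) */
        for_each_possible_cpu(cpu)
                per_cpu(demo_counts, cpu)[0] = cpu;
}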
mm/vmalloc.c (338 changed lines)
@@ -265,6 +265,7 @@ struct vmap_area {
 static DEFINE_SPINLOCK(vmap_area_lock);
 static struct rb_root vmap_area_root = RB_ROOT;
 static LIST_HEAD(vmap_area_list);
+static unsigned long vmap_area_pcpu_hole;
 
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
@@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va)
         RB_CLEAR_NODE(&va->rb_node);
         list_del_rcu(&va->list);
 
+        /*
+         * Track the highest possible candidate for pcpu area
+         * allocation. Areas outside of vmalloc area can be returned
+         * here too, consider only end addresses which fall inside
+         * vmalloc area proper.
+         */
+        if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
+                vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
+
         call_rcu(&va->rcu_head, rcu_free_va);
 }
 
@@ -1038,6 +1048,9 @@ void __init vmalloc_init(void)
                 va->va_end = va->va_start + tmp->size;
                 __insert_vmap_area(va);
         }
+
+        vmap_area_pcpu_hole = VMALLOC_END;
+
         vmap_initialized = true;
 }
 
@@ -1122,13 +1135,34 @@ EXPORT_SYMBOL_GPL(map_vm_area);
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
+static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
+                              unsigned long flags, void *caller)
+{
+        struct vm_struct *tmp, **p;
+
+        vm->flags = flags;
+        vm->addr = (void *)va->va_start;
+        vm->size = va->va_end - va->va_start;
+        vm->caller = caller;
+        va->private = vm;
+        va->flags |= VM_VM_AREA;
+
+        write_lock(&vmlist_lock);
+        for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+                if (tmp->addr >= vm->addr)
+                        break;
+        }
+        vm->next = *p;
+        *p = vm;
+        write_unlock(&vmlist_lock);
+}
+
 static struct vm_struct *__get_vm_area_node(unsigned long size,
                 unsigned long flags, unsigned long start, unsigned long end,
                 int node, gfp_t gfp_mask, void *caller)
 {
         static struct vmap_area *va;
         struct vm_struct *area;
-        struct vm_struct *tmp, **p;
         unsigned long align = 1;
 
         BUG_ON(in_interrupt());
@@ -1147,7 +1181,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
         if (unlikely(!size))
                 return NULL;
 
-        area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
+        area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
         if (unlikely(!area))
                 return NULL;
 
@@ -1162,25 +1196,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
                 return NULL;
         }
 
-        area->flags = flags;
-        area->addr = (void *)va->va_start;
-        area->size = size;
-        area->pages = NULL;
-        area->nr_pages = 0;
-        area->phys_addr = 0;
-        area->caller = caller;
-        va->private = area;
-        va->flags |= VM_VM_AREA;
-
-        write_lock(&vmlist_lock);
-        for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
-                if (tmp->addr >= area->addr)
-                        break;
-        }
-        area->next = *p;
-        *p = area;
-        write_unlock(&vmlist_lock);
-
+        insert_vmalloc_vm(area, va, flags, caller);
         return area;
 }
 
@@ -1818,6 +1834,286 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+static struct vmap_area *node_to_va(struct rb_node *n)
+{
+        return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
+}
+
+/**
+ * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
+ * @end: target address
+ * @pnext: out arg for the next vmap_area
+ * @pprev: out arg for the previous vmap_area
+ *
+ * Returns: %true if either or both of next and prev are found,
+ *          %false if no vmap_area exists
+ *
+ * Find vmap_areas end addresses of which enclose @end. ie. if not
+ * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
+ */
+static bool pvm_find_next_prev(unsigned long end,
+                               struct vmap_area **pnext,
+                               struct vmap_area **pprev)
+{
+        struct rb_node *n = vmap_area_root.rb_node;
+        struct vmap_area *va = NULL;
+
+        while (n) {
+                va = rb_entry(n, struct vmap_area, rb_node);
+                if (end < va->va_end)
+                        n = n->rb_left;
+                else if (end > va->va_end)
+                        n = n->rb_right;
+                else
+                        break;
+        }
+
+        if (!va)
+                return false;
+
+        if (va->va_end > end) {
+                *pnext = va;
+                *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+        } else {
+                *pprev = va;
+                *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
+        }
+        return true;
+}
+
+/**
+ * pvm_determine_end - find the highest aligned address between two vmap_areas
+ * @pnext: in/out arg for the next vmap_area
+ * @pprev: in/out arg for the previous vmap_area
+ * @align: alignment
+ *
+ * Returns: determined end address
+ *
+ * Find the highest aligned address between *@pnext and *@pprev below
+ * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned
+ * down address is between the end addresses of the two vmap_areas.
+ *
+ * Please note that the address returned by this function may fall
+ * inside *@pnext vmap_area. The caller is responsible for checking
+ * that.
+ */
+static unsigned long pvm_determine_end(struct vmap_area **pnext,
+                                       struct vmap_area **pprev,
+                                       unsigned long align)
+{
+        const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+        unsigned long addr;
+
+        if (*pnext)
+                addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
+        else
+                addr = vmalloc_end;
+
+        while (*pprev && (*pprev)->va_end > addr) {
+                *pnext = *pprev;
+                *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+        }
+
+        return addr;
+}
+
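As the kernel-doc above states, pvm_determine_end() aligns down the smaller of next->va_start and VMALLOC_END, then walks prev back until it no longer overlaps. A throwaway user-space calculation with invented numbers makes the arithmetic concrete (a model of the computation only, not kernel code):

#include <stdio.h>

int main(void)
{
        unsigned long vmalloc_end = 0x10000; /* stand-in for VMALLOC_END */
        unsigned long next_start  = 0xc350;  /* stand-in for next->va_start */
        unsigned long align       = 0x1000;

        unsigned long end = next_start < vmalloc_end ? next_start : vmalloc_end;

        end &= ~(align - 1);                          /* align down, as the kernel code does */
        printf("candidate end address: 0x%lx\n", end); /* prints 0xc000 */
        return 0;
}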
+/**
+ * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
+ * @offsets: array containing offset of each area
+ * @sizes: array containing size of each area
+ * @nr_vms: the number of areas to allocate
+ * @align: alignment, all entries in @offsets and @sizes must be aligned to this
+ * @gfp_mask: allocation mask
+ *
+ * Returns: kmalloc'd vm_struct pointer array pointing to allocated
+ *          vm_structs on success, %NULL on failure
+ *
+ * Percpu allocator wants to use congruent vm areas so that it can
+ * maintain the offsets among percpu areas. This function allocates
+ * congruent vmalloc areas for it. These areas tend to be scattered
+ * pretty far, distance between two areas easily going up to
+ * gigabytes. To avoid interacting with regular vmallocs, these areas
+ * are allocated from top.
+ *
+ * Despite its complicated look, this allocator is rather simple. It
+ * does everything top-down and scans areas from the end looking for
+ * matching slot. While scanning, if any of the areas overlaps with
+ * existing vmap_area, the base address is pulled down to fit the
+ * area. Scanning is repeated till all the areas fit and then all
+ * necessary data structres are inserted and the result is returned.
+ */
+struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
+                                     const size_t *sizes, int nr_vms,
+                                     size_t align, gfp_t gfp_mask)
+{
+        const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
+        const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+        struct vmap_area **vas, *prev, *next;
+        struct vm_struct **vms;
+        int area, area2, last_area, term_area;
+        unsigned long base, start, end, last_end;
+        bool purged = false;
+
+        gfp_mask &= GFP_RECLAIM_MASK;
+
+        /* verify parameters and allocate data structures */
+        BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
+        for (last_area = 0, area = 0; area < nr_vms; area++) {
+                start = offsets[area];
+                end = start + sizes[area];
+
+                /* is everything aligned properly? */
+                BUG_ON(!IS_ALIGNED(offsets[area], align));
+                BUG_ON(!IS_ALIGNED(sizes[area], align));
+
+                /* detect the area with the highest address */
+                if (start > offsets[last_area])
+                        last_area = area;
+
+                for (area2 = 0; area2 < nr_vms; area2++) {
+                        unsigned long start2 = offsets[area2];
+                        unsigned long end2 = start2 + sizes[area2];
+
+                        if (area2 == area)
+                                continue;
+
+                        BUG_ON(start2 >= start && start2 < end);
+                        BUG_ON(end2 <= end && end2 > start);
+                }
+        }
+        last_end = offsets[last_area] + sizes[last_area];
+
+        if (vmalloc_end - vmalloc_start < last_end) {
+                WARN_ON(true);
+                return NULL;
+        }
+
+        vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
+        vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
+        if (!vas || !vms)
+                goto err_free;
+
+        for (area = 0; area < nr_vms; area++) {
+                vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
+                vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
+                if (!vas[area] || !vms[area])
+                        goto err_free;
+        }
+retry:
+        spin_lock(&vmap_area_lock);
+
+        /* start scanning - we scan from the top, begin with the last area */
+        area = term_area = last_area;
+        start = offsets[area];
+        end = start + sizes[area];
+
+        if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
+                base = vmalloc_end - last_end;
+                goto found;
+        }
+        base = pvm_determine_end(&next, &prev, align) - end;
+
+        while (true) {
+                BUG_ON(next && next->va_end <= base + end);
+                BUG_ON(prev && prev->va_end > base + end);
+
+                /*
+                 * base might have underflowed, add last_end before
+                 * comparing.
+                 */
+                if (base + last_end < vmalloc_start + last_end) {
+                        spin_unlock(&vmap_area_lock);
+                        if (!purged) {
+                                purge_vmap_area_lazy();
+                                purged = true;
+                                goto retry;
+                        }
+                        goto err_free;
+                }
+
+                /*
+                 * If next overlaps, move base downwards so that it's
+                 * right below next and then recheck.
+                 */
+                if (next && next->va_start < base + end) {
+                        base = pvm_determine_end(&next, &prev, align) - end;
+                        term_area = area;
+                        continue;
+                }
+
+                /*
+                 * If prev overlaps, shift down next and prev and move
+                 * base so that it's right below new next and then
+                 * recheck.
+                 */
+                if (prev && prev->va_end > base + start) {
+                        next = prev;
+                        prev = node_to_va(rb_prev(&next->rb_node));
+                        base = pvm_determine_end(&next, &prev, align) - end;
+                        term_area = area;
+                        continue;
+                }
+
+                /*
+                 * This area fits, move on to the previous one. If
+                 * the previous one is the terminal one, we're done.
+                 */
+                area = (area + nr_vms - 1) % nr_vms;
+                if (area == term_area)
+                        break;
+                start = offsets[area];
+                end = start + sizes[area];
+                pvm_find_next_prev(base + end, &next, &prev);
+        }
+found:
+        /* we've found a fitting base, insert all va's */
+        for (area = 0; area < nr_vms; area++) {
+                struct vmap_area *va = vas[area];
+
+                va->va_start = base + offsets[area];
+                va->va_end = va->va_start + sizes[area];
+                __insert_vmap_area(va);
+        }
+
+        vmap_area_pcpu_hole = base + offsets[last_area];
+
+        spin_unlock(&vmap_area_lock);
+
+        /* insert all vm's */
+        for (area = 0; area < nr_vms; area++)
+                insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
+                                  pcpu_get_vm_areas);
+
+        kfree(vas);
+        return vms;
+
+err_free:
+        for (area = 0; area < nr_vms; area++) {
+                if (vas)
+                        kfree(vas[area]);
+                if (vms)
+                        kfree(vms[area]);
+        }
+        kfree(vas);
+        kfree(vms);
+        return NULL;
+}
+
+/**
+ * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
+ * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
+ * @nr_vms: the number of allocated areas
+ *
+ * Free vm_structs and the array allocated by pcpu_get_vm_areas().
+ */
+void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
+{
+        int i;
+
+        for (i = 0; i < nr_vms; i++)
+                free_vm_area(vms[i]);
+        kfree(vms);
+}
+
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
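For orientation, the intended caller of the two new exports is the percpu first-chunk setup code. A hedged sketch of how such a caller might request two congruent areas and release them again is shown below; the alignment, offsets, and sizes are invented for illustration and are not taken from this merge:

#include <linux/vmalloc.h>   /* pcpu_get_vm_areas(), pcpu_free_vm_areas() */
#include <linux/kernel.h>
#include <linux/gfp.h>

static int __init demo_congruent_areas(void)
{
        /* two hypothetical groups; every offset and size is a multiple of the alignment */
        static const unsigned long offsets[] = { 0, 4 * PMD_SIZE };
        static const size_t sizes[]          = { PMD_SIZE, PMD_SIZE };
        struct vm_struct **vms;
        int i;

        vms = pcpu_get_vm_areas(offsets, sizes, ARRAY_SIZE(sizes),
                                PMD_SIZE, GFP_KERNEL);
        if (!vms)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(sizes); i++)
                pr_info("group %d mapped at %p\n", i, vms[i]->addr);

        /* frees both vm areas and the vms[] array itself */
        pcpu_free_vm_areas(vms, ARRAY_SIZE(sizes));
        return 0;
}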