Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (46 commits)
  powerpc64: convert to dynamic percpu allocator
  sparc64: use embedding percpu first chunk allocator
  percpu: kill lpage first chunk allocator
  x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA
  percpu: update embedding first chunk allocator to handle sparse units
  percpu: use group information to allocate vmap areas sparsely
  vmalloc: implement pcpu_get_vm_areas()
  vmalloc: separate out insert_vmalloc_vm()
  percpu: add chunk->base_addr
  percpu: add pcpu_unit_offsets[]
  percpu: introduce pcpu_alloc_info and pcpu_group_info
  percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward
  percpu: add @align to pcpu_fc_alloc_fn_t
  percpu: make @dyn_size mandatory for pcpu_setup_first_chunk()
  percpu: drop @static_size from first chunk allocators
  percpu: generalize first chunk allocator selection
  percpu: build first chunk allocators selectively
  percpu: rename 4k first chunk allocator to page
  percpu: improve boot messages
  percpu: fix pcpu_reclaim() locking
  ...

Fix trivial conflict as by Tejun Heo in kernel/sched.c
This commit is contained in:
Linus Torvalds
2009-09-15 09:39:44 -07:00
80 changed files with 1912 additions and 1230 deletions

View File

@@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
obj-$(CONFIG_SMP) += percpu.o
else
obj-$(CONFIG_SMP) += allocpercpu.o

View File

@@ -5,6 +5,8 @@
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <asm/sections.h>
#ifndef cache_line_size
#define cache_line_size() L1_CACHE_BYTES
@@ -147,3 +149,29 @@ void free_percpu(void *__pdata)
kfree(__percpu_disguise(__pdata));
}
EXPORT_SYMBOL_GPL(free_percpu);
/*
* Generic percpu area setup.
*/
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
void __init setup_per_cpu_areas(void)
{
unsigned long size, i;
char *ptr;
unsigned long nr_possible_cpus = num_possible_cpus();
/* Copy section for each CPU (we discard the original) */
size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
ptr = alloc_bootmem_pages(size * nr_possible_cpus);
for_each_possible_cpu(i) {
__per_cpu_offset[i] = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
ptr += size;
}
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

View File

@@ -36,7 +36,7 @@ struct test_node {
};
static LIST_HEAD(test_list);
static DEFINE_PER_CPU(void *, test_pointer);
static DEFINE_PER_CPU(void *, kmemleak_test_pointer);
/*
* Some very simple testing. This function needs to be extended for
@@ -86,9 +86,9 @@ static int __init kmemleak_test_init(void)
}
for_each_possible_cpu(i) {
per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL);
per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
pr_info("kmemleak: kmalloc(129) = %p\n",
per_cpu(test_pointer, i));
per_cpu(kmemleak_test_pointer, i));
}
return 0;

View File

@@ -604,6 +604,8 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
}
}
static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
@@ -621,7 +623,6 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied)
{
static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
unsigned long ratelimit;
unsigned long *p;
@@ -634,7 +635,7 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
* tasks in balance_dirty_pages(). Period.
*/
preempt_disable();
p = &__get_cpu_var(ratelimits);
p = &__get_cpu_var(bdp_ratelimits);
*p += nr_pages_dirtied;
if (unlikely(*p >= ratelimit)) {
*p = 0;

File diff suppressed because it is too large Load Diff

View File

@@ -19,7 +19,7 @@
#include <linux/module.h>
#include <linux/quicklist.h>
DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
#define FRACTION_OF_NODE_MEM 16

View File

@@ -2111,8 +2111,8 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
*/
#define NR_KMEM_CACHE_CPU 100
static DEFINE_PER_CPU(struct kmem_cache_cpu,
kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
kmem_cache_cpu);
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);

View File

@@ -265,6 +265,7 @@ struct vmap_area {
static DEFINE_SPINLOCK(vmap_area_lock);
static struct rb_root vmap_area_root = RB_ROOT;
static LIST_HEAD(vmap_area_list);
static unsigned long vmap_area_pcpu_hole;
static struct vmap_area *__find_vmap_area(unsigned long addr)
{
@@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va)
RB_CLEAR_NODE(&va->rb_node);
list_del_rcu(&va->list);
/*
* Track the highest possible candidate for pcpu area
* allocation. Areas outside of vmalloc area can be returned
* here too, consider only end addresses which fall inside
* vmalloc area proper.
*/
if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
call_rcu(&va->rcu_head, rcu_free_va);
}
@@ -1038,6 +1048,9 @@ void __init vmalloc_init(void)
va->va_end = va->va_start + tmp->size;
__insert_vmap_area(va);
}
vmap_area_pcpu_hole = VMALLOC_END;
vmap_initialized = true;
}
@@ -1122,13 +1135,34 @@ EXPORT_SYMBOL_GPL(map_vm_area);
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
unsigned long flags, void *caller)
{
struct vm_struct *tmp, **p;
vm->flags = flags;
vm->addr = (void *)va->va_start;
vm->size = va->va_end - va->va_start;
vm->caller = caller;
va->private = vm;
va->flags |= VM_VM_AREA;
write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
if (tmp->addr >= vm->addr)
break;
}
vm->next = *p;
*p = vm;
write_unlock(&vmlist_lock);
}
static struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long flags, unsigned long start, unsigned long end,
int node, gfp_t gfp_mask, void *caller)
{
static struct vmap_area *va;
struct vm_struct *area;
struct vm_struct *tmp, **p;
unsigned long align = 1;
BUG_ON(in_interrupt());
@@ -1147,7 +1181,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
if (unlikely(!size))
return NULL;
area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!area))
return NULL;
@@ -1162,25 +1196,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
area->flags = flags;
area->addr = (void *)va->va_start;
area->size = size;
area->pages = NULL;
area->nr_pages = 0;
area->phys_addr = 0;
area->caller = caller;
va->private = area;
va->flags |= VM_VM_AREA;
write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
if (tmp->addr >= area->addr)
break;
}
area->next = *p;
*p = area;
write_unlock(&vmlist_lock);
insert_vmalloc_vm(area, va, flags, caller);
return area;
}
@@ -1818,6 +1834,286 @@ void free_vm_area(struct vm_struct *area)
}
EXPORT_SYMBOL_GPL(free_vm_area);
static struct vmap_area *node_to_va(struct rb_node *n)
{
return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
}
/**
* pvm_find_next_prev - find the next and prev vmap_area surrounding @end
* @end: target address
* @pnext: out arg for the next vmap_area
* @pprev: out arg for the previous vmap_area
*
* Returns: %true if either or both of next and prev are found,
* %false if no vmap_area exists
*
* Find vmap_areas end addresses of which enclose @end. ie. if not
* NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
*/
static bool pvm_find_next_prev(unsigned long end,
struct vmap_area **pnext,
struct vmap_area **pprev)
{
struct rb_node *n = vmap_area_root.rb_node;
struct vmap_area *va = NULL;
while (n) {
va = rb_entry(n, struct vmap_area, rb_node);
if (end < va->va_end)
n = n->rb_left;
else if (end > va->va_end)
n = n->rb_right;
else
break;
}
if (!va)
return false;
if (va->va_end > end) {
*pnext = va;
*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
} else {
*pprev = va;
*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
}
return true;
}
/**
* pvm_determine_end - find the highest aligned address between two vmap_areas
* @pnext: in/out arg for the next vmap_area
* @pprev: in/out arg for the previous vmap_area
* @align: alignment
*
* Returns: determined end address
*
* Find the highest aligned address between *@pnext and *@pprev below
* VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned
* down address is between the end addresses of the two vmap_areas.
*
* Please note that the address returned by this function may fall
* inside *@pnext vmap_area. The caller is responsible for checking
* that.
*/
static unsigned long pvm_determine_end(struct vmap_area **pnext,
struct vmap_area **pprev,
unsigned long align)
{
const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
unsigned long addr;
if (*pnext)
addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
else
addr = vmalloc_end;
while (*pprev && (*pprev)->va_end > addr) {
*pnext = *pprev;
*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
}
return addr;
}
/**
* pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
* @offsets: array containing offset of each area
* @sizes: array containing size of each area
* @nr_vms: the number of areas to allocate
* @align: alignment, all entries in @offsets and @sizes must be aligned to this
* @gfp_mask: allocation mask
*
* Returns: kmalloc'd vm_struct pointer array pointing to allocated
* vm_structs on success, %NULL on failure
*
* Percpu allocator wants to use congruent vm areas so that it can
* maintain the offsets among percpu areas. This function allocates
* congruent vmalloc areas for it. These areas tend to be scattered
* pretty far, distance between two areas easily going up to
* gigabytes. To avoid interacting with regular vmallocs, these areas
* are allocated from top.
*
* Despite its complicated look, this allocator is rather simple. It
* does everything top-down and scans areas from the end looking for
* matching slot. While scanning, if any of the areas overlaps with
* existing vmap_area, the base address is pulled down to fit the
* area. Scanning is repeated till all the areas fit and then all
* necessary data structres are inserted and the result is returned.
*/
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align, gfp_t gfp_mask)
{
const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
struct vmap_area **vas, *prev, *next;
struct vm_struct **vms;
int area, area2, last_area, term_area;
unsigned long base, start, end, last_end;
bool purged = false;
gfp_mask &= GFP_RECLAIM_MASK;
/* verify parameters and allocate data structures */
BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
for (last_area = 0, area = 0; area < nr_vms; area++) {
start = offsets[area];
end = start + sizes[area];
/* is everything aligned properly? */
BUG_ON(!IS_ALIGNED(offsets[area], align));
BUG_ON(!IS_ALIGNED(sizes[area], align));
/* detect the area with the highest address */
if (start > offsets[last_area])
last_area = area;
for (area2 = 0; area2 < nr_vms; area2++) {
unsigned long start2 = offsets[area2];
unsigned long end2 = start2 + sizes[area2];
if (area2 == area)
continue;
BUG_ON(start2 >= start && start2 < end);
BUG_ON(end2 <= end && end2 > start);
}
}
last_end = offsets[last_area] + sizes[last_area];
if (vmalloc_end - vmalloc_start < last_end) {
WARN_ON(true);
return NULL;
}
vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
if (!vas || !vms)
goto err_free;
for (area = 0; area < nr_vms; area++) {
vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
if (!vas[area] || !vms[area])
goto err_free;
}
retry:
spin_lock(&vmap_area_lock);
/* start scanning - we scan from the top, begin with the last area */
area = term_area = last_area;
start = offsets[area];
end = start + sizes[area];
if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
base = vmalloc_end - last_end;
goto found;
}
base = pvm_determine_end(&next, &prev, align) - end;
while (true) {
BUG_ON(next && next->va_end <= base + end);
BUG_ON(prev && prev->va_end > base + end);
/*
* base might have underflowed, add last_end before
* comparing.
*/
if (base + last_end < vmalloc_start + last_end) {
spin_unlock(&vmap_area_lock);
if (!purged) {
purge_vmap_area_lazy();
purged = true;
goto retry;
}
goto err_free;
}
/*
* If next overlaps, move base downwards so that it's
* right below next and then recheck.
*/
if (next && next->va_start < base + end) {
base = pvm_determine_end(&next, &prev, align) - end;
term_area = area;
continue;
}
/*
* If prev overlaps, shift down next and prev and move
* base so that it's right below new next and then
* recheck.
*/
if (prev && prev->va_end > base + start) {
next = prev;
prev = node_to_va(rb_prev(&next->rb_node));
base = pvm_determine_end(&next, &prev, align) - end;
term_area = area;
continue;
}
/*
* This area fits, move on to the previous one. If
* the previous one is the terminal one, we're done.
*/
area = (area + nr_vms - 1) % nr_vms;
if (area == term_area)
break;
start = offsets[area];
end = start + sizes[area];
pvm_find_next_prev(base + end, &next, &prev);
}
found:
/* we've found a fitting base, insert all va's */
for (area = 0; area < nr_vms; area++) {
struct vmap_area *va = vas[area];
va->va_start = base + offsets[area];
va->va_end = va->va_start + sizes[area];
__insert_vmap_area(va);
}
vmap_area_pcpu_hole = base + offsets[last_area];
spin_unlock(&vmap_area_lock);
/* insert all vm's */
for (area = 0; area < nr_vms; area++)
insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
pcpu_get_vm_areas);
kfree(vas);
return vms;
err_free:
for (area = 0; area < nr_vms; area++) {
if (vas)
kfree(vas[area]);
if (vms)
kfree(vms[area]);
}
kfree(vas);
kfree(vms);
return NULL;
}
/**
* pcpu_free_vm_areas - free vmalloc areas for percpu allocator
* @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
* @nr_vms: the number of allocated areas
*
* Free vm_structs and the array allocated by pcpu_get_vm_areas().
*/
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
int i;
for (i = 0; i < nr_vms; i++)
free_vm_area(vms[i]);
kfree(vms);
}
#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)