Merge branch 'slab/next' into slab/for-linus

Pekka Enberg
2013-05-07 09:19:47 +03:00
9 changed files with 782 additions and 916 deletions

mm/slub.c (221 changed lines)

@@ -1005,7 +1005,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
* dilemma by deferring the increment of the count during
* bootstrap (see early_kmem_cache_node_alloc).
*/
if (n) {
if (likely(n)) {
atomic_long_inc(&n->nr_slabs);
atomic_long_add(objects, &n->total_objects);
}
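
The likely(n) annotation added here is the standard GCC/Clang branch-prediction hint; the untaken path only matters during bootstrap, as the comment above notes. A minimal userspace sketch of the same pattern, with the macro definitions written out as assumptions rather than taken from this patch:

#include <stdio.h>

/* conventional kernel-style hint macros, reproduced here as assumptions */
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

int main(void)
{
        int value = 1;
        int *n = &value;

        if (likely(n)) {
                /* hot path: a node exists in every case except early bootstrap */
                printf("node present, counters can be updated\n");
        }
        return 0;
}
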
@@ -1493,7 +1493,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
int mode)
int mode, int *objects)
{
void *freelist;
unsigned long counters;
@@ -1507,6 +1507,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
freelist = page->freelist;
counters = page->counters;
new.counters = counters;
*objects = new.objects - new.inuse;
if (mode) {
new.inuse = page->objects;
new.freelist = NULL;
@@ -1528,7 +1529,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
return freelist;
}
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
@@ -1539,6 +1540,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
{
struct page *page, *page2;
void *object = NULL;
int available = 0;
int objects;
/*
* Racy check. If we mistakenly see no partial slabs then we
@@ -1552,22 +1555,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
void *t;
int available;
if (!pfmemalloc_match(page, flags))
continue;
t = acquire_slab(s, n, page, object == NULL);
t = acquire_slab(s, n, page, object == NULL, &objects);
if (!t)
break;
available += objects;
if (!object) {
c->page = page;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
available = page->objects - page->inuse;
} else {
available = put_cpu_partial(s, page, 0);
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
}
if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
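
With this change acquire_slab() reports a slab's free object count through the new *objects out-parameter, get_partial_node() sums those counts into available, and put_cpu_partial() no longer needs a return value for that purpose. A self-contained sketch of that shape; the struct, the threshold and every name below are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct slab_sketch {
        int objects;    /* total objects in the slab */
        int inuse;      /* objects currently allocated */
};

/* "acquire" a slab and report its free objects via an out-parameter */
static int acquire_sketch(struct slab_sketch *s, int *objects)
{
        *objects = s->objects - s->inuse;
        return 1;                       /* pretend the cmpxchg succeeded */
}

int main(void)
{
        struct slab_sketch partial[] = { { 16, 12 }, { 16, 8 }, { 16, 2 } };
        int available = 0;
        const int threshold = 8;        /* stands in for s->cpu_partial / 2 */

        for (int i = 0; i < 3; i++) {
                int objects;

                if (!acquire_sketch(&partial[i], &objects))
                        break;
                available += objects;
                if (available > threshold) {
                        printf("stopped after %d slabs, %d objects available\n",
                               i + 1, available);
                        break;
                }
        }
        return 0;
}
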
@@ -1946,7 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s,
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
struct page *oldpage;
int pages;
@@ -1984,7 +1986,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
return pobjects;
}
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -2041,7 +2042,7 @@ static void flush_all(struct kmem_cache *s)
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE && page_to_nid(page) != node)
if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
return 0;
#endif
return 1;
@@ -2331,13 +2332,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
s = memcg_kmem_get_cache(s, gfpflags);
redo:
/*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is
* enabled. We may switch back and forth between cpus while
* reading from one cpu area. That does not matter as long
* as we end up on the original cpu again when doing the cmpxchg.
*
* Preemption is disabled for the retrieval of the tid because that
* must occur from the current processor. We cannot allow rescheduling
* on a different processor between the determination of the pointer
* and the retrieval of the tid.
*/
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab);
/*
@@ -2347,7 +2353,7 @@ redo:
* linked list in between.
*/
tid = c->tid;
barrier();
preempt_enable();
object = c->freelist;
page = c->page;
@@ -2594,10 +2600,11 @@ redo:
* data is retrieved via this pointer. If we are on the same cpu
* during the cmpxchg then the free will succeed.
*/
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab);
tid = c->tid;
barrier();
preempt_enable();
if (likely(page == c->page)) {
set_freepointer(s, object, c->freelist);
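
Both the allocation and the free fast path now read the transaction id under preempt_disable()/preempt_enable() rather than a bare barrier(), so the tid is guaranteed to come from the current processor; the cmpxchg that follows still detects migration or concurrent activity and forces a retry. A loose userspace sketch of that optimistic, tid-checked scheme using C11 atomics. The real code uses this_cpu_cmpxchg_double() over the freelist/tid pair; everything below is simplified and all names are illustrative:

#include <stdatomic.h>
#include <stdio.h>

struct cpu_slab_sketch {
        atomic_ulong tid;       /* bumped on every successful operation */
        void *freelist;
};

static int try_alloc(struct cpu_slab_sketch *c, void **objp)
{
        unsigned long tid = atomic_load(&c->tid);      /* snapshot the id */
        void *object = c->freelist;                    /* speculative read */

        if (!object)
                return 0;
        /* publish only if nothing changed since the snapshot */
        if (!atomic_compare_exchange_strong(&c->tid, &tid, tid + 1))
                return 0;                              /* raced: caller retries */
        c->freelist = NULL;                            /* "take" the object */
        *objp = object;
        return 1;
}

int main(void)
{
        int x = 42;
        struct cpu_slab_sketch c = { .freelist = &x };
        void *obj;

        atomic_init(&c.tid, 0);
        if (try_alloc(&c, &obj))
                printf("allocated %p, tid now %lu\n", obj, atomic_load(&c.tid));
        return 0;
}
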
@@ -2775,7 +2782,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
/*
* Must align to double word boundary for the double cmpxchg
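
BUILD_BUG_ON() is a compile-time assertion; the change only swaps the bound from SLUB_PAGE_SHIFT to the new common KMALLOC_SHIFT_HIGH. A userspace analogue with C11 _Static_assert, using assumed stand-ins for the kernel constants and per-cpu structure:

#define PERCPU_DYNAMIC_EARLY_SIZE       (12 * 1024)     /* assumed value */
#define KMALLOC_SHIFT_HIGH              13              /* assumed value */

struct kmem_cache_cpu_sketch {
        void *freelist;
        unsigned long tid;
        void *page;
};

_Static_assert(PERCPU_DYNAMIC_EARLY_SIZE >=
               KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu_sketch),
               "early per-cpu area too small for the kmalloc cache array");

int main(void)
{
        return 0;
}
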
@@ -2982,7 +2989,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
s->allocflags |= __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= SLUB_DMA;
s->allocflags |= GFP_DMA;
if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE;
@@ -3174,13 +3181,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
* Kmalloc subsystem
*******************************************************************/
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);
#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
static int __init setup_slub_min_order(char *str)
{
get_option(&str, &slub_min_order);
@@ -3217,73 +3217,15 @@ static int __init setup_slub_nomerge(char *str)
__setup("slub_nomerge", setup_slub_nomerge);
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
* of two cache sizes there. The size of larger slabs can be determined using
* fls.
*/
static s8 size_index[24] = {
3, /* 8 */
4, /* 16 */
5, /* 24 */
5, /* 32 */
6, /* 40 */
6, /* 48 */
6, /* 56 */
6, /* 64 */
1, /* 72 */
1, /* 80 */
1, /* 88 */
1, /* 96 */
7, /* 104 */
7, /* 112 */
7, /* 120 */
7, /* 128 */
2, /* 136 */
2, /* 144 */
2, /* 152 */
2, /* 160 */
2, /* 168 */
2, /* 176 */
2, /* 184 */
2 /* 192 */
};
static inline int size_index_elem(size_t bytes)
{
return (bytes - 1) / 8;
}
static struct kmem_cache *get_slab(size_t size, gfp_t flags)
{
int index;
if (size <= 192) {
if (!size)
return ZERO_SIZE_PTR;
index = size_index[size_index_elem(size)];
} else
index = fls(size - 1);
#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & SLUB_DMA)))
return kmalloc_dma_caches[index];
#endif
return kmalloc_caches[index];
}
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, flags);
s = get_slab(size, flags);
s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
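
The removed size_index[]/get_slab() pair mapped a request size to a kmalloc cache index: a lookup table for the non-power-of-two caches at and below 192 bytes, fls() for everything larger. That logic is now provided by the shared kmalloc_slab() helper used above. A standalone sketch of the mapping, with the table values copied from the removed code and a local fls(), since that is a kernel/BSD helper rather than standard C:

#include <stddef.h>
#include <stdio.h>

/* values copied from the removed size_index[] table above */
static const signed char size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6,         /*   8 ..  64 bytes */
        1, 1, 1, 1, 7, 7, 7, 7,         /*  72 .. 128 bytes */
        2, 2, 2, 2, 2, 2, 2, 2          /* 136 .. 192 bytes */
};

/* kernel-style fls(): 1-based index of the highest set bit, fls(0) == 0 */
static int fls_sketch(unsigned long x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

static int kmalloc_index_sketch(size_t size)
{
        if (!size)
                return -1;                      /* the ZERO_SIZE_PTR case */
        if (size <= 192)
                return size_index[(size - 1) / 8];
        return fls_sketch(size - 1);            /* power-of-two caches */
}

int main(void)
{
        const size_t sizes[] = { 8, 96, 192, 200, 4096 };

        for (int i = 0; i < 5; i++)
                printf("kmalloc(%zu) -> cache index %d\n",
                       sizes[i], kmalloc_index_sketch(sizes[i]));
        return 0;
}
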
@@ -3316,7 +3258,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) {
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);
trace_kmalloc_node(_RET_IP_, ret,
@@ -3326,7 +3268,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
return ret;
}
s = get_slab(size, flags);
s = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@@ -3617,6 +3559,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
memcpy(s, static_cache, kmem_cache->object_size);
/*
* This runs very early, and only the boot processor is supposed to be
* up. Even if it weren't true, IRQs are not up so we couldn't fire
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
struct page *p;
@@ -3639,8 +3587,6 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
int i;
int caches = 2;
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -3671,103 +3617,16 @@ void __init kmem_cache_init(void)
kmem_cache_node = bootstrap(&boot_kmem_cache_node);
/* Now we can use the kmem_cache to allocate kmalloc slabs */
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
* MIPS it seems. The standard arches will not generate any code here.
*
* Largest permitted alignment is 256 bytes due to the way we
* handle the index determination for the smaller caches.
*
* Make sure that nothing crazy happens if someone starts tinkering
* around with ARCH_KMALLOC_MINALIGN
*/
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
int elem = size_index_elem(i);
if (elem >= ARRAY_SIZE(size_index))
break;
size_index[elem] = KMALLOC_SHIFT_LOW;
}
if (KMALLOC_MIN_SIZE == 64) {
/*
* The 96 byte size cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
size_index[size_index_elem(i)] = 7;
} else if (KMALLOC_MIN_SIZE == 128) {
/*
* The 192 byte sized cache is not used if the alignment
* is 128 byte. Redirect kmalloc to use the 256 byte cache
* instead.
*/
for (i = 128 + 8; i <= 192; i += 8)
size_index[size_index_elem(i)] = 8;
}
/* Caches that are not of the two-to-the-power-of size */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
caches++;
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
caches++;
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
caches++;
}
slab_state = UP;
/* Provide the correct kmalloc names now that the caches are up */
if (KMALLOC_MIN_SIZE <= 32) {
kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[1]->name);
}
if (KMALLOC_MIN_SIZE <= 64) {
kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
BUG_ON(!kmalloc_caches[2]->name);
}
for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
BUG_ON(!s);
kmalloc_caches[i]->name = s;
}
create_kmalloc_caches(0);
#ifdef CONFIG_SMP
register_cpu_notifier(&slab_notifier);
#endif
#ifdef CONFIG_ZONE_DMA
for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
struct kmem_cache *s = kmalloc_caches[i];
if (s && s->size) {
char *name = kasprintf(GFP_NOWAIT,
"dma-kmalloc-%d", s->object_size);
BUG_ON(!name);
kmalloc_dma_caches[i] = create_kmalloc_cache(name,
s->object_size, SLAB_CACHE_DMA);
}
}
#endif
printk(KERN_INFO
"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
"SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
" CPUs=%d, Nodes=%d\n",
caches, cache_line_size(),
cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects,
nr_cpu_ids, nr_node_ids);
}
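
The size_index patch-up dropped from kmem_cache_init() above dealt with architectures whose ARCH_KMALLOC_MINALIGN makes some small caches unusable, e.g. a 64-byte minimum leaves no valid 96-byte cache; that fixup, like the cache creation itself, now happens in the shared kmalloc setup behind create_kmalloc_caches(). A hedged illustration of the fixup for an assumed KMALLOC_MIN_SIZE of 64; the table values come from the removed code, the constants and scaffolding are assumptions:

#include <stdio.h>

#define KMALLOC_MIN_SIZE        64      /* assumed: arch with 64-byte minimum alignment */
#define KMALLOC_SHIFT_LOW       6       /* log2(KMALLOC_MIN_SIZE) */

static signed char size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6, 1, 1, 1, 1,
        7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2
};

int main(void)
{
        int i;

        /* requests below the minimum all land in the smallest real cache */
        for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;

        /*
         * With 64-byte alignment the 96-byte cache cannot be used:
         * redirect 72..96 byte requests to kmalloc-128 (index 7).
         */
        for (i = 64 + 8; i <= 96; i += 8)
                size_index[(i - 1) / 8] = 7;

        for (i = 0; i < 24; i++)
                printf("size %3d -> index %d\n", (i + 1) * 8, size_index[i]);
        return 0;
}
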
@@ -3930,10 +3789,10 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, gfpflags);
s = get_slab(size, gfpflags);
s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@@ -3953,7 +3812,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE)) {
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);
trace_kmalloc_node(caller, ret,
@@ -3963,7 +3822,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return ret;
}
s = get_slab(size, gfpflags);
s = kmalloc_slab(size, gfpflags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
@@ -4312,7 +4171,7 @@ static void resiliency_test(void)
{
u8 *p;
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
printk(KERN_ERR "SLUB resiliency testing\n");
printk(KERN_ERR "-----------------------\n");