mm: memcg/slab: use a single set of kmem_caches for all allocations
Instead of having two sets of kmem_caches (one for system-wide and non-accounted allocations, and a second shared by all accounted allocations), we can use just one.

The idea is simple: space for obj_cgroup metadata can be allocated on demand and filled only for accounted allocations.

This allows removing a large amount of code that was needed to handle kmem_cache clones for accounted allocations: there is no longer any need to create them, accumulate statistics, propagate attributes, etc. It is quite a significant simplification.

Also, because the total number of slab_caches is nearly halved (not every kmem_cache has a memcg clone), some additional memory savings are expected. On my devvm it saves an additional ~3.5% of slab memory.

[guro@fb.com: fix build on MIPS]
  Link: http://lkml.kernel.org/r/20200717214810.3733082-1-guro@fb.com

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-18-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
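To make the on-demand metadata idea above concrete, here is a minimal userspace sketch. It is not the kernel implementation: every name below (slab_page, page_obj_cgroups_ensure, slab_post_alloc_hook) is invented for illustration. It only models the pattern the message describes: a per-slab-page vector of obj_cgroup pointers is created the first time an accounted object is allocated on that page, and unaccounted allocations never pay for it beyond a NULL check.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct obj_cgroup { const char *name; };       /* stand-in for the real struct */

struct slab_page {
        unsigned int objects;                  /* objects per slab page */
        struct obj_cgroup **obj_cgroups;       /* NULL until the first accounted alloc */
};

/* Allocate the metadata vector on demand; a no-op if it already exists. */
static int page_obj_cgroups_ensure(struct slab_page *page)
{
        if (page->obj_cgroups)
                return 0;
        page->obj_cgroups = calloc(page->objects, sizeof(*page->obj_cgroups));
        return page->obj_cgroups ? 0 : -1;
}

/* Record ownership only for accounted allocations. */
static int slab_post_alloc_hook(struct slab_page *page, unsigned int idx,
                                struct obj_cgroup *objcg, bool accounted)
{
        if (!accounted)
                return 0;                      /* unaccounted: no metadata needed */
        if (page_obj_cgroups_ensure(page))
                return -1;
        page->obj_cgroups[idx] = objcg;        /* fill the slot for this object */
        return 0;
}

int main(void)
{
        struct obj_cgroup cg = { .name = "memcg-A" };
        struct slab_page page = { .objects = 4, .obj_cgroups = NULL };

        slab_post_alloc_hook(&page, 0, NULL, false);   /* system alloc: vector stays NULL */
        slab_post_alloc_hook(&page, 1, &cg, true);     /* accounted: vector created, slot filled */

        printf("vector %s, slot 1 -> %s\n",
               page.obj_cgroups ? "allocated" : "absent",
               (page.obj_cgroups && page.obj_cgroups[1]) ? page.obj_cgroups[1]->name : "none");
        free(page.obj_cgroups);
        return 0;
}

Roughly speaking, in the kernel the vector is attached to the slab's struct page and the hooks live in the slab allocator itself, but the lazy-allocation pattern is the same idea.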
Committed by: Linus Torvalds

Parent:  15999eef7f
Commit:  10befea91b

Changed file: mm/slub.c (163 lines)
@@ -218,14 +218,10 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
-static void sysfs_slab_remove(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
                                                        { return 0; }
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1624,10 +1620,8 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
        else
                page = __alloc_pages_node(node, flags, order);
 
-       if (page && charge_slab_page(page, flags, order, s)) {
-               __free_pages(page, order);
-               page = NULL;
-       }
+       if (page)
+               charge_slab_page(page, flags, order, s);
 
        return page;
 }
@@ -3920,7 +3914,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
                if (n->nr_partial || slabs_node(s, node))
                        return 1;
        }
-       sysfs_slab_remove(s);
        return 0;
 }
 
@@ -4358,7 +4351,6 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
                p->slab_cache = s;
 #endif
        }
-       slab_init_memcg_params(s);
        list_add(&s->list, &slab_caches);
        return s;
 }
@@ -4414,7 +4406,7 @@ struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
                   slab_flags_t flags, void (*ctor)(void *))
 {
-       struct kmem_cache *s, *c;
+       struct kmem_cache *s;
 
        s = find_mergeable(size, align, flags, name, ctor);
        if (s) {
@@ -4427,12 +4419,6 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
                s->object_size = max(s->object_size, size);
                s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
 
-               c = memcg_cache(s);
-               if (c) {
-                       c->object_size = s->object_size;
-                       c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
-               }
-
                if (sysfs_slab_alias(s, name)) {
                        s->refcount--;
                        s = NULL;
@@ -4454,7 +4440,6 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
        if (slab_state <= UP)
                return 0;
 
-       memcg_propagate_slab_attrs(s);
        err = sysfs_slab_add(s);
        if (err)
                __kmem_cache_release(s);
@@ -5312,7 +5297,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
                        const char *buf, size_t length)
 {
        if (buf[0] == '1')
-               kmem_cache_shrink_all(s);
+               kmem_cache_shrink(s);
        else
                return -EINVAL;
        return length;
@@ -5536,99 +5521,9 @@ static ssize_t slab_attr_store(struct kobject *kobj,
                return -EIO;
 
        err = attribute->store(s, buf, len);
-#ifdef CONFIG_MEMCG
-       if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
-               struct kmem_cache *c;
-
-               mutex_lock(&slab_mutex);
-               if (s->max_attr_size < len)
-                       s->max_attr_size = len;
-
-               /*
-                * This is a best effort propagation, so this function's return
-                * value will be determined by the parent cache only. This is
-                * basically because not all attributes will have a well
-                * defined semantics for rollbacks - most of the actions will
-                * have permanent effects.
-                *
-                * Returning the error value of any of the children that fail
-                * is not 100 % defined, in the sense that users seeing the
-                * error code won't be able to know anything about the state of
-                * the cache.
-                *
-                * Only returning the error code for the parent cache at least
-                * has well defined semantics. The cache being written to
-                * directly either failed or succeeded, in which case we loop
-                * through the descendants with best-effort propagation.
-                */
-               c = memcg_cache(s);
-               if (c)
-                       attribute->store(c, buf, len);
-               mutex_unlock(&slab_mutex);
-       }
-#endif
        return err;
 }
 
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
-{
-#ifdef CONFIG_MEMCG
-       int i;
-       char *buffer = NULL;
-       struct kmem_cache *root_cache;
-
-       if (is_root_cache(s))
-               return;
-
-       root_cache = s->memcg_params.root_cache;
-
-       /*
-        * This mean this cache had no attribute written. Therefore, no point
-        * in copying default values around
-        */
-       if (!root_cache->max_attr_size)
-               return;
-
-       for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
-               char mbuf[64];
-               char *buf;
-               struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
-               ssize_t len;
-
-               if (!attr || !attr->store || !attr->show)
-                       continue;
-
-               /*
-                * It is really bad that we have to allocate here, so we will
-                * do it only as a fallback. If we actually allocate, though,
-                * we can just use the allocated buffer until the end.
-                *
-                * Most of the slub attributes will tend to be very small in
-                * size, but sysfs allows buffers up to a page, so they can
-                * theoretically happen.
-                */
-               if (buffer)
-                       buf = buffer;
-               else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
-                        !IS_ENABLED(CONFIG_SLUB_STATS))
-                       buf = mbuf;
-               else {
-                       buffer = (char *) get_zeroed_page(GFP_KERNEL);
-                       if (WARN_ON(!buffer))
-                               continue;
-                       buf = buffer;
-               }
-
-               len = attr->show(root_cache, buf);
-               if (len > 0)
-                       attr->store(s, buf, len);
-       }
-
-       if (buffer)
-               free_page((unsigned long)buffer);
-#endif /* CONFIG_MEMCG */
-}
-
 static void kmem_cache_release(struct kobject *k)
 {
        slab_kmem_cache_release(to_slab(k));
@@ -5648,10 +5543,6 @@ static struct kset *slab_kset;
 
 static inline struct kset *cache_kset(struct kmem_cache *s)
 {
-#ifdef CONFIG_MEMCG
-       if (!is_root_cache(s))
-               return s->memcg_params.root_cache->memcg_kset;
-#endif
        return slab_kset;
 }
 
@@ -5694,27 +5585,6 @@ static char *create_unique_id(struct kmem_cache *s)
        return name;
 }
 
-static void sysfs_slab_remove_workfn(struct work_struct *work)
-{
-       struct kmem_cache *s =
-               container_of(work, struct kmem_cache, kobj_remove_work);
-
-       if (!s->kobj.state_in_sysfs)
-               /*
-                * For a memcg cache, this may be called during
-                * deactivation and again on shutdown. Remove only once.
-                * A cache is never shut down before deactivation is
-                * complete, so no need to worry about synchronization.
-                */
-               goto out;
-
-#ifdef CONFIG_MEMCG
-       kset_unregister(s->memcg_kset);
-#endif
-out:
-       kobject_put(&s->kobj);
-}
-
 static int sysfs_slab_add(struct kmem_cache *s)
 {
        int err;
@@ -5722,8 +5592,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
        struct kset *kset = cache_kset(s);
        int unmergeable = slab_unmergeable(s);
 
-       INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
-
        if (!kset) {
                kobject_init(&s->kobj, &slab_ktype);
                return 0;
@@ -5760,16 +5628,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
        if (err)
                goto out_del_kobj;
 
-#ifdef CONFIG_MEMCG
-       if (is_root_cache(s) && memcg_sysfs_enabled) {
-               s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
-               if (!s->memcg_kset) {
-                       err = -ENOMEM;
-                       goto out_del_kobj;
-               }
-       }
-#endif
-
        if (!unmergeable) {
                /* Setup first alias */
                sysfs_slab_alias(s, s->name);
@@ -5783,19 +5641,6 @@ out_del_kobj:
        goto out;
 }
 
-static void sysfs_slab_remove(struct kmem_cache *s)
-{
-       if (slab_state < FULL)
-               /*
-                * Sysfs has not been setup yet so no need to remove the
-                * cache from sysfs.
-                */
-               return;
-
-       kobject_get(&s->kobj);
-       schedule_work(&s->kobj_remove_work);
-}
-
 void sysfs_slab_unlink(struct kmem_cache *s)
 {
        if (slab_state >= FULL)