mm: memcg/slab: use a single set of kmem_caches for all allocations

Instead of having two sets of kmem_caches (one for system-wide and
non-accounted allocations, and a second shared by all accounted
allocations), we can use just one.

The idea is simple: space for obj_cgroup metadata can be allocated on
demand and filled only for accounted allocations.
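
As a simplified sketch (hypothetical helper name, not the exact code:
the real implementation lives in mm/memcontrol.c as part of this
series, and additionally tags the stored pointer to distinguish it from
a mem_cgroup pointer), the per-page vector of obj_cgroup pointers is
allocated lazily, so non-accounted allocations never pay for the
metadata:

	/*
	 * Sketch only: allocate the obj_cgroup vector (one slot per
	 * object on the slab page) on demand, when the first accounted
	 * allocation hits this page.
	 */
	static int alloc_obj_cgroups(struct page *page, unsigned int objects,
				     gfp_t gfp)
	{
		struct obj_cgroup **vec;

		if (page_has_obj_cgroups(page))	/* metadata already present */
			return 0;

		vec = kcalloc_node(objects, sizeof(*vec), gfp,
				   page_to_nid(page));
		if (!vec)
			return -ENOMEM;

		page->obj_cgroups = vec;
		return 0;
	}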

This allows the removal of a large amount of code required to handle
kmem_cache clones for accounted allocations.  There is no longer any
need to create them, accumulate statistics, propagate attributes, etc.
It's quite a significant simplification.

Also, because the total number of slab_caches is nearly halved (not
every kmem_cache has a memcg clone), some additional memory savings are
expected.  On my devvm it additionally saves about 3.5% of slab memory.

[guro@fb.com: fix build on MIPS]
  Link: http://lkml.kernel.org/r/20200717214810.3733082-1-guro@fb.com

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-18-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Author: Roman Gushchin
Date: 2020-08-06 23:21:27 -07:00
Committed by: Linus Torvalds
Parent: 15999eef7f
Commit: 10befea91b
8 files changed, 78 insertions(+), 590 deletions(-)

mm/slub.c (163 changed lines)

@@ -218,14 +218,10 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
-static void sysfs_slab_remove(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1624,10 +1620,8 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
 	else
 		page = __alloc_pages_node(node, flags, order);
 
-	if (page && charge_slab_page(page, flags, order, s)) {
-		__free_pages(page, order);
-		page = NULL;
-	}
+	if (page)
+		charge_slab_page(page, flags, order, s);
 
 	return page;
 }
@@ -3920,7 +3914,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 		if (n->nr_partial || slabs_node(s, node))
 			return 1;
 	}
-	sysfs_slab_remove(s);
 	return 0;
 }
@@ -4358,7 +4351,6 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 		p->slab_cache = s;
 #endif
 	}
-	slab_init_memcg_params(s);
 	list_add(&s->list, &slab_caches);
 	return s;
 }
@@ -4414,7 +4406,7 @@ struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 		   slab_flags_t flags, void (*ctor)(void *))
 {
-	struct kmem_cache *s, *c;
+	struct kmem_cache *s;
 
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
@@ -4427,12 +4419,6 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 		s->object_size = max(s->object_size, size);
 		s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
 
-		c = memcg_cache(s);
-		if (c) {
-			c->object_size = s->object_size;
-			c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
-		}
-
 		if (sysfs_slab_alias(s, name)) {
 			s->refcount--;
 			s = NULL;
@@ -4454,7 +4440,6 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 	if (slab_state <= UP)
 		return 0;
 
-	memcg_propagate_slab_attrs(s);
 	err = sysfs_slab_add(s);
 	if (err)
 		__kmem_cache_release(s);
@@ -5312,7 +5297,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
 			    const char *buf, size_t length)
 {
 	if (buf[0] == '1')
-		kmem_cache_shrink_all(s);
+		kmem_cache_shrink(s);
 	else
 		return -EINVAL;
 	return length;
@@ -5536,99 +5521,9 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		return -EIO;
 
 	err = attribute->store(s, buf, len);
-#ifdef CONFIG_MEMCG
-	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
-		struct kmem_cache *c;
-
-		mutex_lock(&slab_mutex);
-		if (s->max_attr_size < len)
-			s->max_attr_size = len;
-
-		/*
-		 * This is a best effort propagation, so this function's return
-		 * value will be determined by the parent cache only. This is
-		 * basically because not all attributes will have a well
-		 * defined semantics for rollbacks - most of the actions will
-		 * have permanent effects.
-		 *
-		 * Returning the error value of any of the children that fail
-		 * is not 100 % defined, in the sense that users seeing the
-		 * error code won't be able to know anything about the state of
-		 * the cache.
-		 *
-		 * Only returning the error code for the parent cache at least
-		 * has well defined semantics. The cache being written to
-		 * directly either failed or succeeded, in which case we loop
-		 * through the descendants with best-effort propagation.
-		 */
-		c = memcg_cache(s);
-		if (c)
-			attribute->store(c, buf, len);
-		mutex_unlock(&slab_mutex);
-	}
-#endif
 	return err;
 }
 
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
-{
-#ifdef CONFIG_MEMCG
-	int i;
-	char *buffer = NULL;
-	struct kmem_cache *root_cache;
-
-	if (is_root_cache(s))
-		return;
-
-	root_cache = s->memcg_params.root_cache;
-
-	/*
-	 * This mean this cache had no attribute written. Therefore, no point
-	 * in copying default values around
-	 */
-	if (!root_cache->max_attr_size)
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
-		char mbuf[64];
-		char *buf;
-		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
-		ssize_t len;
-
-		if (!attr || !attr->store || !attr->show)
-			continue;
-
-		/*
-		 * It is really bad that we have to allocate here, so we will
-		 * do it only as a fallback. If we actually allocate, though,
-		 * we can just use the allocated buffer until the end.
-		 *
-		 * Most of the slub attributes will tend to be very small in
-		 * size, but sysfs allows buffers up to a page, so they can
-		 * theoretically happen.
-		 */
-		if (buffer)
-			buf = buffer;
-		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
-			 !IS_ENABLED(CONFIG_SLUB_STATS))
-			buf = mbuf;
-		else {
-			buffer = (char *) get_zeroed_page(GFP_KERNEL);
-			if (WARN_ON(!buffer))
-				continue;
-			buf = buffer;
-		}
-
-		len = attr->show(root_cache, buf);
-		if (len > 0)
-			attr->store(s, buf, len);
-	}
-
-	if (buffer)
-		free_page((unsigned long)buffer);
-#endif	/* CONFIG_MEMCG */
-}
-
 static void kmem_cache_release(struct kobject *k)
 {
 	slab_kmem_cache_release(to_slab(k));
@@ -5648,10 +5543,6 @@ static struct kset *slab_kset;
 static inline struct kset *cache_kset(struct kmem_cache *s)
 {
-#ifdef CONFIG_MEMCG
-	if (!is_root_cache(s))
-		return s->memcg_params.root_cache->memcg_kset;
-#endif
 	return slab_kset;
 }
@@ -5694,27 +5585,6 @@ static char *create_unique_id(struct kmem_cache *s)
 	return name;
 }
 
-static void sysfs_slab_remove_workfn(struct work_struct *work)
-{
-	struct kmem_cache *s =
-		container_of(work, struct kmem_cache, kobj_remove_work);
-
-	if (!s->kobj.state_in_sysfs)
-		/*
-		 * For a memcg cache, this may be called during
-		 * deactivation and again on shutdown. Remove only once.
-		 * A cache is never shut down before deactivation is
-		 * complete, so no need to worry about synchronization.
-		 */
-		goto out;
-
-#ifdef CONFIG_MEMCG
-	kset_unregister(s->memcg_kset);
-#endif
-out:
-	kobject_put(&s->kobj);
-}
-
 static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
@@ -5722,8 +5592,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
-	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
-
 	if (!kset) {
 		kobject_init(&s->kobj, &slab_ktype);
 		return 0;
@@ -5760,16 +5628,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	if (err)
 		goto out_del_kobj;
 
-#ifdef CONFIG_MEMCG
-	if (is_root_cache(s) && memcg_sysfs_enabled) {
-		s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
-		if (!s->memcg_kset) {
-			err = -ENOMEM;
-			goto out_del_kobj;
-		}
-	}
-#endif
-
 	if (!unmergeable) {
 		/* Setup first alias */
 		sysfs_slab_alias(s, s->name);
@@ -5783,19 +5641,6 @@ out_del_kobj:
 	goto out;
 }
 
-static void sysfs_slab_remove(struct kmem_cache *s)
-{
-	if (slab_state < FULL)
-		/*
-		 * Sysfs has not been setup yet so no need to remove the
-		 * cache from sysfs.
-		 */
-		return;
-
-	kobject_get(&s->kobj);
-	schedule_work(&s->kobj_remove_work);
-}
-
 void sysfs_slab_unlink(struct kmem_cache *s)
 {
 	if (slab_state >= FULL)