Merge branch 'akpm' (more patches from Andrew)
Merge patches from Andrew Morton:
 "Most of the rest of MM, plus a few dribs and drabs.

  I still have quite a few irritating patches left around: ones with
  dubious testing results, lack of review, ones which should have gone
  via maintainer trees but the maintainers are slack, etc.

  I need to be more activist in getting these things wrapped up outside
  the merge window, but they're such a PITA."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (48 commits)
  mm/vmscan.c: avoid possible deadlock caused by too_many_isolated()
  vmscan: comment too_many_isolated()
  mm/kmemleak.c: remove obsolete simple_strtoul
  mm/memory_hotplug.c: improve comments
  mm/hugetlb: create hugetlb cgroup file in hugetlb_init
  mm/mprotect.c: coding-style cleanups
  Documentation: ABI: /sys/devices/system/node/
  slub: drop mutex before deleting sysfs entry
  memcg: add comments clarifying aspects of cache attribute propagation
  kmem: add slab-specific documentation about the kmem controller
  slub: slub-specific propagation changes
  slab: propagate tunable values
  memcg: aggregate memcg cache values in slabinfo
  memcg/sl[au]b: shrink dead caches
  memcg/sl[au]b: track all the memcg children of a kmem_cache
  memcg: destroy memcg caches
  sl[au]b: allocate objects from memcg cache
  sl[au]b: always get the cache from its page in kmem_cache_free()
  memcg: skip memcg kmem allocations in specified code regions
  memcg: infrastructure to match an allocation to the right cache
  ...
@@ -30,6 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u

@@ -89,6 +90,7 @@ struct vm_area_struct;

#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */

/*
@@ -365,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);

extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

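A minimal sketch of how these two additions are meant to be used together. The helper names alloc_accounted_buffer/free_accounted_buffer are illustrative and not part of this merge: pages charged to a memcg via __GFP_KMEMCG at allocation time have to be released with the new memcg-aware free helper so the charge is dropped along with the pages.

    #include <linux/gfp.h>

    /* Illustrative only: allocate pages charged to the current task's
     * memcg, then release both the pages and the charge. */
    static unsigned long alloc_accounted_buffer(unsigned int order)
    {
            return __get_free_pages(GFP_KERNEL | __GFP_KMEMCG, order);
    }

    static void free_accounted_buffer(unsigned long addr, unsigned int order)
    {
            /* A plain free_pages() here would leak the memcg charge
             * taken at allocation time. */
            free_memcg_kmem_pages(addr, order);
    }
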
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
					 struct page *page);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg);
extern int hugetlb_cgroup_file_init(int idx) __init;
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
				   struct page *newhpage);

@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
	return;
}

static inline int __init hugetlb_cgroup_file_init(int idx)
static inline void hugetlb_cgroup_file_init(void)
{
	return 0;
}

static inline void hugetlb_cgroup_migrate(struct page *oldhpage,

@@ -21,11 +21,14 @@
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>

struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Stats that can be updated by kernel. */
enum mem_cgroup_page_stat_item {

@@ -414,5 +417,211 @@ static inline void sock_release_memcg(struct sock *sk)
{
}
#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key;

extern int memcg_limited_groups_array_size;

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * the slab_mutex must be held when looping through those caches
 */
#define for_each_memcg_cache_index(_idx)	\
	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}

/*
 * In general, we'll do everything in our power to not incur in any overhead
 * for non-memcg users for the kmem functions. Not even a function call, if we
 * can avoid it.
 *
 * Therefore, we'll inline all those functions so that in the best case, we'll
 * see that kmemcg is off for everybody and proceed quickly. If it is on,
 * we'll still do most of the flag checking inline. We check a lot of
 * conditions, but because they are pretty simple, they are expected to be
 * fast.
 */
bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
					int order);
void __memcg_kmem_commit_charge(struct page *page,
				       struct mem_cgroup *memcg, int order);
void __memcg_kmem_uncharge_pages(struct page *page, int order);

int memcg_cache_id(struct mem_cgroup *memcg);
int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
			 struct kmem_cache *root_cache);
void memcg_release_cache(struct kmem_cache *cachep);
void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);

int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);

struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);

void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
void kmem_cache_destroy_memcg_children(struct kmem_cache *s);

/**
 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
 * @gfp: the gfp allocation flags.
 * @memcg: a pointer to the memcg this was charged against.
 * @order: allocation order.
 *
 * returns true if the memcg where the current task belongs can hold this
 * allocation.
 *
 * We return true automatically if this allocation is not to be accounted to
 * any memcg.
 */
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	if (!memcg_kmem_enabled())
		return true;

	/*
	 * __GFP_NOFAIL allocations will move on even if charging is not
	 * possible. Therefore we don't even try, and have this allocation
	 * unaccounted. We could in theory charge it with
	 * res_counter_charge_nofail, but we hope those allocations are rare,
	 * and won't be worth the trouble.
	 */
	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
		return true;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return true;

	/* If the test is dying, just let it go. */
	if (unlikely(fatal_signal_pending(current)))
		return true;

	return __memcg_kmem_newpage_charge(gfp, memcg, order);
}

/**
 * memcg_kmem_uncharge_pages: uncharge pages from memcg
 * @page: pointer to struct page being freed
 * @order: allocation order.
 *
 * there is no need to specify memcg here, since it is embedded in page_cgroup
 */
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
{
	if (memcg_kmem_enabled())
		__memcg_kmem_uncharge_pages(page, order);
}

/**
 * memcg_kmem_commit_charge: embeds correct memcg in a page
 * @page: pointer to struct page recently allocated
 * @memcg: the memcg structure we charged against
 * @order: allocation order.
 *
 * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
 * failure of the allocation. if @page is NULL, this function will revert the
 * charges. Otherwise, it will commit the memcg given by @memcg to the
 * corresponding page_cgroup.
 */
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
	if (memcg_kmem_enabled() && memcg)
		__memcg_kmem_commit_charge(page, memcg, order);
}
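
Taken together, the three wrappers above define the accounting protocol for page allocations: reserve the charge before allocating, then commit it to the new page, or roll it back by committing against NULL. A minimal sketch of a caller, assuming a hypothetical helper name (the real hook sites sit in the page allocator and are not shown in this hunk):

    #include <linux/gfp.h>
    #include <linux/memcontrol.h>

    /* Illustrative only: how newpage_charge/commit_charge are meant to
     * bracket an allocation. The charge only happens when gfp carries
     * __GFP_KMEMCG; otherwise the wrappers are no-ops returning true. */
    static struct page *alloc_accounted_pages(gfp_t gfp, unsigned int order)
    {
            struct mem_cgroup *memcg = NULL;
            struct page *page;

            /* Fails only when the task's memcg is over its kmem limit. */
            if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
                    return NULL;

            page = alloc_pages(gfp, order);

            /* Commits the charge to the page's page_cgroup, or reverts it
             * if the allocation itself failed (page == NULL). */
            memcg_kmem_commit_charge(page, memcg, order);
            return page;
    }
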
/**
 * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
 * @cachep: the original global kmem cache
 * @gfp: allocation flags.
 *
 * This function assumes that the task allocating, which determines the memcg
 * in the page allocator, belongs to the same cgroup throughout the whole
 * process. Misacounting can happen if the task calls memcg_kmem_get_cache()
 * while belonging to a cgroup, and later on changes. This is considered
 * acceptable, and should only happen upon task migration.
 *
 * Before the cache is created by the memcg core, there is also a possible
 * imbalance: the task belongs to a memcg, but the cache being allocated from
 * is the global cache, since the child cache is not yet guaranteed to be
 * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
 * passed and the page allocator will not attempt any cgroup accounting.
 */
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (!memcg_kmem_enabled())
		return cachep;
	if (gfp & __GFP_NOFAIL)
		return cachep;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return cachep;
	if (unlikely(fatal_signal_pending(current)))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}
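
On the object-allocation side, the comment above boils down to the following call pattern. The wrapper name accounted_cache_alloc is hypothetical; in this series the same redirection happens inside the slab allocators' own entry points, and it is sketched here as a standalone wrapper only for clarity:

    #include <linux/memcontrol.h>
    #include <linux/slab.h>

    /* Illustrative only: callers keep passing the root cache; the
     * per-memcg child cache (if any) is substituted transparently. */
    static void *accounted_cache_alloc(struct kmem_cache *cachep, gfp_t gfp)
    {
            struct kmem_cache *s = memcg_kmem_get_cache(cachep, gfp);

            return kmem_cache_alloc(s, gfp);
    }
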
#else
#define for_each_memcg_cache_index(_idx)	\
	for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
	return false;
}

static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	return true;
}

static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
{
}

static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
	return -1;
}

static inline int
memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
		     struct kmem_cache *root_cache)
{
	return 0;
}

static inline void memcg_release_cache(struct kmem_cache *cachep)
{
}

static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
					struct kmem_cache *s)
{
}

static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	return cachep;
}

static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */

@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
 *
 * these calls check for usage underflow and show a warning on the console
 * _locked call expects the counter->lock to be taken
 *
 * returns the total charges still present in @counter.
 */

void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
void res_counter_uncharge(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);

void res_counter_uncharge_until(struct res_counter *counter,
				struct res_counter *top,
				unsigned long val);
u64 res_counter_uncharge_until(struct res_counter *counter,
			       struct res_counter *top,
			       unsigned long val);
/**
 * res_counter_margin - calculate chargeable space of a counter
 * @cnt: the counter

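The only functional change in this hunk is the return type: the uncharge helpers now report how much usage remains on the counter. A hedged sketch of why a caller wants that; the function name and surrounding logic here are illustrative, not from this hunk:

    #include <linux/res_counter.h>

    /* Illustrative only: react when the final charge is dropped, without
     * re-reading the counter under its lock. */
    static void uncharge_and_check_empty(struct res_counter *cnt,
                                         unsigned long val)
    {
            if (res_counter_uncharge(cnt, val))
                    return;         /* charges still outstanding */

            /* Usage reached zero: any teardown deferred until the last
             * charge went away can run now. */
    }
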
@@ -1597,6 +1597,7 @@ struct task_struct {
		unsigned long nr_pages;	/* uncharged usage */
		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
	} memcg_batch;
	unsigned int memcg_kmem_skip_account;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
	atomic_t ptrace_bp_refcnt;

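The new memcg_kmem_skip_account counter lets memcg mark code regions whose internal allocations must not be accounted, so that accounting cannot recurse into itself (for example while creating a per-memcg cache). A minimal sketch of the intended usage, with illustrative helper names:

    #include <linux/sched.h>

    /* Illustrative only: allocations made by the current task between
     * these two calls are skipped by the kmem accounting code. */
    static void kmem_accounting_pause(void)
    {
            current->memcg_kmem_skip_account++;
    }

    static void kmem_accounting_resume(void)
    {
            current->memcg_kmem_skip_account--;
    }
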
@@ -11,6 +11,8 @@

#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/workqueue.h>

/*
 * Flags to pass to kmem_cache_create().

@@ -116,6 +118,7 @@ struct kmem_cache {
};
#endif

struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */

@@ -125,6 +128,9 @@ int slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
			unsigned long,
			void (*)(void *));
struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
			unsigned long, void (*)(void *), struct kmem_cache *);
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);

@@ -175,6 +181,48 @@ void kmem_cache_free(struct kmem_cache *, void *);
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/*
 * This is the main placeholder for memcg-related information in kmem caches.
 * struct kmem_cache will hold a pointer to it, so the memory cost while
 * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
 * would otherwise be if that would be bundled in kmem_cache: we'll need an
 * extra pointer chase. But the trade off clearly lays in favor of not
 * penalizing non-users.
 *
 * Both the root cache and the child caches will have it. For the root cache,
 * this will hold a dynamically allocated array large enough to hold
 * information about the currently limited memcgs in the system.
 *
 * Child caches will hold extra metadata needed for its operation. Fields are:
 *
 * @memcg: pointer to the memcg this cache belongs to
 * @list: list_head for the list of all caches in this memcg
 * @root_cache: pointer to the global, root cache, this cache was derived from
 * @dead: set to true after the memcg dies; the cache may still be around.
 * @nr_pages: number of pages that belongs to this cache.
 * @destroy: worker to be called whenever we are ready, or believe we may be
 *           ready, to destroy this cache.
 */
struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct kmem_cache *memcg_caches[0];
		struct {
			struct mem_cgroup *memcg;
			struct list_head list;
			struct kmem_cache *root_cache;
			bool dead;
			atomic_t nr_pages;
			struct work_struct destroy;
		};
	};
};
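
The union above is the core of the design: a root cache carries an array of child caches indexed by memcg, while a child cache carries back-pointers and destruction state. A minimal sketch of how a root cache's array is meant to be consulted; the helper name and locking comment are illustrative, the real lookup helpers live elsewhere in this series:

    #include <linux/memcontrol.h>
    #include <linux/slab.h>

    /* Illustrative only: find the per-memcg child of a root cache, which
     * may still be NULL until the child has actually been created. */
    static struct kmem_cache *memcg_child_cache(struct kmem_cache *root,
                                                struct mem_cgroup *memcg)
    {
            int idx = memcg_cache_id(memcg);

            if (idx < 0)            /* kmemcg off, or not a limited memcg */
                    return root;

            /* The array is normally walked under slab_mutex; a real user
             * must hold it (or otherwise pin the entry). */
            return root->memcg_params->memcg_caches[idx];
    }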

int memcg_update_all_caches(int num_memcgs);

struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m);

/*
 * Common kmalloc functions provided by all allocators

@@ -81,6 +81,9 @@ struct kmem_cache {
 */
	int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
#endif

	/* 6) per-cpu/per-node data, touched during every alloc/free */
	/*

@@ -101,6 +101,10 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
	struct kobject kobj;	/* For sysfs */
#endif
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
	int max_attr_size; /* for propagation, maximum size of a stored attr */
#endif

#ifdef CONFIG_NUMA
	/*

@@ -222,7 +226,10 @@ void *__kmalloc(size_t size, gfp_t flags);
static __always_inline void *
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
	void *ret;

	flags |= (__GFP_COMP | __GFP_KMEMCG);
	ret = (void *) __get_free_pages(flags, order);
	kmemleak_alloc(ret, size, 1, flags);
	return ret;
}

@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif

#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)

/*
 * flag set/clear/test wrappers
 * - pass TIF_xxxx constants to these functions
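
THREADINFO_GFP_ACCOUNTED exists so that thread_info/kernel-stack pages can be charged to the forking task's memcg. A hedged sketch of a consumer, with illustrative helper names (the corresponding hooks in this series sit on the fork path, which is not part of this hunk): the accounted mask is used at allocation time, and the pages must then be freed with the memcg-aware helper.

    #include <linux/gfp.h>
    #include <linux/thread_info.h>

    /* Illustrative only: allocate and free an accounted thread_info. */
    static struct thread_info *alloc_ti_accounted(int node)
    {
            struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
                                                 THREAD_SIZE_ORDER);

            return page ? page_address(page) : NULL;
    }

    static void free_ti_accounted(struct thread_info *ti)
    {
            /* Drops the memcg charge along with the pages. */
            free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
    }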