Merge branch 'akpm' (more patches from Andrew)

Merge patches from Andrew Morton:
 "Most of the rest of MM, plus a few dribs and drabs.

  I still have quite a few irritating patches left around: ones with
  dubious testing results, lack of review, ones which should have gone
  via maintainer trees but the maintainers are slack, etc.

  I need to be more activist in getting these things wrapped up outside
  the merge window, but they're such a PITA."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (48 commits)
  mm/vmscan.c: avoid possible deadlock caused by too_many_isolated()
  vmscan: comment too_many_isolated()
  mm/kmemleak.c: remove obsolete simple_strtoul
  mm/memory_hotplug.c: improve comments
  mm/hugetlb: create hugetlb cgroup file in hugetlb_init
  mm/mprotect.c: coding-style cleanups
  Documentation: ABI: /sys/devices/system/node/
  slub: drop mutex before deleting sysfs entry
  memcg: add comments clarifying aspects of cache attribute propagation
  kmem: add slab-specific documentation about the kmem controller
  slub: slub-specific propagation changes
  slab: propagate tunable values
  memcg: aggregate memcg cache values in slabinfo
  memcg/sl[au]b: shrink dead caches
  memcg/sl[au]b: track all the memcg children of a kmem_cache
  memcg: destroy memcg caches
  sl[au]b: allocate objects from memcg cache
  sl[au]b: always get the cache from its page in kmem_cache_free()
  memcg: skip memcg kmem allocations in specified code regions
  memcg: infrastructure to match an allocation to the right cache
  ...
This commit is contained in:
Linus Torvalds
2012-12-18 15:08:12 -08:00
38 changed files with 2414 additions and 211 deletions

View File

@@ -30,6 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -365,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);
extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

View File

@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern int hugetlb_cgroup_file_init(int idx) __init;
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage);
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
return;
}
static inline int __init hugetlb_cgroup_file_init(int idx)
static inline void hugetlb_cgroup_file_init(void)
{
return 0;
}
static inline void hugetlb_cgroup_migrate(struct page *oldhpage,

View File

@@ -21,11 +21,14 @@
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>
struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
/* Stats that can be updated by kernel. */
enum mem_cgroup_page_stat_item {
@@ -414,5 +417,211 @@ static inline void sock_release_memcg(struct sock *sk)
{
}
#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
#ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key;
extern int memcg_limited_groups_array_size;
/*
* Helper macro to loop through all memcg-specific caches. Callers must still
* check if the cache is valid (it is either valid or NULL).
* the slab_mutex must be held when looping through those caches
*/
#define for_each_memcg_cache_index(_idx) \
for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
static inline bool memcg_kmem_enabled(void)
{
return static_key_false(&memcg_kmem_enabled_key);
}
/*
* In general, we'll do everything in our power to not incur in any overhead
* for non-memcg users for the kmem functions. Not even a function call, if we
* can avoid it.
*
* Therefore, we'll inline all those functions so that in the best case, we'll
* see that kmemcg is off for everybody and proceed quickly. If it is on,
* we'll still do most of the flag checking inline. We check a lot of
* conditions, but because they are pretty simple, they are expected to be
* fast.
*/
bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
int order);
void __memcg_kmem_commit_charge(struct page *page,
struct mem_cgroup *memcg, int order);
void __memcg_kmem_uncharge_pages(struct page *page, int order);
int memcg_cache_id(struct mem_cgroup *memcg);
int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache);
void memcg_release_cache(struct kmem_cache *cachep);
void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);
struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
/**
* memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
* @gfp: the gfp allocation flags.
* @memcg: a pointer to the memcg this was charged against.
* @order: allocation order.
*
* returns true if the memcg where the current task belongs can hold this
* allocation.
*
* We return true automatically if this allocation is not to be accounted to
* any memcg.
*/
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
if (!memcg_kmem_enabled())
return true;
/*
* __GFP_NOFAIL allocations will move on even if charging is not
* possible. Therefore we don't even try, and have this allocation
* unaccounted. We could in theory charge it with
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
/* If the test is dying, just let it go. */
if (unlikely(fatal_signal_pending(current)))
return true;
return __memcg_kmem_newpage_charge(gfp, memcg, order);
}
/**
* memcg_kmem_uncharge_pages: uncharge pages from memcg
* @page: pointer to struct page being freed
* @order: allocation order.
*
* there is no need to specify memcg here, since it is embedded in page_cgroup
*/
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
{
if (memcg_kmem_enabled())
__memcg_kmem_uncharge_pages(page, order);
}
/**
* memcg_kmem_commit_charge: embeds correct memcg in a page
* @page: pointer to struct page recently allocated
* @memcg: the memcg structure we charged against
* @order: allocation order.
*
* Needs to be called after memcg_kmem_newpage_charge, regardless of success or
* failure of the allocation. if @page is NULL, this function will revert the
* charges. Otherwise, it will commit the memcg given by @memcg to the
* corresponding page_cgroup.
*/
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
if (memcg_kmem_enabled() && memcg)
__memcg_kmem_commit_charge(page, memcg, order);
}
/**
* memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
* @cachep: the original global kmem cache
* @gfp: allocation flags.
*
* This function assumes that the task allocating, which determines the memcg
* in the page allocator, belongs to the same cgroup throughout the whole
* process. Misacounting can happen if the task calls memcg_kmem_get_cache()
* while belonging to a cgroup, and later on changes. This is considered
* acceptable, and should only happen upon task migration.
*
* Before the cache is created by the memcg core, there is also a possible
* imbalance: the task belongs to a memcg, but the cache being allocated from
* is the global cache, since the child cache is not yet guaranteed to be
* ready. This case is also fine, since in this case the GFP_KMEMCG will not be
* passed and the page allocator will not attempt any cgroup accounting.
*/
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
if (!memcg_kmem_enabled())
return cachep;
if (gfp & __GFP_NOFAIL)
return cachep;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return cachep;
if (unlikely(fatal_signal_pending(current)))
return cachep;
return __memcg_kmem_get_cache(cachep, gfp);
}
#else
#define for_each_memcg_cache_index(_idx) \
for (; NULL; )
static inline bool memcg_kmem_enabled(void)
{
return false;
}
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
return true;
}
static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
{
}
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
}
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
return -1;
}
static inline int
memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache)
{
return 0;
}
static inline void memcg_release_cache(struct kmem_cache *cachep)
{
}
static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
}
static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
return cachep;
}
static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */

View File

@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
*
* these calls check for usage underflow and show a warning on the console
* _locked call expects the counter->lock to be taken
*
* returns the total charges still present in @counter.
*/
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
void res_counter_uncharge(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
void res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *top,
unsigned long val);
u64 res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *top,
unsigned long val);
/**
* res_counter_margin - calculate chargeable space of a counter
* @cnt: the counter

View File

@@ -1597,6 +1597,7 @@ struct task_struct {
unsigned long nr_pages; /* uncharged usage */
unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
} memcg_batch;
unsigned int memcg_kmem_skip_account;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
atomic_t ptrace_bp_refcnt;

View File

@@ -11,6 +11,8 @@
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/workqueue.h>
/*
* Flags to pass to kmem_cache_create().
@@ -116,6 +118,7 @@ struct kmem_cache {
};
#endif
struct mem_cgroup;
/*
* struct kmem_cache related prototypes
*/
@@ -125,6 +128,9 @@ int slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
void (*)(void *));
struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
unsigned long, void (*)(void *), struct kmem_cache *);
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);
@@ -175,6 +181,48 @@ void kmem_cache_free(struct kmem_cache *, void *);
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/*
* This is the main placeholder for memcg-related information in kmem caches.
* struct kmem_cache will hold a pointer to it, so the memory cost while
* disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
* would otherwise be if that would be bundled in kmem_cache: we'll need an
* extra pointer chase. But the trade off clearly lays in favor of not
* penalizing non-users.
*
* Both the root cache and the child caches will have it. For the root cache,
* this will hold a dynamically allocated array large enough to hold
* information about the currently limited memcgs in the system.
*
* Child caches will hold extra metadata needed for its operation. Fields are:
*
* @memcg: pointer to the memcg this cache belongs to
* @list: list_head for the list of all caches in this memcg
* @root_cache: pointer to the global, root cache, this cache was derived from
* @dead: set to true after the memcg dies; the cache may still be around.
* @nr_pages: number of pages that belongs to this cache.
* @destroy: worker to be called whenever we are ready, or believe we may be
* ready, to destroy this cache.
*/
struct memcg_cache_params {
bool is_root_cache;
union {
struct kmem_cache *memcg_caches[0];
struct {
struct mem_cgroup *memcg;
struct list_head list;
struct kmem_cache *root_cache;
bool dead;
atomic_t nr_pages;
struct work_struct destroy;
};
};
};
int memcg_update_all_caches(int num_memcgs);
struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m);
/*
* Common kmalloc functions provided by all allocators

View File

@@ -81,6 +81,9 @@ struct kmem_cache {
*/
int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_MEMCG_KMEM
struct memcg_cache_params *memcg_params;
#endif
/* 6) per-cpu/per-node data, touched during every alloc/free */
/*

View File

@@ -101,6 +101,10 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
#endif
#ifdef CONFIG_MEMCG_KMEM
struct memcg_cache_params *memcg_params;
int max_attr_size; /* for propagation, maximum size of a stored attr */
#endif
#ifdef CONFIG_NUMA
/*
@@ -222,7 +226,10 @@ void *__kmalloc(size_t size, gfp_t flags);
static __always_inline void *
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
void *ret;
flags |= (__GFP_COMP | __GFP_KMEMCG);
ret = (void *) __get_free_pages(flags, order);
kmemleak_alloc(ret, size, 1, flags);
return ret;
}

View File

@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions