mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations controlled by that mempolicy. As the per-node zonelist is already being filtered based on a zone id, this patch adds a version of __alloc_pages() that takes a nodemask for further filtering. This eliminates the need for MPOL_BIND to create a custom zonelist. A positive benefit of this is that allocations using MPOL_BIND now use the local node's distance-ordered zonelist instead of a custom node-id-ordered zonelist. I.e., pages will be allocated from the closest allowed node with available memory. [Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments] [Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask] [Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Hugh Dickins <hugh@veritas.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
dd1a239f6f
commit
19770b3260
@@ -26,7 +26,7 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
||||
#define cpuset_current_mems_allowed (current->mems_allowed)
|
||||
void cpuset_init_current_mems_allowed(void);
|
||||
void cpuset_update_task_memory_state(void);
|
||||
int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
|
||||
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
|
||||
|
||||
extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
|
||||
extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
|
||||
@@ -103,7 +103,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
|
||||
static inline void cpuset_init_current_mems_allowed(void) {}
|
||||
static inline void cpuset_update_task_memory_state(void) {}
|
||||
|
||||
static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
|
||||
static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
@@ -182,6 +182,10 @@ static inline void arch_alloc_page(struct page *page, int order) { }
|
||||
|
||||
extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
|
||||
|
||||
extern struct page *
|
||||
__alloc_pages_nodemask(gfp_t, unsigned int,
|
||||
struct zonelist *, nodemask_t *nodemask);
|
||||
|
||||
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
|
||||
unsigned int order)
|
||||
{
|
||||
|
@@ -54,19 +54,20 @@ struct mm_struct;
|
||||
* mmap_sem.
|
||||
*
|
||||
* Freeing policy:
|
||||
* When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
|
||||
* All other policies don't have any external state. mpol_free() handles this.
|
||||
* Mempolicy objects are reference counted. A mempolicy will be freed when
|
||||
* mpol_free() decrements the reference count to zero.
|
||||
*
|
||||
* Copying policy objects:
|
||||
* For MPOL_BIND the zonelist must be always duplicated. mpol_clone() does this.
|
||||
* mpol_copy() allocates a new mempolicy and copies the specified mempolicy
|
||||
* to the new storage. The reference count of the new object is initialized
|
||||
* to 1, representing the caller of mpol_copy().
|
||||
*/
|
||||
struct mempolicy {
|
||||
atomic_t refcnt;
|
||||
short policy; /* See MPOL_* above */
|
||||
union {
|
||||
struct zonelist *zonelist; /* bind */
|
||||
short preferred_node; /* preferred */
|
||||
nodemask_t nodes; /* interleave */
|
||||
nodemask_t nodes; /* interleave/bind */
|
||||
/* undefined for default */
|
||||
} v;
|
||||
nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
|
||||
@@ -151,7 +152,8 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
|
||||
|
||||
extern struct mempolicy default_policy;
|
||||
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
||||
unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
|
||||
unsigned long addr, gfp_t gfp_flags,
|
||||
struct mempolicy **mpol, nodemask_t **nodemask);
|
||||
extern unsigned slab_node(struct mempolicy *policy);
|
||||
|
||||
extern enum zone_type policy_zone;
|
||||
@@ -239,8 +241,11 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
|
||||
}
|
||||
|
||||
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
||||
unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
|
||||
unsigned long addr, gfp_t gfp_flags,
|
||||
struct mempolicy **mpol, nodemask_t **nodemask)
|
||||
{
|
||||
*mpol = NULL;
|
||||
*nodemask = NULL;
|
||||
return node_zonelist(0, gfp_flags);
|
||||
}
|
||||
|
||||
|
@@ -749,36 +749,60 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
|
||||
#endif /* CONFIG_NUMA */
|
||||
}
|
||||
|
||||
static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
|
||||
{
|
||||
zoneref->zone = zone;
|
||||
zoneref->zone_idx = zone_idx(zone);
|
||||
}
|
||||
/**
|
||||
* next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
|
||||
* @z - The cursor used as a starting point for the search
|
||||
* @highest_zoneidx - The zone index of the highest zone to return
|
||||
* @nodes - An optional nodemask to filter the zonelist with
|
||||
* @zone - The first suitable zone found is returned via this parameter
|
||||
*
|
||||
* This function returns the next zone at or below a given zone index that is
|
||||
* within the allowed nodemask using a cursor as the starting point for the
|
||||
* search. The zoneref returned is a cursor that is used as the next starting
|
||||
* point for future calls to next_zones_zonelist().
|
||||
*/
|
||||
struct zoneref *next_zones_zonelist(struct zoneref *z,
|
||||
enum zone_type highest_zoneidx,
|
||||
nodemask_t *nodes,
|
||||
struct zone **zone);
|
||||
|
||||
/* Returns the first zone at or below highest_zoneidx in a zonelist */
|
||||
/**
|
||||
* first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
|
||||
* @zonelist - The zonelist to search for a suitable zone
|
||||
* @highest_zoneidx - The zone index of the highest zone to return
|
||||
* @nodes - An optional nodemask to filter the zonelist with
|
||||
* @zone - The first suitable zone found is returned via this parameter
|
||||
*
|
||||
* This function returns the first zone at or below a given zone index that is
|
||||
* within the allowed nodemask. The zoneref returned is a cursor that can be
|
||||
* used to iterate the zonelist with next_zones_zonelist. The cursor should
|
||||
* not be used by the caller as it does not match the value of the zone
|
||||
* returned.
|
||||
*/
|
||||
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
|
||||
enum zone_type highest_zoneidx)
|
||||
enum zone_type highest_zoneidx,
|
||||
nodemask_t *nodes,
|
||||
struct zone **zone)
|
||||
{
|
||||
struct zoneref *z;
|
||||
|
||||
/* Find the first suitable zone to use for the allocation */
|
||||
z = zonelist->_zonerefs;
|
||||
while (zonelist_zone_idx(z) > highest_zoneidx)
|
||||
z++;
|
||||
|
||||
return z;
|
||||
return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
|
||||
zone);
|
||||
}
|
||||
|
||||
/* Returns the next zone at or below highest_zoneidx in a zonelist */
|
||||
static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
|
||||
enum zone_type highest_zoneidx)
|
||||
{
|
||||
/* Find the next suitable zone to use for the allocation */
|
||||
while (zonelist_zone_idx(z) > highest_zoneidx)
|
||||
z++;
|
||||
|
||||
return z;
|
||||
}
|
||||
/**
|
||||
* for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
|
||||
* @zone - The current zone in the iterator
|
||||
* @z - The current pointer within zonelist->_zonerefs being iterated
|
||||
* @zlist - The zonelist being iterated
|
||||
* @highidx - The zone index of the highest zone to return
|
||||
* @nodemask - Nodemask allowed by the allocator
|
||||
*
|
||||
* This iterator iterates through all zones at or below a given zone index and
|
||||
* within a given nodemask
|
||||
*/
|
||||
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
|
||||
for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
|
||||
zone; \
|
||||
z = next_zones_zonelist(z, highidx, nodemask, &zone)) \
|
||||
|
||||
/**
|
||||
* for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
|
||||
@@ -790,11 +814,7 @@ static inline struct zoneref *next_zones_zonelist(struct zoneref *z,
|
||||
* This iterator iterates through all zones at or below a given zone index.
|
||||
*/
|
||||
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
|
||||
for (z = first_zones_zonelist(zlist, highidx), \
|
||||
zone = zonelist_zone(z++); \
|
||||
zone; \
|
||||
z = next_zones_zonelist(z, highidx), \
|
||||
zone = zonelist_zone(z++))
|
||||
for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM
|
||||
#include <asm/sparsemem.h>
|
||||
|
Reference in New Issue
Block a user