mm: filter based on a nodemask as well as a gfp_mask

The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy.  As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages() that
takes a nodemask for further filtering.  This eliminates the need for
MPOL_BIND to create a custom zonelist.

A positive benefit of this is that allocations using MPOL_BIND now use the
local node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist.  I.e., pages will be allocated from the closest allowed node with
available memory.

[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Mel Gorman
2008-04-28 02:12:18 -07:00
committed by Linus Torvalds
parent dd1a239f6f
commit 19770b3260
11 changed files with 224 additions and 191 deletions

View File

@@ -1377,7 +1377,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
* a page.
*/
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{
struct zoneref *z;
@@ -1388,16 +1388,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */
z = first_zones_zonelist(zonelist, high_zoneidx);
classzone_idx = zonelist_zone_idx(z);
preferred_zone = zonelist_zone(z);
(void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
&preferred_zone);
classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
/*
* Scan zonelist, looking for a zone with enough free.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
for_each_zone_zonelist_nodemask(zone, z, zonelist,
high_zoneidx, nodemask) {
if (NUMA_BUILD && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
@@ -1447,9 +1448,9 @@ try_next_zone:
/*
* This is the 'heart' of the zoned buddy allocator.
*/
struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist)
static struct page *
__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
const gfp_t wait = gfp_mask & __GFP_WAIT;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1478,7 +1479,7 @@ restart:
return NULL;
}
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
if (page)
goto got_pg;
@@ -1523,7 +1524,7 @@ restart:
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
page = get_page_from_freelist(gfp_mask, order, zonelist,
page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
high_zoneidx, alloc_flags);
if (page)
goto got_pg;
@@ -1536,7 +1537,7 @@ rebalance:
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
/* go through the zonelist yet again, ignoring mins */
page = get_page_from_freelist(gfp_mask, order,
page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
if (page)
goto got_pg;
@@ -1571,7 +1572,7 @@ nofail_alloc:
drain_all_pages();
if (likely(did_some_progress)) {
page = get_page_from_freelist(gfp_mask, order,
page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx, alloc_flags);
if (page)
goto got_pg;
@@ -1587,8 +1588,9 @@ nofail_alloc:
* a parallel oom killing, we must fail if we're still
* under heavy pressure.
*/
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
order, zonelist, high_zoneidx,
ALLOC_WMARK_HIGH|ALLOC_CPUSET);
if (page) {
clear_zonelist_oom(zonelist, gfp_mask);
goto got_pg;
@@ -1637,6 +1639,20 @@ got_pg:
return page;
}
struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
}
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
}
EXPORT_SYMBOL(__alloc_pages);
/*
@@ -1880,6 +1896,12 @@ void show_free_areas(void)
show_swap_cache_info();
}
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
zoneref->zone = zone;
zoneref->zone_idx = zone_idx(zone);
}
/*
* Builds allocation fallback zone lists.
*