mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy.  As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages()
that takes a nodemask for further filtering.  This eliminates the need
for MPOL_BIND to create a custom zonelist.

A positive benefit of this is that allocations using MPOL_BIND now use the
local node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist.  I.e., pages will be allocated from the closest allowed node with
available memory.

[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
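For readers less familiar with the allocator, the standalone userspace C sketch below models the core idea under simplified assumptions (the zone, zonelist and nodemask types here are stand-ins invented for illustration, not the kernel's structures): rather than building a custom zonelist for MPOL_BIND, the allocator walks the local node's distance-ordered zonelist and simply skips zones whose node is not set in the policy's nodemask, so the first hit is the closest allowed zone.

/*
 * Conceptual userspace model of nodemask-filtered zonelist iteration.
 * Simplified stand-in types; not kernel code.
 */
#include <stdio.h>
#include <stdbool.h>

struct zone {
	int node;		/* node this zone belongs to */
	const char *name;	/* e.g. "node0/Normal" */
};

/* A zonelist is just the local node's zones in distance order. */
struct zonelist {
	struct zone *zones[16];
	int nr;
};

/* Minimal nodemask: one bit per node. */
typedef struct { unsigned long bits; } nodemask_t;

static bool node_allowed(int node, const nodemask_t *mask)
{
	return (mask->bits & (1UL << node)) != 0;
}

/*
 * Walk the distance-ordered zonelist, skipping zones whose node is not
 * allowed by the nodemask.  A NULL nodemask means "no restriction",
 * mirroring how the unmasked allocation path behaves.
 */
static struct zone *first_allowed_zone(struct zonelist *zl, const nodemask_t *nodemask)
{
	for (int i = 0; i < zl->nr; i++) {
		struct zone *z = zl->zones[i];
		if (nodemask && !node_allowed(z->node, nodemask))
			continue;	/* node excluded by the policy */
		return z;		/* closest allowed zone */
	}
	return NULL;
}

int main(void)
{
	struct zone z0 = { .node = 0, .name = "node0/Normal" };
	struct zone z1 = { .node = 1, .name = "node1/Normal" };
	struct zone z2 = { .node = 2, .name = "node2/Normal" };

	/* Local node 0's zonelist, ordered by distance: 0, 1, 2. */
	struct zonelist zl = { .zones = { &z0, &z1, &z2 }, .nr = 3 };

	/* Policy bound to nodes {1, 2}: node 0 is filtered out at allocation time. */
	nodemask_t bind = { .bits = (1UL << 1) | (1UL << 2) };

	struct zone *z = first_allowed_zone(&zl, &bind);
	printf("allocating from %s\n", z ? z->name : "nowhere");
	return 0;
}

Running the sketch prints "allocating from node1/Normal": the closest allowed node wins, which is exactly the behavioural improvement the commit message describes.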
commit 19770b3260
parent dd1a239f6f
committed by Linus Torvalds
@@ -1377,7 +1377,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  * a page.
  */
 static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
 	struct zoneref *z;
@@ -1388,16 +1388,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 
-	z = first_zones_zonelist(zonelist, high_zoneidx);
-	classzone_idx = zonelist_zone_idx(z);
-	preferred_zone = zonelist_zone(z);
+	(void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+							&preferred_zone);
+	classzone_idx = zone_idx(preferred_zone);
 
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						high_zoneidx, nodemask) {
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
@@ -1447,9 +1448,9 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1478,7 +1479,7 @@ restart:
 		return NULL;
 	}
 
-	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
@@ -1523,7 +1524,7 @@ restart:
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	page = get_page_from_freelist(gfp_mask, order, zonelist,
+	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 						high_zoneidx, alloc_flags);
 	if (page)
 		goto got_pg;
@@ -1536,7 +1537,7 @@ rebalance:
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
-			page = get_page_from_freelist(gfp_mask, order,
+			page = get_page_from_freelist(gfp_mask, nodemask, order,
 				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
@@ -1571,7 +1572,7 @@ nofail_alloc:
 		drain_all_pages();
 
 		if (likely(did_some_progress)) {
-			page = get_page_from_freelist(gfp_mask, order,
+			page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx, alloc_flags);
 			if (page)
 				goto got_pg;
@@ -1587,8 +1588,9 @@ nofail_alloc:
 		 * a parallel oom killing, we must fail if we're still
 		 * under heavy pressure.
		 */
-		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+			order, zonelist, high_zoneidx,
+			ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
 			clear_zonelist_oom(zonelist, gfp_mask);
 			goto got_pg;
@@ -1637,6 +1639,20 @@ got_pg:
 	return page;
 }
 
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
+
 EXPORT_SYMBOL(__alloc_pages);
 
 /*
@@ -1880,6 +1896,12 @@ void show_free_areas(void)
 	show_swap_cache_info();
 }
 
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+	zoneref->zone = zone;
+	zoneref->zone_idx = zone_idx(zone);
+}
+
 /*
  * Builds allocation fallback zone lists.
  *
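The two entry points added near the end of the diff follow a common wrapper pattern: __alloc_pages() keeps its existing signature and passes a NULL nodemask (meaning no restriction) to the shared __alloc_pages_internal(), while __alloc_pages_nodemask() threads a caller-supplied mask through. Below is a minimal standalone C sketch of that pattern; the function names are hypothetical stand-ins, not the kernel's.

/* Standalone model of the NULL-mask wrapper pattern; hypothetical names. */
#include <stdio.h>

typedef struct { unsigned long bits; } nodemask_t;

/* Shared internal helper: a NULL mask means "no node restriction". */
static int alloc_internal(unsigned int order, const nodemask_t *nodemask)
{
	printf("order=%u, mask=%s\n", order,
	       nodemask ? "restricted" : "any node");
	return 0;
}

/* Old-style entry point keeps its signature; callers need no changes. */
static int alloc_pages_compat(unsigned int order)
{
	return alloc_internal(order, NULL);
}

/* New entry point lets policy-aware callers pass their nodemask through. */
static int alloc_pages_masked(unsigned int order, const nodemask_t *nodemask)
{
	return alloc_internal(order, nodemask);
}

int main(void)
{
	nodemask_t bind = { .bits = (1UL << 1) | (1UL << 2) };	/* nodes 1 and 2 */
	alloc_pages_compat(0);
	alloc_pages_masked(0, &bind);
	return 0;
}

Keeping the old signature as a thin wrapper means existing callers are untouched, while callers with an MPOL_BIND-style policy opt into filtering explicitly.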