xarray: Replace exceptional entries
Introduce xarray value entries and tagged pointers to replace radix tree exceptional entries. This is a slight change in encoding to allow the use of an extra bit (we can now store BITS_PER_LONG - 1 bits in a value entry). It is also a change in emphasis; exceptional entries are intimidating and different. As the comment explains, you can choose to store values or pointers in the xarray and they are both first-class citizens.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
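For readers new to the helpers used throughout this diff, here is a minimal userspace sketch of the encoding the message describes. It mirrors the semantics of xa_mk_value(), xa_to_value() and xa_is_value() rather than reproducing the kernel implementation: the payload is shifted up by one bit and the bottom bit is set, so a value entry can never collide with a word-aligned pointer, leaving BITS_PER_LONG - 1 usable payload bits.

/*
 * Sketch of the value-entry encoding (userspace, not the kernel source).
 * A value entry stores an unsigned long shifted up by one bit with the
 * bottom bit set as a tag; pointers stored in the xarray keep that bit
 * clear because they are at least word-aligned.
 */
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG		(8 * (int)sizeof(unsigned long))
#define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)

static inline void *xa_mk_value(unsigned long v)
{
	return (void *)((v << 1) | 1);		/* tag the low bit */
}

static inline unsigned long xa_to_value(const void *entry)
{
	return (unsigned long)entry >> 1;	/* strip the tag */
}

static inline bool xa_is_value(const void *entry)
{
	return (unsigned long)entry & 1;	/* tagged => value, clear => pointer */
}

int main(void)
{
	unsigned long swap_cookie = 42;		/* stand-in for a swap entry */
	void *entry = xa_mk_value(swap_cookie);
	static int object;			/* stand-in for a struct page */

	printf("payload bits:   %d\n", BITS_PER_XA_VALUE);
	printf("value entry?    %d -> %lu\n", xa_is_value(entry), xa_to_value(entry));
	printf("pointer entry?  %d\n", xa_is_value(&object));
	return 0;
}

The conversions below are mechanical: radix_tree_exceptional_entry(p) becomes xa_is_value(p), and the open-coded shadow-entry packing in mm/workingset.c becomes xa_mk_value()/xa_to_value().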
mm/filemap.c
@@ -127,7 +127,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		p = radix_tree_deref_slot_protected(slot,
 						    &mapping->i_pages.xa_lock);
-		if (!radix_tree_exceptional_entry(p))
+		if (!xa_is_value(p))
 			return -EEXIST;

 		mapping->nrexceptional--;
@@ -336,7 +336,7 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 			break;
 		page = radix_tree_deref_slot_protected(slot,
 						       &mapping->i_pages.xa_lock);
-		if (radix_tree_exceptional_entry(page))
+		if (xa_is_value(page))
 			continue;
 		if (!tail_pages) {
 			/*
@@ -1355,7 +1355,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 		struct page *page;

 		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || radix_tree_exceptional_entry(page))
+		if (!page || xa_is_value(page))
 			break;
 		index++;
 		if (index == 0)
@@ -1396,7 +1396,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 		struct page *page;

 		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || radix_tree_exceptional_entry(page))
+		if (!page || xa_is_value(page))
 			break;
 		index--;
 		if (index == ULONG_MAX)
@@ -1539,7 +1539,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,

 repeat:
 	page = find_get_entry(mapping, offset);
-	if (radix_tree_exceptional_entry(page))
+	if (xa_is_value(page))
 		page = NULL;
 	if (!page)
 		goto no_page;

mm/khugepaged.c
@@ -1369,7 +1369,7 @@ static void collapse_shmem(struct mm_struct *mm,

 		page = radix_tree_deref_slot_protected(slot,
 				&mapping->i_pages.xa_lock);
-		if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
+		if (xa_is_value(page) || !PageUptodate(page)) {
 			xa_unlock_irq(&mapping->i_pages);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,

mm/madvise.c
@@ -251,7 +251,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

 		page = find_get_entry(mapping, index);
-		if (!radix_tree_exceptional_entry(page)) {
+		if (!xa_is_value(page)) {
 			if (page)
 				put_page(page);
 			continue;

mm/memcontrol.c
@@ -4750,7 +4750,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 	/* shmem/tmpfs may report page out on swap: account for that too. */
 	if (shmem_mapping(mapping)) {
 		page = find_get_entry(mapping, pgoff);
-		if (radix_tree_exceptional_entry(page)) {
+		if (xa_is_value(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
 			if (do_memsw_account())
 				*entry = swp;

mm/mincore.c
@@ -66,7 +66,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 		 * shmem/tmpfs may return swap: account for swapcache
 		 * page too.
 		 */
-		if (radix_tree_exceptional_entry(page)) {
+		if (xa_is_value(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
 			page = find_get_page(swap_address_space(swp),
 					     swp_offset(swp));

mm/readahead.c
@@ -179,7 +179,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->i_pages, page_offset);
 		rcu_read_unlock();
-		if (page && !radix_tree_exceptional_entry(page)) {
+		if (page && !xa_is_value(page)) {
 			/*
 			 * Page already present? Kick off the current batch of
 			 * contiguous pages before continuing with the next
mm/shmem.c
@@ -709,7 +709,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 			continue;
 		}

-		if (radix_tree_exceptional_entry(page))
+		if (xa_is_value(page))
 			swapped++;

 		if (need_resched()) {
@@ -824,7 +824,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (index >= end)
 				break;

-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (unfalloc)
 					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
@@ -921,7 +921,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (index >= end)
 				break;

-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (unfalloc)
 					continue;
 				if (shmem_free_swap(mapping, index, page)) {
@@ -1643,7 +1643,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 repeat:
 	swap.val = 0;
 	page = find_lock_entry(mapping, index);
-	if (radix_tree_exceptional_entry(page)) {
+	if (xa_is_value(page)) {
 		swap = radix_to_swp_entry(page);
 		page = NULL;
 	}
@@ -2578,7 +2578,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 				index = indices[i];
 			}
 			page = pvec.pages[i];
-			if (page && !radix_tree_exceptional_entry(page)) {
+			if (page && !xa_is_value(page)) {
 				if (!PageUptodate(page))
 					page = NULL;
 			}

mm/swap.c
@@ -965,7 +965,7 @@ void pagevec_remove_exceptionals(struct pagevec *pvec)

 	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
-		if (!radix_tree_exceptional_entry(page))
+		if (!xa_is_value(page))
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;

mm/truncate.c
@@ -70,7 +70,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		return;

 	for (j = 0; j < pagevec_count(pvec); j++)
-		if (radix_tree_exceptional_entry(pvec->pages[j]))
+		if (xa_is_value(pvec->pages[j]))
 			break;

 	if (j == pagevec_count(pvec))
@@ -85,7 +85,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		struct page *page = pvec->pages[i];
 		pgoff_t index = indices[i];

-		if (!radix_tree_exceptional_entry(page)) {
+		if (!xa_is_value(page)) {
 			pvec->pages[j++] = page;
 			continue;
 		}
@@ -347,7 +347,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (index >= end)
 				break;

-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;

 			if (!trylock_page(page))
@@ -442,7 +442,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				break;
 			}

-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;

 			lock_page(page);
@@ -561,7 +561,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (index > end)
 				break;

-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				invalidate_exceptional_entry(mapping, index,
 							     page);
 				continue;
@@ -692,7 +692,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			if (index > end)
 				break;

-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (!invalidate_exceptional_entry2(mapping,
 								   index, page))
 					ret = -EBUSY;

mm/workingset.c
@@ -155,8 +155,8 @@
  * refault distance will immediately activate the refaulting page.
  */

-#define EVICTION_SHIFT	(RADIX_TREE_EXCEPTIONAL_ENTRY +	\
-			 NODES_SHIFT +	\
+#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) +	\
+			 NODES_SHIFT +	\
 			 MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
@@ -173,20 +173,19 @@ static unsigned int bucket_order __read_mostly;
 static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
 {
 	eviction >>= bucket_order;
+	eviction &= EVICTION_MASK;
 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
-	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);

-	return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
+	return xa_mk_value(eviction);
 }

 static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 			  unsigned long *evictionp)
 {
-	unsigned long entry = (unsigned long)shadow;
+	unsigned long entry = xa_to_value(shadow);
 	int memcgid, nid;

-	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
 	entry >>= NODES_SHIFT;
 	memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
@@ -453,7 +452,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
-			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
+			if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
 				goto out_invalid;
 			if (WARN_ON_ONCE(!node->exceptional))
 				goto out_invalid;