dm bio prison: switch to using a red black tree

Previously it was using a fixed sized hash table.  There are times
when very many concurrent cells are held (such as when processing a very
large discard).  When this happens the hash table performance becomes
very poor.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
Joe Thornber
2014-10-06 16:30:06 -04:00
committed by Mike Snitzer
parent 33096a7822
commit a195db2d29
4 changed files with 84 additions and 111 deletions

View File

@@ -14,68 +14,38 @@
/*----------------------------------------------------------------*/
struct bucket {
spinlock_t lock;
struct hlist_head cells;
};
#define MIN_CELLS 1024
struct dm_bio_prison {
spinlock_t lock;
mempool_t *cell_pool;
unsigned nr_buckets;
unsigned hash_mask;
struct bucket *buckets;
struct rb_root cells;
};
/*----------------------------------------------------------------*/
static uint32_t calc_nr_buckets(unsigned nr_cells)
{
uint32_t n = 128;
nr_cells /= 4;
nr_cells = min(nr_cells, 8192u);
while (n < nr_cells)
n <<= 1;
return n;
}
static struct kmem_cache *_cell_cache;
static void init_bucket(struct bucket *b)
{
spin_lock_init(&b->lock);
INIT_HLIST_HEAD(&b->cells);
}
/*----------------------------------------------------------------*/
/*
* @nr_cells should be the number of cells you want in use _concurrently_.
* Don't confuse it with the number of distinct keys.
*/
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
struct dm_bio_prison *dm_bio_prison_create(void)
{
unsigned i;
uint32_t nr_buckets = calc_nr_buckets(nr_cells);
size_t len = sizeof(struct dm_bio_prison) +
(sizeof(struct bucket) * nr_buckets);
struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
if (!prison)
return NULL;
prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
spin_lock_init(&prison->lock);
prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
if (!prison->cell_pool) {
kfree(prison);
return NULL;
}
prison->nr_buckets = nr_buckets;
prison->hash_mask = nr_buckets - 1;
prison->buckets = (struct bucket *) (prison + 1);
for (i = 0; i < nr_buckets; i++)
init_bucket(prison->buckets + i);
prison->cells = RB_ROOT;
return prison;
}
@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
{
const unsigned long BIG_PRIME = 4294967291UL;
uint64_t hash = key->block * BIG_PRIME;
return (uint32_t) (hash & prison->hash_mask);
}
static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
{
return (lhs->virtual == rhs->virtual) &&
(lhs->dev == rhs->dev) &&
(lhs->block == rhs->block);
}
static struct bucket *get_bucket(struct dm_bio_prison *prison,
struct dm_cell_key *key)
{
return prison->buckets + hash_key(prison, key);
}
static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
struct dm_cell_key *key)
{
struct dm_bio_prison_cell *cell;
hlist_for_each_entry(cell, &b->cells, list)
if (keys_equal(&cell->key, key))
return cell;
return NULL;
}
static void __setup_new_cell(struct bucket *b,
struct dm_cell_key *key,
static void __setup_new_cell(struct dm_cell_key *key,
struct bio *holder,
struct dm_bio_prison_cell *cell)
{
memcpy(&cell->key, key, sizeof(cell->key));
cell->holder = holder;
bio_list_init(&cell->bios);
hlist_add_head(&cell->list, &b->cells);
memcpy(&cell->key, key, sizeof(cell->key));
cell->holder = holder;
bio_list_init(&cell->bios);
}
static int __bio_detain(struct bucket *b,
static int cmp_keys(struct dm_cell_key *lhs,
struct dm_cell_key *rhs)
{
if (lhs->virtual < rhs->virtual)
return -1;
if (lhs->virtual > rhs->virtual)
return 1;
if (lhs->dev < rhs->dev)
return -1;
if (lhs->dev > rhs->dev)
return 1;
if (lhs->block < rhs->block)
return -1;
if (lhs->block > rhs->block)
return 1;
return 0;
}
static int __bio_detain(struct dm_bio_prison *prison,
struct dm_cell_key *key,
struct bio *inmate,
struct dm_bio_prison_cell *cell_prealloc,
struct dm_bio_prison_cell **cell_result)
{
struct dm_bio_prison_cell *cell;
int r;
struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
cell = __search_bucket(b, key);
if (cell) {
if (inmate)
bio_list_add(&cell->bios, inmate);
*cell_result = cell;
return 1;
while (*new) {
struct dm_bio_prison_cell *cell =
container_of(*new, struct dm_bio_prison_cell, node);
r = cmp_keys(key, &cell->key);
parent = *new;
if (r < 0)
new = &((*new)->rb_left);
else if (r > 0)
new = &((*new)->rb_right);
else {
if (inmate)
bio_list_add(&cell->bios, inmate);
*cell_result = cell;
return 1;
}
}
__setup_new_cell(b, key, inmate, cell_prealloc);
__setup_new_cell(key, inmate, cell_prealloc);
*cell_result = cell_prealloc;
rb_link_node(&cell_prealloc->node, parent, new);
rb_insert_color(&cell_prealloc->node, &prison->cells);
return 0;
}
@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison,
{
int r;
unsigned long flags;
struct bucket *b = get_bucket(prison, key);
spin_lock_irqsave(&b->lock, flags);
r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&prison->lock, flags);
return r;
}
@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell);
/*
* @inmates must have been initialised prior to this call
*/
static void __cell_release(struct dm_bio_prison_cell *cell,
static void __cell_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
hlist_del(&cell->list);
rb_erase(&cell->node, &prison->cells);
if (inmates) {
if (cell->holder)
@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct bio_list *bios)
{
unsigned long flags;
struct bucket *b = get_bucket(prison, &cell->key);
spin_lock_irqsave(&b->lock, flags);
__cell_release(cell, bios);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
__cell_release(prison, cell, bios);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
static void __cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
hlist_del(&cell->list);
rb_erase(&cell->node, &prison->cells);
bio_list_merge(inmates, &cell->bios);
}
@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct bio_list *inmates)
{
unsigned long flags;
struct bucket *b = get_bucket(prison, &cell->key);
spin_lock_irqsave(&b->lock, flags);
__cell_release_no_holder(cell, inmates);
spin_unlock_irqrestore(&b->lock, flags);
spin_lock_irqsave(&prison->lock, flags);
__cell_release_no_holder(prison, cell, inmates);
spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);