crush: add chooseleaf_stable tunable
Add a tunable to fix a bug where chooseleaf may cause unnecessary PG migrations when a device fails.

Reflects ceph.git commit fdb3f664448e80d984470f32f04e2e6f03ab52ec.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
|
||||
* @local_retries: localized retries
|
||||
* @local_fallback_retries: localized fallback retries
|
||||
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
|
||||
* @stable: stable mode starts rep=0 in the recursive call for all replicas
|
||||
* @vary_r: pass r to recursive calls
|
||||
* @out2: second output vector for leaf items (if @recurse_to_leaf)
|
||||
* @parent_r: r value passed from the parent
|
||||
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||
unsigned int local_fallback_retries,
|
||||
int recurse_to_leaf,
|
||||
unsigned int vary_r,
|
||||
unsigned int stable,
|
||||
int *out2,
|
||||
int parent_r)
|
||||
{
|
||||
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||
int collide, reject;
|
||||
int count = out_size;
|
||||
|
||||
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
|
||||
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
|
||||
recurse_to_leaf ? "_LEAF" : "",
|
||||
bucket->id, x, outpos, numrep,
|
||||
tries, recurse_tries, local_retries, local_fallback_retries,
|
||||
parent_r);
|
||||
parent_r, stable);
|
||||
|
||||
for (rep = outpos; rep < numrep && count > 0 ; rep++) {
|
||||
for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
|
||||
/* keep trying until we get a non-out, non-colliding item */
|
||||
ftotal = 0;
|
||||
skip_rep = 0;
|
||||
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||
if (crush_choose_firstn(map,
|
||||
map->buckets[-1-item],
|
||||
weight, weight_max,
|
||||
x, outpos+1, 0,
|
||||
x, stable ? 1 : outpos+1, 0,
|
||||
out2, outpos, count,
|
||||
recurse_tries, 0,
|
||||
local_retries,
|
||||
local_fallback_retries,
|
||||
0,
|
||||
vary_r,
|
||||
stable,
|
||||
NULL,
|
||||
sub_r) <= outpos)
|
||||
/* didn't get leaf */
|
||||
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||
int choose_local_fallback_retries = map->choose_local_fallback_tries;
|
||||
|
||||
int vary_r = map->chooseleaf_vary_r;
|
||||
int stable = map->chooseleaf_stable;
|
||||
|
||||
if ((__u32)ruleno >= map->max_rules) {
|
||||
dprintk(" bad ruleno %d\n", ruleno);
|
||||
@@ -870,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
|
||||
vary_r = curstep->arg1;
|
||||
break;
|
||||
|
||||
case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
|
||||
if (curstep->arg1 >= 0)
|
||||
stable = curstep->arg1;
|
||||
break;
|
||||
|
||||
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
|
||||
case CRUSH_RULE_CHOOSE_FIRSTN:
|
||||
firstn = 1;
|
||||
@@ -932,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||
choose_local_fallback_retries,
|
||||
recurse_to_leaf,
|
||||
vary_r,
|
||||
stable,
|
||||
c+osize,
|
||||
0);
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user