Merge git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull ceph updates from Sage Weil:
 "There are some updates and cleanups to the CRUSH placement code, a bug
  fix with incremental maps, several cleanups and fixes from Josh Durgin
  in the RBD block device code, a series of cleanups and bug fixes from
  Alex Elder in the messenger code, and some miscellaneous bounds
  checking and gfp cleanups/fixes."

Fix up trivial conflicts in net/ceph/{messenger.c,osdmap.c} due to the
networking people preferring "unsigned int" over just "unsigned".

* git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (45 commits)
  libceph: fix pg_temp updates
  libceph: avoid unregistering osd request when not registered
  ceph: add auth buf in prepare_write_connect()
  ceph: rename prepare_connect_authorizer()
  ceph: return pointer from prepare_connect_authorizer()
  ceph: use info returned by get_authorizer
  ceph: have get_authorizer methods return pointers
  ceph: ensure auth ops are defined before use
  ceph: messenger: reduce args to create_authorizer
  ceph: define ceph_auth_handshake type
  ceph: messenger: check return from get_authorizer
  ceph: messenger: rework prepare_connect_authorizer()
  ceph: messenger: check prepare_write_connect() result
  ceph: don't set WRITE_PENDING too early
  ceph: drop msgr argument from prepare_write_connect()
  ceph: messenger: send banner in process_connect()
  ceph: messenger: reset connection kvec caller
  libceph: don't reset kvec in prepare_write_banner()
  ceph: ignore preferred_osd field
  ceph: fully initialize new layout
  ...
This commit is contained in:
Linus Torvalds
2012-05-30 11:17:19 -07:00
23 changed files with 373 additions and 450 deletions

View File

@@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
*/
static int ceph_auth_none_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_authorizer **a,
void **buf, size_t *len,
void **reply_buf, size_t *reply_len)
struct ceph_auth_handshake *auth)
{
struct ceph_auth_none_info *ai = ac->private;
struct ceph_none_authorizer *au = &ai->au;
@@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
dout("built authorizer len %d\n", au->buf_len);
}
*a = (struct ceph_authorizer *)au;
*buf = au->buf;
*len = au->buf_len;
*reply_buf = au->reply_buf;
*reply_len = sizeof(au->reply_buf);
auth->authorizer = (struct ceph_authorizer *) au;
auth->authorizer_buf = au->buf;
auth->authorizer_buf_len = au->buf_len;
auth->authorizer_reply_buf = au->reply_buf;
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
return 0;
bad2:

View File

@@ -526,9 +526,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
static int ceph_x_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_authorizer **a,
void **buf, size_t *len,
void **reply_buf, size_t *reply_len)
struct ceph_auth_handshake *auth)
{
struct ceph_x_authorizer *au;
struct ceph_x_ticket_handler *th;
@@ -548,11 +546,12 @@ static int ceph_x_create_authorizer(
return ret;
}
*a = (struct ceph_authorizer *)au;
*buf = au->buf->vec.iov_base;
*len = au->buf->vec.iov_len;
*reply_buf = au->reply_buf;
*reply_len = sizeof(au->reply_buf);
auth->authorizer = (struct ceph_authorizer *) au;
auth->authorizer_buf = au->buf->vec.iov_base;
auth->authorizer_buf_len = au->buf->vec.iov_len;
auth->authorizer_reply_buf = au->reply_buf;
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
return 0;
}

View File

@@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
* @b: bucket pointer
* @p: item index in bucket
*/
int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
{
if (p >= b->size)
if ((__u32)p >= b->size)
return 0;
switch (b->alg) {
@@ -37,38 +37,13 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
case CRUSH_BUCKET_LIST:
return ((struct crush_bucket_list *)b)->item_weights[p];
case CRUSH_BUCKET_TREE:
if (p & 1)
return ((struct crush_bucket_tree *)b)->node_weights[p];
return 0;
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
}
return 0;
}
/**
* crush_calc_parents - Calculate parent vectors for the given crush map.
* @map: crush_map pointer
*/
void crush_calc_parents(struct crush_map *map)
{
int i, b, c;
for (b = 0; b < map->max_buckets; b++) {
if (map->buckets[b] == NULL)
continue;
for (i = 0; i < map->buckets[b]->size; i++) {
c = map->buckets[b]->items[i];
BUG_ON(c >= map->max_devices ||
c < -map->max_buckets);
if (c >= 0)
map->device_parents[c] = map->buckets[b]->id;
else
map->bucket_parents[-1-c] = map->buckets[b]->id;
}
}
}
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
{
kfree(b->h.perm);
@@ -87,6 +62,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
{
kfree(b->h.perm);
kfree(b->h.items);
kfree(b->node_weights);
kfree(b);
}
@@ -124,10 +101,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
*/
void crush_destroy(struct crush_map *map)
{
int b;
/* buckets */
if (map->buckets) {
__s32 b;
for (b = 0; b < map->max_buckets; b++) {
if (map->buckets[b] == NULL)
continue;
@@ -138,13 +114,12 @@ void crush_destroy(struct crush_map *map)
/* rules */
if (map->rules) {
__u32 b;
for (b = 0; b < map->max_rules; b++)
kfree(map->rules[b]);
kfree(map->rules);
}
kfree(map->bucket_parents);
kfree(map->device_parents);
kfree(map);
}

View File

@@ -33,9 +33,9 @@
* @type: storage ruleset type (user defined)
* @size: output set size
*/
int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
{
int i;
__u32 i;
for (i = 0; i < map->max_rules; i++) {
if (map->rules[i] &&
@@ -73,7 +73,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
unsigned int i, s;
/* start a new permutation if @x has changed */
if (bucket->perm_x != x || bucket->perm_n == 0) {
if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
dprintk("bucket %d new x=%d\n", bucket->id, x);
bucket->perm_x = x;
@@ -153,8 +153,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
return bucket->h.items[i];
}
BUG_ON(1);
return 0;
dprintk("bad list sums for bucket %d\n", bucket->h.id);
return bucket->h.items[0];
}
@@ -220,7 +220,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
int x, int r)
{
int i;
__u32 i;
int high = 0;
__u64 high_draw = 0;
__u64 draw;
@@ -240,6 +240,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
BUG_ON(in->size == 0);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -254,7 +255,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
default:
BUG_ON(1);
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
@@ -263,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
{
if (weight[item] >= 0x10000)
return 0;
@@ -288,16 +289,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
* @recurse_to_leaf: true if we want one device under each item of given type
* @out2: second output vector for leaf items (if @recurse_to_leaf)
*/
static int crush_choose(struct crush_map *map,
static int crush_choose(const struct crush_map *map,
struct crush_bucket *bucket,
__u32 *weight,
const __u32 *weight,
int x, int numrep, int type,
int *out, int outpos,
int firstn, int recurse_to_leaf,
int *out2)
{
int rep;
int ftotal, flocal;
unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
struct crush_bucket *in = bucket;
int r;
@@ -305,7 +306,7 @@ static int crush_choose(struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
const unsigned int orig_tries = 5; /* attempts before we fall back to search */
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
@@ -326,7 +327,7 @@ static int crush_choose(struct crush_map *map,
r = rep;
if (in->alg == CRUSH_BUCKET_UNIFORM) {
/* be careful */
if (firstn || numrep >= in->size)
if (firstn || (__u32)numrep >= in->size)
/* r' = r + f_total */
r += ftotal;
else if (in->size % numrep == 0)
@@ -355,7 +356,11 @@ static int crush_choose(struct crush_map *map,
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
BUG_ON(item >= map->max_devices);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
skip_rep = 1;
break;
}
/* desired type? */
if (item < 0)
@@ -366,8 +371,12 @@ static int crush_choose(struct crush_map *map,
/* keep going? */
if (itemtype != type) {
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
if (item >= 0 ||
(-1-item) >= map->max_buckets) {
dprintk(" bad item type %d\n", type);
skip_rep = 1;
break;
}
in = map->buckets[-1-item];
retry_bucket = 1;
continue;
@@ -416,7 +425,7 @@ reject:
if (collide && flocal < 3)
/* retry locally a few times */
retry_bucket = 1;
else if (flocal < in->size + orig_tries)
else if (flocal <= in->size + orig_tries)
/* exhaustive bucket search */
retry_bucket = 1;
else if (ftotal < 20)
@@ -426,7 +435,7 @@ reject:
/* else give up */
skip_rep = 1;
dprintk(" reject %d collide %d "
"ftotal %d flocal %d\n",
"ftotal %u flocal %u\n",
reject, collide, ftotal,
flocal);
}
@@ -455,15 +464,12 @@ reject:
* @x: hash input
* @result: pointer to result vector
* @result_max: maximum result size
* @force: force initial replica choice; -1 for none
*/
int crush_do_rule(struct crush_map *map,
int crush_do_rule(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
int force, __u32 *weight)
const __u32 *weight)
{
int result_len;
int force_context[CRUSH_MAX_DEPTH];
int force_pos = -1;
int a[CRUSH_MAX_SET];
int b[CRUSH_MAX_SET];
int c[CRUSH_MAX_SET];
@@ -474,66 +480,44 @@ int crush_do_rule(struct crush_map *map,
int osize;
int *tmp;
struct crush_rule *rule;
int step;
__u32 step;
int i, j;
int numrep;
int firstn;
BUG_ON(ruleno >= map->max_rules);
if ((__u32)ruleno >= map->max_rules) {
dprintk(" bad ruleno %d\n", ruleno);
return 0;
}
rule = map->rules[ruleno];
result_len = 0;
w = a;
o = b;
/*
* determine hierarchical context of force, if any. note
* that this may or may not correspond to the specific types
* referenced by the crush rule.
*/
if (force >= 0 &&
force < map->max_devices &&
map->device_parents[force] != 0 &&
!is_out(map, weight, force, x)) {
while (1) {
force_context[++force_pos] = force;
if (force >= 0)
force = map->device_parents[force];
else
force = map->bucket_parents[-1-force];
if (force == 0)
break;
}
}
for (step = 0; step < rule->len; step++) {
struct crush_rule_step *curstep = &rule->steps[step];
firstn = 0;
switch (rule->steps[step].op) {
switch (curstep->op) {
case CRUSH_RULE_TAKE:
w[0] = rule->steps[step].arg1;
/* find position in force_context/hierarchy */
while (force_pos >= 0 &&
force_context[force_pos] != w[0])
force_pos--;
/* and move past it */
if (force_pos >= 0)
force_pos--;
w[0] = curstep->arg1;
wsize = 1;
break;
case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
case CRUSH_RULE_CHOOSE_FIRSTN:
firstn = 1;
/* fall through */
case CRUSH_RULE_CHOOSE_LEAF_INDEP:
case CRUSH_RULE_CHOOSE_INDEP:
BUG_ON(wsize == 0);
if (wsize == 0)
break;
recurse_to_leaf =
rule->steps[step].op ==
curstep->op ==
CRUSH_RULE_CHOOSE_LEAF_FIRSTN ||
rule->steps[step].op ==
curstep->op ==
CRUSH_RULE_CHOOSE_LEAF_INDEP;
/* reset output */
@@ -545,32 +529,18 @@ int crush_do_rule(struct crush_map *map,
* basically, numrep <= 0 means relative to
* the provided result_max
*/
numrep = rule->steps[step].arg1;
numrep = curstep->arg1;
if (numrep <= 0) {
numrep += result_max;
if (numrep <= 0)
continue;
}
j = 0;
if (osize == 0 && force_pos >= 0) {
/* skip any intermediate types */
while (force_pos &&
force_context[force_pos] < 0 &&
rule->steps[step].arg2 !=
map->buckets[-1 -
force_context[force_pos]]->type)
force_pos--;
o[osize] = force_context[force_pos];
if (recurse_to_leaf)
c[osize] = force_context[0];
j++;
force_pos--;
}
osize += crush_choose(map,
map->buckets[-1-w[i]],
weight,
x, numrep,
rule->steps[step].arg2,
curstep->arg2,
o+osize, j,
firstn,
recurse_to_leaf, c+osize);
@@ -597,7 +567,9 @@ int crush_do_rule(struct crush_map *map,
break;
default:
BUG_ON(1);
dprintk(" unknown op %d at step %d\n",
curstep->op, step);
break;
}
}
return result_len;

View File

@@ -653,54 +653,57 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
static int prepare_connect_authorizer(struct ceph_connection *con)
static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
int *auth_proto)
{
void *auth_buf;
int auth_len = 0;
int auth_protocol = 0;
struct ceph_auth_handshake *auth;
if (!con->ops->get_authorizer) {
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
return NULL;
}
/* Can't hold the mutex while getting authorizer */
mutex_unlock(&con->mutex);
if (con->ops->get_authorizer)
con->ops->get_authorizer(con, &auth_buf, &auth_len,
&auth_protocol, &con->auth_reply_buf,
&con->auth_reply_buf_len,
con->auth_retry);
auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
mutex_lock(&con->mutex);
if (test_bit(CLOSED, &con->state) ||
test_bit(OPENING, &con->state))
return -EAGAIN;
if (IS_ERR(auth))
return auth;
if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
return ERR_PTR(-EAGAIN);
con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
con->out_connect.authorizer_len = cpu_to_le32(auth_len);
con->auth_reply_buf = auth->authorizer_reply_buf;
con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
if (auth_len)
ceph_con_out_kvec_add(con, auth_len, auth_buf);
return 0;
return auth;
}
/*
* We connected to a peer and are saying hello.
*/
static void prepare_write_banner(struct ceph_messenger *msgr,
struct ceph_connection *con)
static void prepare_write_banner(struct ceph_connection *con)
{
ceph_con_out_kvec_reset(con);
ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr),
&msgr->my_enc_addr);
ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
&con->msgr->my_enc_addr);
con->out_more = 0;
set_bit(WRITE_PENDING, &con->state);
}
static int prepare_write_connect(struct ceph_messenger *msgr,
struct ceph_connection *con,
int include_banner)
static int prepare_write_connect(struct ceph_connection *con)
{
unsigned int global_seq = get_global_seq(con->msgr, 0);
int proto;
int auth_proto;
struct ceph_auth_handshake *auth;
switch (con->peer_name.type) {
case CEPH_ENTITY_TYPE_MON:
@@ -719,23 +722,32 @@ static int prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
con->out_connect.features = cpu_to_le64(msgr->supported_features);
con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
con->out_connect.protocol_version = cpu_to_le32(proto);
con->out_connect.flags = 0;
if (include_banner)
prepare_write_banner(msgr, con);
else
ceph_con_out_kvec_reset(con);
ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect);
auth_proto = CEPH_AUTH_UNKNOWN;
auth = get_connect_authorizer(con, &auth_proto);
if (IS_ERR(auth))
return PTR_ERR(auth);
con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
con->out_connect.authorizer_len = auth ?
cpu_to_le32(auth->authorizer_buf_len) : 0;
ceph_con_out_kvec_add(con, sizeof (con->out_connect),
&con->out_connect);
if (auth && auth->authorizer_buf_len)
ceph_con_out_kvec_add(con, auth->authorizer_buf_len,
auth->authorizer_buf);
con->out_more = 0;
set_bit(WRITE_PENDING, &con->state);
return prepare_connect_authorizer(con);
return 0;
}
/*
@@ -992,11 +1004,10 @@ static int prepare_read_message(struct ceph_connection *con)
static int read_partial(struct ceph_connection *con,
int *to, int size, void *object)
int end, int size, void *object)
{
*to += size;
while (con->in_base_pos < *to) {
int left = *to - con->in_base_pos;
while (con->in_base_pos < end) {
int left = end - con->in_base_pos;
int have = size - left;
int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
if (ret <= 0)
@@ -1012,37 +1023,52 @@ static int read_partial(struct ceph_connection *con,
*/
static int read_partial_banner(struct ceph_connection *con)
{
int ret, to = 0;
int size;
int end;
int ret;
dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
/* peer's banner */
ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
size = strlen(CEPH_BANNER);
end = size;
ret = read_partial(con, end, size, con->in_banner);
if (ret <= 0)
goto out;
ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
&con->actual_peer_addr);
size = sizeof (con->actual_peer_addr);
end += size;
ret = read_partial(con, end, size, &con->actual_peer_addr);
if (ret <= 0)
goto out;
ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
&con->peer_addr_for_me);
size = sizeof (con->peer_addr_for_me);
end += size;
ret = read_partial(con, end, size, &con->peer_addr_for_me);
if (ret <= 0)
goto out;
out:
return ret;
}
static int read_partial_connect(struct ceph_connection *con)
{
int ret, to = 0;
int size;
int end;
int ret;
dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
size = sizeof (con->in_reply);
end = size;
ret = read_partial(con, end, size, &con->in_reply);
if (ret <= 0)
goto out;
ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
con->auth_reply_buf);
size = le32_to_cpu(con->in_reply.authorizer_len);
end += size;
ret = read_partial(con, end, size, con->auth_reply_buf);
if (ret <= 0)
goto out;
@@ -1377,7 +1403,8 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
ret = prepare_write_connect(con->msgr, con, 0);
ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
prepare_read_connect(con);
@@ -1397,7 +1424,10 @@ static int process_connect(struct ceph_connection *con)
ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr));
reset_connection(con);
prepare_write_connect(con->msgr, con, 0);
ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
prepare_read_connect(con);
/* Tell ceph about it. */
@@ -1420,7 +1450,10 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->out_connect.connect_seq),
le32_to_cpu(con->in_connect.connect_seq));
con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
prepare_write_connect(con->msgr, con, 0);
ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
prepare_read_connect(con);
break;
@@ -1434,7 +1467,10 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_connect.global_seq));
get_global_seq(con->msgr,
le32_to_cpu(con->in_connect.global_seq));
prepare_write_connect(con->msgr, con, 0);
ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
prepare_read_connect(con);
break;
@@ -1491,10 +1527,10 @@ static int process_connect(struct ceph_connection *con)
*/
static int read_partial_ack(struct ceph_connection *con)
{
int to = 0;
int size = sizeof (con->in_temp_ack);
int end = size;
return read_partial(con, &to, sizeof(con->in_temp_ack),
&con->in_temp_ack);
return read_partial(con, end, size, &con->in_temp_ack);
}
@@ -1627,8 +1663,9 @@ static int read_partial_message_bio(struct ceph_connection *con,
static int read_partial_message(struct ceph_connection *con)
{
struct ceph_msg *m = con->in_msg;
int size;
int end;
int ret;
int to, left;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
int skip;
@@ -1638,15 +1675,11 @@ static int read_partial_message(struct ceph_connection *con)
dout("read_partial_message con %p msg %p\n", con, m);
/* header */
while (con->in_base_pos < sizeof(con->in_hdr)) {
left = sizeof(con->in_hdr) - con->in_base_pos;
ret = ceph_tcp_recvmsg(con->sock,
(char *)&con->in_hdr + con->in_base_pos,
left);
if (ret <= 0)
return ret;
con->in_base_pos += ret;
}
size = sizeof (con->in_hdr);
end = size;
ret = read_partial(con, end, size, &con->in_hdr);
if (ret <= 0)
return ret;
crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
if (cpu_to_le32(crc) != con->in_hdr.crc) {
@@ -1759,16 +1792,12 @@ static int read_partial_message(struct ceph_connection *con)
}
/* footer */
to = sizeof(m->hdr) + sizeof(m->footer);
while (con->in_base_pos < to) {
left = to - con->in_base_pos;
ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
(con->in_base_pos - sizeof(m->hdr)),
left);
if (ret <= 0)
return ret;
con->in_base_pos += ret;
}
size = sizeof (m->footer);
end += size;
ret = read_partial(con, end, size, &m->footer);
if (ret <= 0)
return ret;
dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
m, front_len, m->footer.front_crc, middle_len,
m->footer.middle_crc, data_len, m->footer.data_crc);
@@ -1835,7 +1864,6 @@ static void process_message(struct ceph_connection *con)
*/
static int try_write(struct ceph_connection *con)
{
struct ceph_messenger *msgr = con->msgr;
int ret = 1;
dout("try_write start %p state %lu nref %d\n", con, con->state,
@@ -1846,7 +1874,11 @@ more:
/* open the socket first? */
if (con->sock == NULL) {
prepare_write_connect(msgr, con, 1);
ceph_con_out_kvec_reset(con);
prepare_write_banner(con);
ret = prepare_write_connect(con);
if (ret < 0)
goto out;
prepare_read_banner(con);
set_bit(CONNECTING, &con->state);
clear_bit(NEGOTIATING, &con->state);

View File

@@ -278,7 +278,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
{
dst->op = cpu_to_le16(src->op);
switch (dst->op) {
switch (src->op) {
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
dst->extent.offset =
@@ -664,11 +664,11 @@ static void put_osd(struct ceph_osd *osd)
{
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
if (atomic_dec_and_test(&osd->o_ref)) {
if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
if (osd->o_authorizer)
ac->ops->destroy_authorizer(ac, osd->o_authorizer);
if (ac->ops && ac->ops->destroy_authorizer)
ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -841,6 +841,12 @@ static void register_request(struct ceph_osd_client *osdc,
static void __unregister_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
if (RB_EMPTY_NODE(&req->r_node)) {
dout("__unregister_request %p tid %lld not registered\n",
req, req->r_tid);
return;
}
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &osdc->requests);
osdc->num_requests--;
@@ -2108,37 +2114,32 @@ static void put_osd_con(struct ceph_connection *con)
/*
* authentication
*/
static int get_authorizer(struct ceph_connection *con,
void **buf, int *len, int *proto,
void **reply_buf, int *reply_len, int force_new)
/*
* Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it.
*/
static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
int *proto, int force_new)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
int ret = 0;
struct ceph_auth_handshake *auth = &o->o_auth;
if (force_new && o->o_authorizer) {
ac->ops->destroy_authorizer(ac, o->o_authorizer);
o->o_authorizer = NULL;
if (force_new && auth->authorizer) {
if (ac->ops && ac->ops->destroy_authorizer)
ac->ops->destroy_authorizer(ac, auth->authorizer);
auth->authorizer = NULL;
}
if (o->o_authorizer == NULL) {
ret = ac->ops->create_authorizer(
ac, CEPH_ENTITY_TYPE_OSD,
&o->o_authorizer,
&o->o_authorizer_buf,
&o->o_authorizer_buf_len,
&o->o_authorizer_reply_buf,
&o->o_authorizer_reply_buf_len);
if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ret;
return ERR_PTR(ret);
}
*proto = ac->protocol;
*buf = o->o_authorizer_buf;
*len = o->o_authorizer_buf_len;
*reply_buf = o->o_authorizer_reply_buf;
*reply_len = o->o_authorizer_reply_buf_len;
return 0;
return auth;
}
@@ -2148,7 +2149,11 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len);
/*
* XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
* XXX which do we do: succeed or fail?
*/
return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
}
static int invalidate_authorizer(struct ceph_connection *con)
@@ -2157,7 +2162,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
if (ac->ops->invalidate_authorizer)
if (ac->ops && ac->ops->invalidate_authorizer)
ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
return ceph_monc_validate_auth(&osdc->client->monc);

View File

@@ -161,13 +161,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
c->max_rules = ceph_decode_32(p);
c->max_devices = ceph_decode_32(p);
c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS);
if (c->device_parents == NULL)
goto badmem;
c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS);
if (c->bucket_parents == NULL)
goto badmem;
c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
if (c->buckets == NULL)
goto badmem;
@@ -890,8 +883,12 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
pglen = ceph_decode_32(p);
if (pglen) {
/* insert */
ceph_decode_need(p, end, pglen*sizeof(u32), bad);
/* removing existing (if any) */
(void) __remove_pg_mapping(&map->pg_temp, pgid);
/* insert */
pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
if (!pg) {
err = -ENOMEM;
@@ -1000,7 +997,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
{
unsigned int num, num_mask;
struct ceph_pg pgid;
s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
int poolid = le32_to_cpu(fl->fl_pg_pool);
struct ceph_pg_pool_info *pool;
unsigned int ps;
@@ -1011,23 +1007,13 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
if (!pool)
return -EIO;
ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
if (preferred >= 0) {
ps += preferred;
num = le32_to_cpu(pool->v.lpg_num);
num_mask = pool->lpg_num_mask;
} else {
num = le32_to_cpu(pool->v.pg_num);
num_mask = pool->pg_num_mask;
}
num = le32_to_cpu(pool->v.pg_num);
num_mask = pool->pg_num_mask;
pgid.ps = cpu_to_le16(ps);
pgid.preferred = cpu_to_le16(preferred);
pgid.preferred = cpu_to_le16(-1);
pgid.pool = fl->fl_pg_pool;
if (preferred >= 0)
dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
(int)preferred);
else
dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
ol->ol_pgid = pgid;
ol->ol_stripe_unit = fl->fl_object_stripe_unit;
@@ -1045,24 +1031,18 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
struct ceph_pg_mapping *pg;
struct ceph_pg_pool_info *pool;
int ruleno;
unsigned int poolid, ps, pps, t;
int preferred;
unsigned int poolid, ps, pps, t, r;
poolid = le32_to_cpu(pgid.pool);
ps = le16_to_cpu(pgid.ps);
preferred = (s16)le16_to_cpu(pgid.preferred);
pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
if (!pool)
return NULL;
/* pg_temp? */
if (preferred >= 0)
t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
pool->lpgp_num_mask);
else
t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
pool->pgp_num_mask);
t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
pool->pgp_num_mask);
pgid.ps = cpu_to_le16(t);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
@@ -1080,23 +1060,20 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
return NULL;
}
/* don't forcefeed bad device ids to crush */
if (preferred >= osdmap->max_osd ||
preferred >= osdmap->crush->max_devices)
preferred = -1;
if (preferred >= 0)
pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.lpgp_num),
pool->lpgp_num_mask);
else
pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.pgp_num),
pool->pgp_num_mask);
pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.pgp_num),
pool->pgp_num_mask);
pps += poolid;
*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
min_t(int, pool->v.size, *num),
preferred, osdmap->osd_weight);
r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
min_t(int, pool->v.size, *num),
osdmap->osd_weight);
if (r < 0) {
pr_err("error %d from crush rule: pool %d ruleset %d type %d"
" size %d\n", r, poolid, pool->v.crush_ruleset,
pool->v.type, pool->v.size);
return NULL;
}
*num = r;
return osds;
}