Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph updates from Sage Weil:
 "There are several patches from Ilya fixing RBD allocation lifecycle
  issues, a series adding a nocephx_sign_messages option (and associated
  bug fixes/cleanups), several patches from Zheng improving the
  (directory) fsync behavior, a big improvement in IO for direct-io
  requests when striping is enabled from Caifeng, and several other
  small fixes and cleanups"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  libceph: clear msg->con in ceph_msg_release() only
  libceph: add nocephx_sign_messages option
  libceph: stop duplicating client fields in messenger
  libceph: drop authorizer check from cephx msg signing routines
  libceph: msg signing callouts don't need con argument
  libceph: evaluate osd_req_op_data() arguments only once
  ceph: make fsync() wait unsafe requests that created/modified inode
  ceph: add request to i_unsafe_dirops when getting unsafe reply
  libceph: introduce ceph_x_authorizer_cleanup()
  ceph: don't invalidate page cache when inode is no longer used
  rbd: remove duplicate calls to rbd_dev_mapping_clear()
  rbd: set device_type::release instead of device::release
  rbd: don't free rbd_dev outside of the release callback
  rbd: return -ENOMEM instead of pool id if rbd_dev_create() fails
  libceph: use local variable cursor instead of &msg->cursor
  libceph: remove con argument in handle_reply()
  ceph: combine as many iovec as possile into one OSD request
  ceph: fix message length computation
  ceph: fix a comment typo
  rbd: drop null test before destroy functions
This commit is contained in:
Linus Torvalds
2015-11-13 09:24:40 -08:00
15 changed files with 315 additions and 223 deletions

View File

@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
#include "crypto.h"
@@ -279,6 +280,15 @@ bad:
return -EINVAL;
}
static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
{
ceph_crypto_key_destroy(&au->session_key);
if (au->buf) {
ceph_buffer_put(au->buf);
au->buf = NULL;
}
}
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
@@ -297,7 +307,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ceph_crypto_key_destroy(&au->session_key);
ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
if (ret)
return ret;
goto out_au;
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
ceph_x_encrypt_buflen(ticket_blob_len);
@@ -309,8 +319,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
if (!au->buf) {
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
if (!au->buf) {
ceph_crypto_key_destroy(&au->session_key);
return -ENOMEM;
ret = -ENOMEM;
goto out_au;
}
}
au->service = th->service;
@@ -340,7 +350,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
p, end - p);
if (ret < 0)
goto out_buf;
goto out_au;
p += ret;
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
@@ -348,9 +358,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
BUG_ON(au->buf->vec.iov_len > maxlen);
return 0;
out_buf:
ceph_buffer_put(au->buf);
au->buf = NULL;
out_au:
ceph_x_authorizer_cleanup(au);
return ret;
}
@@ -624,8 +633,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
{
struct ceph_x_authorizer *au = (void *)a;
ceph_crypto_key_destroy(&au->session_key);
ceph_buffer_put(au->buf);
ceph_x_authorizer_cleanup(au);
kfree(au);
}
@@ -653,8 +661,7 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
if (xi->auth_authorizer.buf)
ceph_buffer_put(xi->auth_authorizer.buf);
ceph_x_authorizer_cleanup(&xi->auth_authorizer);
kfree(ac->private);
ac->private = NULL;
@@ -691,8 +698,10 @@ static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
struct ceph_msg *msg)
{
int ret;
if (!auth->authorizer)
if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &msg->footer.sig);
if (ret < 0)
@@ -707,8 +716,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
__le64 sig_check;
int ret;
if (!auth->authorizer)
if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN))
return 0;
ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
msg, &sig_check);
if (ret < 0)

View File

@@ -245,6 +245,8 @@ enum {
Opt_nocrc,
Opt_cephx_require_signatures,
Opt_nocephx_require_signatures,
Opt_cephx_sign_messages,
Opt_nocephx_sign_messages,
Opt_tcp_nodelay,
Opt_notcp_nodelay,
};
@@ -267,6 +269,8 @@ static match_table_t opt_tokens = {
{Opt_nocrc, "nocrc"},
{Opt_cephx_require_signatures, "cephx_require_signatures"},
{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
{Opt_cephx_sign_messages, "cephx_sign_messages"},
{Opt_nocephx_sign_messages, "nocephx_sign_messages"},
{Opt_tcp_nodelay, "tcp_nodelay"},
{Opt_notcp_nodelay, "notcp_nodelay"},
{-1, NULL}
@@ -491,6 +495,12 @@ ceph_parse_options(char *options, const char *dev_name,
case Opt_nocephx_require_signatures:
opt->flags |= CEPH_OPT_NOMSGAUTH;
break;
case Opt_cephx_sign_messages:
opt->flags &= ~CEPH_OPT_NOMSGSIGN;
break;
case Opt_nocephx_sign_messages:
opt->flags |= CEPH_OPT_NOMSGSIGN;
break;
case Opt_tcp_nodelay:
opt->flags |= CEPH_OPT_TCP_NODELAY;
@@ -534,6 +544,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "nocrc,");
if (opt->flags & CEPH_OPT_NOMSGAUTH)
seq_puts(m, "nocephx_require_signatures,");
if (opt->flags & CEPH_OPT_NOMSGSIGN)
seq_puts(m, "nocephx_sign_messages,");
if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
seq_puts(m, "notcp_nodelay,");
@@ -596,11 +608,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
ceph_messenger_init(&client->msgr, myaddr,
client->supported_features,
client->required_features,
ceph_test_opt(client, NOCRC),
ceph_test_opt(client, TCP_NODELAY));
ceph_messenger_init(&client->msgr, myaddr);
/* subsystems */
err = ceph_monc_init(&client->monc, client);

View File

@@ -16,8 +16,10 @@ struct ceph_crypto_key {
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
if (key)
if (key) {
kfree(key->key);
key->key = NULL;
}
}
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,

View File

@@ -509,7 +509,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
if (con->msgr->tcp_nodelay) {
if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
int optval = 1;
ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
@@ -637,9 +637,6 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
BUG_ON(msg->con == NULL);
msg->con->ops->put(msg->con);
msg->con = NULL;
ceph_msg_put(msg);
}
@@ -662,15 +659,14 @@ static void reset_connection(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
con->ops->put(con);
}
con->connect_seq = 0;
con->out_seq = 0;
if (con->out_msg) {
BUG_ON(con->out_msg->con != con);
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
@@ -1205,7 +1201,7 @@ static void prepare_write_message_footer(struct ceph_connection *con)
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
con->ops->sign_message(con, m);
con->ops->sign_message(m);
else
m->footer.sig = 0;
con->out_kvec[v].iov_len = sizeof(m->footer);
@@ -1432,7 +1428,8 @@ static int prepare_write_connect(struct ceph_connection *con)
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
con->out_connect.features =
cpu_to_le64(from_msgr(con->msgr)->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1527,7 +1524,7 @@ static int write_partial_message_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->out_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
bool do_datacrc = !con->msgr->nocrc;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
u32 crc;
dout("%s %p msg %p\n", __func__, con, msg);
@@ -1552,8 +1549,8 @@ static int write_partial_message_data(struct ceph_connection *con)
bool need_crc;
int ret;
page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
&last_piece);
page = ceph_msg_data_next(cursor, &page_offset, &length,
&last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
length, !last_piece);
if (ret <= 0) {
@@ -1564,7 +1561,7 @@ static int write_partial_message_data(struct ceph_connection *con)
}
if (do_datacrc && cursor->need_crc)
crc = ceph_crc32c_page(crc, page, page_offset, length);
need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
}
dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2005,8 +2002,8 @@ static int process_banner(struct ceph_connection *con)
static int process_connect(struct ceph_connection *con)
{
u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features;
u64 sup_feat = from_msgr(con->msgr)->supported_features;
u64 req_feat = from_msgr(con->msgr)->required_features;
u64 server_feat = ceph_sanitize_features(
le64_to_cpu(con->in_reply.features));
int ret;
@@ -2232,7 +2229,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor;
const bool do_datacrc = !con->msgr->nocrc;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
size_t length;
@@ -2246,8 +2243,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->resid) {
page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
NULL);
page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
if (do_datacrc)
@@ -2258,7 +2254,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = ceph_crc32c_page(crc, page, page_offset, ret);
(void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
(void) ceph_msg_data_advance(cursor, (size_t)ret);
}
if (do_datacrc)
con->in_data_crc = crc;
@@ -2278,7 +2274,7 @@ static int read_partial_message(struct ceph_connection *con)
int end;
int ret;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
u64 seq;
u32 crc;
@@ -2423,7 +2419,7 @@ static int read_partial_message(struct ceph_connection *con)
}
if (need_sign && con->ops->check_message_signature &&
con->ops->check_message_signature(con, m)) {
con->ops->check_message_signature(m)) {
pr_err("read_partial_message %p signature check failed\n", m);
return -EBADMSG;
}
@@ -2438,13 +2434,10 @@ static int read_partial_message(struct ceph_connection *con)
*/
static void process_message(struct ceph_connection *con)
{
struct ceph_msg *msg;
struct ceph_msg *msg = con->in_msg;
BUG_ON(con->in_msg->con != con);
con->in_msg->con = NULL;
msg = con->in_msg;
con->in_msg = NULL;
con->ops->put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -2677,7 +2670,7 @@ more:
if (ret <= 0) {
switch (ret) {
case -EBADMSG:
con->error_msg = "bad crc";
con->error_msg = "bad crc/signature";
/* fall through */
case -EBADE:
ret = -EIO;
@@ -2918,10 +2911,8 @@ static void con_fault(struct ceph_connection *con)
if (con->in_msg) {
BUG_ON(con->in_msg->con != con);
con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
con->ops->put(con);
}
/* Requeue anything that hasn't been acked */
@@ -2952,15 +2943,8 @@ static void con_fault(struct ceph_connection *con)
* initialize a new messenger instance
*/
void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u64 supported_features,
u64 required_features,
bool nocrc,
bool tcp_nodelay)
struct ceph_entity_addr *myaddr)
{
msgr->supported_features = supported_features;
msgr->required_features = required_features;
spin_lock_init(&msgr->global_seq_lock);
if (myaddr)
@@ -2970,8 +2954,6 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
msgr->nocrc = nocrc;
msgr->tcp_nodelay = tcp_nodelay;
atomic_set(&msgr->stopping, 0);
write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
@@ -2986,6 +2968,15 @@ void ceph_messenger_fini(struct ceph_messenger *msgr)
}
EXPORT_SYMBOL(ceph_messenger_fini);
static void msg_con_set(struct ceph_msg *msg, struct ceph_connection *con)
{
if (msg->con)
msg->con->ops->put(msg->con);
msg->con = con ? con->ops->get(con) : NULL;
BUG_ON(msg->con != con);
}
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
@@ -3017,9 +3008,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
return;
}
BUG_ON(msg->con != NULL);
msg->con = con->ops->get(con);
BUG_ON(msg->con == NULL);
msg_con_set(msg, con);
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
@@ -3047,16 +3036,15 @@ void ceph_msg_revoke(struct ceph_msg *msg)
{
struct ceph_connection *con = msg->con;
if (!con)
if (!con) {
dout("%s msg %p null con\n", __func__, msg);
return; /* Message not in our possession */
}
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
BUG_ON(msg->con == NULL);
msg->con->ops->put(msg->con);
msg->con = NULL;
msg->hdr.seq = 0;
ceph_msg_put(msg);
@@ -3080,16 +3068,13 @@ void ceph_msg_revoke(struct ceph_msg *msg)
*/
void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
struct ceph_connection *con;
struct ceph_connection *con = msg->con;
BUG_ON(msg == NULL);
if (!msg->con) {
if (!con) {
dout("%s msg %p null con\n", __func__, msg);
return; /* Message not in our possession */
}
con = msg->con;
mutex_lock(&con->mutex);
if (con->in_msg == msg) {
unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
@@ -3335,9 +3320,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
}
if (msg) {
BUG_ON(*skip);
msg_con_set(msg, con);
con->in_msg = msg;
con->in_msg->con = con->ops->get(con);
BUG_ON(con->in_msg->con == NULL);
} else {
/*
* Null message pointer means either we should skip
@@ -3384,6 +3368,8 @@ static void ceph_msg_release(struct kref *kref)
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
msg_con_set(m, NULL);
/* drop middle, data, if any */
if (m->middle) {
ceph_buffer_put(m->middle);

View File

@@ -120,11 +120,13 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
}
#endif /* CONFIG_BLOCK */
#define osd_req_op_data(oreq, whch, typ, fld) \
({ \
BUG_ON(whch >= (oreq)->r_num_ops); \
&(oreq)->r_ops[whch].typ.fld; \
})
#define osd_req_op_data(oreq, whch, typ, fld) \
({ \
struct ceph_osd_request *__oreq = (oreq); \
unsigned int __whch = (whch); \
BUG_ON(__whch >= __oreq->r_num_ops); \
&__oreq->r_ops[__whch].typ.fld; \
})
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
@@ -1750,8 +1752,7 @@ static void complete_request(struct ceph_osd_request *req)
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
*/
static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
struct ceph_connection *con)
static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
{
void *p, *end;
struct ceph_osd_request *req;
@@ -2807,7 +2808,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_osdc_handle_map(osdc, msg);
break;
case CEPH_MSG_OSD_OPREPLY:
handle_reply(osdc, msg, con);
handle_reply(osdc, msg);
break;
case CEPH_MSG_WATCH_NOTIFY:
handle_watch_notify(osdc, msg);
@@ -2849,9 +2850,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
goto out;
}
if (req->r_reply->con)
dout("%s revoking msg %p from old con %p\n", __func__,
req->r_reply, req->r_reply->con);
ceph_msg_revoke_incoming(req->r_reply);
if (front_len > req->r_reply->front_alloc_len) {
@@ -2978,17 +2976,19 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
static int osd_sign_message(struct ceph_msg *msg)
{
struct ceph_osd *o = con->private;
struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_sign_message(auth, msg);
}
static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
static int osd_check_message_signature(struct ceph_msg *msg)
{
struct ceph_osd *o = con->private;
struct ceph_osd *o = msg->con->private;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_check_message_signature(auth, msg);
}
@@ -3000,7 +3000,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
.sign_message = sign_message,
.check_message_signature = check_message_signature,
.sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature,
.fault = osd_reset,
};