Merge branch 'for-linus' of git://ceph.newdream.net/git/ceph-client

* 'for-linus' of git://ceph.newdream.net/git/ceph-client:
  libceph: fix double-free of page vector
  ceph: fix 32-bit ino numbers
  libceph: force resend of osd requests if we skip an osdmap
  ceph: use kernel DNS resolver
  ceph: fix ceph_monc_init memory leak
  ceph: let the set_layout ioctl set single traits
  Revert "ceph: don't truncate dirty pages in invalidate work thread"
  ceph: replace leading spaces with tabs
  libceph: warn on msg allocation failures
  libceph: don't complain on msgpool alloc failures
  libceph: always preallocate mon connection
  libceph: create messenger with client
  ceph: document ioctls
  ceph: implement (optional) max read size
  ceph: rename rsize -> rasize
  ceph: make readpages fully async
Tento commit je obsažen v:
Linus Torvalds
2011-10-28 16:42:18 -07:00
17 změnil soubory, kde provedl 485 přidání a 257 odebrání

Zobrazit soubor

@@ -27,3 +27,17 @@ config CEPH_LIB_PRETTYDEBUG
If unsure, say N.
config CEPH_LIB_USE_DNS_RESOLVER
bool "Use in-kernel support for DNS lookup"
depends on CEPH_LIB
select DNS_RESOLVER
default n
help
If you say Y here, hostnames (e.g. monitor addresses) will
be resolved using the CONFIG_DNS_RESOLVER facility.
For information on how to use CONFIG_DNS_RESOLVER consult
Documentation/networking/dns_resolver.txt
If unsure, say N.

Zobrazit soubor

@@ -432,9 +432,12 @@ EXPORT_SYMBOL(ceph_client_id);
/*
* create a fresh client instance
*/
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
unsigned supported_features,
unsigned required_features)
{
struct ceph_client *client;
struct ceph_entity_addr *myaddr = NULL;
int err = -ENOMEM;
client = kzalloc(sizeof(*client), GFP_KERNEL);
@@ -449,15 +452,27 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
client->auth_err = 0;
client->extra_mon_dispatch = NULL;
client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT |
supported_features;
client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT |
required_features;
client->msgr = NULL;
/* msgr */
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
client->msgr = ceph_messenger_create(myaddr,
client->supported_features,
client->required_features);
if (IS_ERR(client->msgr)) {
err = PTR_ERR(client->msgr);
goto fail;
}
client->msgr->nocrc = ceph_test_opt(client, NOCRC);
/* subsystems */
err = ceph_monc_init(&client->monc, client);
if (err < 0)
goto fail;
goto fail_msgr;
err = ceph_osdc_init(&client->osdc, client);
if (err < 0)
goto fail_monc;
@@ -466,6 +481,8 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
fail_monc:
ceph_monc_stop(&client->monc);
fail_msgr:
ceph_messenger_destroy(client->msgr);
fail:
kfree(client);
return ERR_PTR(err);
@@ -490,8 +507,7 @@ void ceph_destroy_client(struct ceph_client *client)
ceph_debugfs_client_cleanup(client);
if (client->msgr)
ceph_messenger_destroy(client->msgr);
ceph_messenger_destroy(client->msgr);
ceph_destroy_options(client->options);
@@ -514,24 +530,9 @@ static int have_mon_and_osd_map(struct ceph_client *client)
*/
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
struct ceph_entity_addr *myaddr = NULL;
int err;
unsigned long timeout = client->options->mount_timeout * HZ;
/* initialize the messenger */
if (client->msgr == NULL) {
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
client->msgr = ceph_messenger_create(myaddr,
client->supported_features,
client->required_features);
if (IS_ERR(client->msgr)) {
client->msgr = NULL;
return PTR_ERR(client->msgr);
}
client->msgr->nocrc = ceph_test_opt(client, NOCRC);
}
/* open session, and wait for mon and osd maps */
err = ceph_monc_open_session(&client->monc);
if (err < 0)

Zobrazit soubor

@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/dns_resolver.h>
#include <net/tcp.h>
#include <linux/ceph/libceph.h>
@@ -1077,6 +1078,101 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
}
}
/*
* Unlike other *_pton function semantics, zero indicates success.
*/
static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
char delim, const char **ipend)
{
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
memset(ss, 0, sizeof(*ss));
if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
ss->ss_family = AF_INET;
return 0;
}
if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
ss->ss_family = AF_INET6;
return 0;
}
return -EINVAL;
}
/*
* Extract hostname string and resolve using kernel DNS facility.
*/
#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
static int ceph_dns_resolve_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend)
{
const char *end, *delim_p;
char *colon_p, *ip_addr = NULL;
int ip_len, ret;
/*
* The end of the hostname occurs immediately preceding the delimiter or
* the port marker (':') where the delimiter takes precedence.
*/
delim_p = memchr(name, delim, namelen);
colon_p = memchr(name, ':', namelen);
if (delim_p && colon_p)
end = delim_p < colon_p ? delim_p : colon_p;
else if (!delim_p && colon_p)
end = colon_p;
else {
end = delim_p;
if (!end) /* case: hostname:/ */
end = name + namelen;
}
if (end <= name)
return -EINVAL;
/* do dns_resolve upcall */
ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
if (ip_len > 0)
ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
else
ret = -ESRCH;
kfree(ip_addr);
*ipend = end;
pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
ret, ret ? "failed" : ceph_pr_addr(ss));
return ret;
}
#else
static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend)
{
return -EINVAL;
}
#endif
/*
* Parse a server name (IP or hostname). If a valid IP address is not found
* then try to extract a hostname to resolve using userspace DNS upcall.
*/
static int ceph_parse_server_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend)
{
int ret;
ret = ceph_pton(name, namelen, ss, delim, ipend);
if (ret)
ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
return ret;
}
/*
* Parse an ip[:port] list into an addr array. Use the default
* monitor port if a port isn't specified.
@@ -1085,15 +1181,13 @@ int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count)
{
int i;
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
const char *ipend;
struct sockaddr_storage *ss = &addr[i].in_addr;
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
char delim = ',';
@@ -1102,15 +1196,11 @@ int ceph_parse_ips(const char *c, const char *end,
p++;
}
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
delim, &ipend))
ss->ss_family = AF_INET;
else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
delim, &ipend))
ss->ss_family = AF_INET6;
else
ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
if (delim == ']') {
@@ -1155,7 +1245,7 @@ int ceph_parse_ips(const char *c, const char *end,
bad:
pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
return ret;
}
EXPORT_SYMBOL(ceph_parse_ips);
@@ -2281,7 +2371,8 @@ EXPORT_SYMBOL(ceph_con_keepalive);
* construct a new message with given type, size
* the new msg has a ref count of 1.
*/
struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail)
{
struct ceph_msg *m;
@@ -2333,7 +2424,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
m->front.iov_base = kmalloc(front_len, flags);
}
if (m->front.iov_base == NULL) {
pr_err("msg_new can't allocate %d bytes\n",
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
goto out2;
}
@@ -2348,7 +2439,14 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
out2:
ceph_msg_put(m);
out:
pr_err("msg_new can't create type %d front %d\n", type, front_len);
if (!can_fail) {
pr_err("msg_new can't create type %d front %d\n", type,
front_len);
WARN_ON(1);
} else {
dout("msg_new can't create type %d front %d\n", type,
front_len);
}
return NULL;
}
EXPORT_SYMBOL(ceph_msg_new);
@@ -2398,7 +2496,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
}
if (!msg) {
*skip = 0;
msg = ceph_msg_new(type, front_len, GFP_NOFS);
msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
if (!msg) {
pr_err("unable to allocate msg type %d len %d\n",
type, front_len);

Zobrazit soubor

@@ -116,14 +116,12 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
*/
static void __close_session(struct ceph_mon_client *monc)
{
if (monc->con) {
dout("__close_session closing mon%d\n", monc->cur_mon);
ceph_con_revoke(monc->con, monc->m_auth);
ceph_con_close(monc->con);
monc->cur_mon = -1;
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
}
dout("__close_session closing mon%d\n", monc->cur_mon);
ceph_con_revoke(monc->con, monc->m_auth);
ceph_con_close(monc->con);
monc->cur_mon = -1;
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
}
/*
@@ -302,15 +300,6 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
*/
int ceph_monc_open_session(struct ceph_mon_client *monc)
{
if (!monc->con) {
monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
if (!monc->con)
return -ENOMEM;
ceph_con_init(monc->client->msgr, monc->con);
monc->con->private = monc;
monc->con->ops = &mon_con_ops;
}
mutex_lock(&monc->mutex);
__open_session(monc);
__schedule_delayed(monc);
@@ -528,10 +517,12 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
init_completion(&req->completion);
err = -ENOMEM;
req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS);
req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS,
true);
if (!req->request)
goto out;
req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS);
req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS,
true);
if (!req->reply)
goto out;
@@ -626,10 +617,12 @@ int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op,
init_completion(&req->completion);
err = -ENOMEM;
req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS);
req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS,
true);
if (!req->request)
goto out;
req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS);
req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS,
true);
if (!req->reply)
goto out;
@@ -755,13 +748,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
if (err)
goto out;
monc->con = NULL;
/* connection */
monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
if (!monc->con)
goto out_monmap;
ceph_con_init(monc->client->msgr, monc->con);
monc->con->private = monc;
monc->con->ops = &mon_con_ops;
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
if (IS_ERR(monc->auth))
return PTR_ERR(monc->auth);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
goto out_con;
}
monc->auth->want_keys =
CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS;
@@ -770,19 +771,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
err = -ENOMEM;
monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
sizeof(struct ceph_mon_subscribe_ack),
GFP_NOFS);
GFP_NOFS, true);
if (!monc->m_subscribe_ack)
goto out_monmap;
goto out_auth;
monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS);
monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS,
true);
if (!monc->m_subscribe)
goto out_subscribe_ack;
monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS);
monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS,
true);
if (!monc->m_auth_reply)
goto out_subscribe;
monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS);
monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true);
monc->pending_auth = 0;
if (!monc->m_auth)
goto out_auth_reply;
@@ -808,6 +811,10 @@ out_subscribe:
ceph_msg_put(monc->m_subscribe);
out_subscribe_ack:
ceph_msg_put(monc->m_subscribe_ack);
out_auth:
ceph_auth_destroy(monc->auth);
out_con:
monc->con->ops->put(monc->con);
out_monmap:
kfree(monc->monmap);
out:
@@ -822,11 +829,11 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
mutex_lock(&monc->mutex);
__close_session(monc);
if (monc->con) {
monc->con->private = NULL;
monc->con->ops->put(monc->con);
monc->con = NULL;
}
monc->con->private = NULL;
monc->con->ops->put(monc->con);
monc->con = NULL;
mutex_unlock(&monc->mutex);
ceph_auth_destroy(monc->auth);
@@ -973,7 +980,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MON_MAP:
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
m = ceph_msg_new(type, front_len, GFP_NOFS);
m = ceph_msg_new(type, front_len, GFP_NOFS, false);
break;
}
@@ -1000,7 +1007,7 @@ static void mon_fault(struct ceph_connection *con)
if (!con->private)
goto out;
if (monc->con && !monc->hunting)
if (!monc->hunting)
pr_info("mon%d %s session lost, "
"hunting for new mon\n", monc->cur_mon,
ceph_pr_addr(&monc->con->peer_addr.in_addr));

Zobrazit soubor

@@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
struct ceph_msgpool *pool = arg;
struct ceph_msg *msg;
msg = ceph_msg_new(0, pool->front_len, gfp_mask);
msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
if (!msg) {
dout("msgpool_alloc %s failed\n", pool->name);
} else {
@@ -61,7 +61,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
WARN_ON(1);
/* try to alloc a fresh message */
return ceph_msg_new(0, front_len, GFP_NOFS);
return ceph_msg_new(0, front_len, GFP_NOFS, false);
}
msg = mempool_alloc(pool->pool, GFP_NOFS);

Zobrazit soubor

@@ -227,7 +227,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
else
msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
OSD_OPREPLY_FRONT_LEN, gfp_flags);
OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
if (!msg) {
ceph_osdc_put_request(req);
return NULL;
@@ -250,7 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
if (use_mempool)
msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
else
msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags);
msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true);
if (!msg) {
ceph_osdc_put_request(req);
return NULL;
@@ -943,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
* Caller should hold map_sem for read and request_mutex.
*/
static int __map_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
struct ceph_osd_request *req, int force_resend)
{
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid;
@@ -967,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc,
num = err;
}
if ((req->r_osd && req->r_osd->o_osd == o &&
if ((!force_resend &&
req->r_osd && req->r_osd->o_osd == o &&
req->r_sent >= req->r_osd->o_incarnation &&
req->r_num_pg_osds == num &&
memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
@@ -1289,18 +1290,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
*
* Caller should hold map_sem for read and request_mutex.
*/
static void kick_requests(struct ceph_osd_client *osdc)
static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
{
struct ceph_osd_request *req, *nreq;
struct rb_node *p;
int needmap = 0;
int err;
dout("kick_requests\n");
dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_osd_request, r_node);
err = __map_request(osdc, req);
err = __map_request(osdc, req, force_resend);
if (err < 0)
continue; /* error */
if (req->r_osd == NULL) {
@@ -1318,7 +1319,7 @@ static void kick_requests(struct ceph_osd_client *osdc)
r_linger_item) {
dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
err = __map_request(osdc, req);
err = __map_request(osdc, req, force_resend);
if (err == 0)
continue; /* no change and no osd was specified */
if (err < 0)
@@ -1395,7 +1396,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = newmap;
}
kick_requests(osdc);
kick_requests(osdc, 0);
reset_changed_osds(osdc);
} else {
dout("ignoring incremental map %u len %d\n",
@@ -1423,6 +1424,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
"older than our %u\n", epoch, maplen,
osdc->osdmap->epoch);
} else {
int skipped_map = 0;
dout("taking full map %u len %d\n", epoch, maplen);
newmap = osdmap_decode(&p, p+maplen);
if (IS_ERR(newmap)) {
@@ -1432,9 +1435,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
BUG_ON(!newmap);
oldmap = osdc->osdmap;
osdc->osdmap = newmap;
if (oldmap)
if (oldmap) {
if (oldmap->epoch + 1 < newmap->epoch)
skipped_map = 1;
ceph_osdmap_destroy(oldmap);
kick_requests(osdc);
}
kick_requests(osdc, skipped_map);
}
p += maplen;
nr_maps--;
@@ -1707,7 +1713,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
* the request still han't been touched yet.
*/
if (req->r_sent == 0) {
rc = __map_request(osdc, req);
rc = __map_request(osdc, req, 0);
if (rc < 0) {
if (nofail) {
dout("osdc_start_request failed map, "
@@ -2032,7 +2038,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
if (front > req->r_reply->front.iov_len) {
pr_warning("get_reply front %d > preallocated %d\n",
front, (int)req->r_reply->front.iov_len);
m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS);
m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false);
if (!m)
goto out;
ceph_msg_put(req->r_reply);
@@ -2080,7 +2086,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
switch (type) {
case CEPH_MSG_OSD_MAP:
case CEPH_MSG_WATCH_NOTIFY:
return ceph_msg_new(type, front, GFP_NOFS);
return ceph_msg_new(type, front, GFP_NOFS, false);
case CEPH_MSG_OSD_OPREPLY:
return get_reply(con, hdr, skip);
default: