
A ceph message has a data payload portion. The memory for that data (either the source of data to send or the location to place data that is received) is specified in several ways. The ceph_msg structure includes fields for all of those ways, but this misrepresents the fact that not all of them are used at a time. Specifically, the data in a message can be in: - an array of pages - a list of pages - a list of Linux bios - a second list of pages (the "trail") (The two page lists are currently only ever used for outgoing data.) Impose more structure on the ceph message, making the grouping of some of these fields explicit. Shorten the name of the "page_alignment" field. Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
265 lines
7.8 KiB
C
265 lines
7.8 KiB
C
#ifndef __FS_CEPH_MESSENGER_H
|
|
#define __FS_CEPH_MESSENGER_H
|
|
|
|
#include <linux/kref.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/net.h>
|
|
#include <linux/radix-tree.h>
|
|
#include <linux/uio.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <linux/ceph/types.h>
|
|
#include <linux/ceph/buffer.h>
|
|
|
|
/* Forward declarations; the full definitions appear later in this header. */
struct ceph_msg;
struct ceph_connection;
|
|
|
|
/*
 * Ceph defines these callbacks for handling connection events.
 *
 * Every member is a function pointer supplied by the user of a
 * connection; struct ceph_connection stores a const pointer to one of
 * these tables in its "ops" field.
 */
struct ceph_connection_operations {
	/*
	 * NOTE(review): presumably take/drop a reference on the object that
	 * embeds the connection -- confirm against the implementations.
	 */
	struct ceph_connection *(*get)(struct ceph_connection *);
	void (*put)(struct ceph_connection *);

	/* handle an incoming message. */
	void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);

	/* authorize an outgoing connection */
	struct ceph_auth_handshake *(*get_authorizer) (
				struct ceph_connection *con,
				int *proto, int force_new);
	int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
	int (*invalidate_authorizer)(struct ceph_connection *con);

	/* there was some error on the socket (disconnect, whatever) */
	void (*fault) (struct ceph_connection *con);

	/* a remote host has terminated a message exchange session, and messages
	 * we sent (or they tried to send us) may be lost. */
	void (*peer_reset) (struct ceph_connection *con);

	/*
	 * Provide a message for the given header.  "skip" is an output
	 * parameter; its exact meaning is defined by the caller in the
	 * messenger implementation (not visible in this header).
	 */
	struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
					struct ceph_msg_header *hdr,
					int *skip);
};
|
|
|
|
/*
 * Expands to TWO printf-style arguments: the entity type name string and
 * the entity number converted with le64_to_cpu().  The number is a 64-bit
 * value, so pair this macro with a string conversion followed by a 64-bit
 * integer conversion, e.g. "%s%lld" (a plain %d is too narrow).
 *
 * The argument is evaluated twice; do not pass an expression with side
 * effects.
 */
#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
|
|
|
|
struct ceph_messenger {
|
|
struct ceph_entity_inst inst; /* my name+address */
|
|
struct ceph_entity_addr my_enc_addr;
|
|
|
|
atomic_t stopping;
|
|
bool nocrc;
|
|
|
|
/*
|
|
* the global_seq counts connections i (attempt to) initiate
|
|
* in order to disambiguate certain connect race conditions.
|
|
*/
|
|
u32 global_seq;
|
|
spinlock_t global_seq_lock;
|
|
|
|
u32 supported_features;
|
|
u32 required_features;
|
|
};
|
|
|
|
/*
 * Predicates telling which data payload representation(s) a message
 * carries.  Each one simply tests whether the corresponding pointer in
 * struct ceph_msg is non-NULL.  "m" must be a pointer to the message.
 * ceph_msg_has_bio() only exists when CONFIG_BLOCK is enabled.
 */
#define ceph_msg_has_pages(m)		((m)->p.pages != NULL)
#define ceph_msg_has_pagelist(m)	((m)->l.pagelist != NULL)
#ifdef CONFIG_BLOCK
#define ceph_msg_has_bio(m)		((m)->b.bio != NULL)
#endif /* CONFIG_BLOCK */
#define ceph_msg_has_trail(m)		((m)->t.trail != NULL)
|
|
|
|
/*
|
|
* a single message. it contains a header (src, dest, message type, etc.),
|
|
* footer (crc values, mainly), a "front" message body, and possibly a
|
|
* data payload (stored in some number of pages).
|
|
*/
|
|
struct ceph_msg {
|
|
struct ceph_msg_header hdr; /* header */
|
|
struct ceph_msg_footer footer; /* footer */
|
|
struct kvec front; /* unaligned blobs of message */
|
|
struct ceph_buffer *middle;
|
|
|
|
/* data payload */
|
|
struct {
|
|
struct page **pages; /* NOT OWNER. */
|
|
size_t length; /* # data bytes in array */
|
|
unsigned int alignment; /* first page */
|
|
} p;
|
|
struct {
|
|
struct ceph_pagelist *pagelist;
|
|
} l;
|
|
#ifdef CONFIG_BLOCK
|
|
struct {
|
|
struct bio *bio_iter; /* iterator */
|
|
struct bio *bio;
|
|
unsigned int bio_seg; /* current seg in bio */
|
|
} b;
|
|
#endif /* CONFIG_BLOCK */
|
|
struct {
|
|
struct ceph_pagelist *trail; /* trailing part of data */
|
|
} t;
|
|
|
|
struct ceph_connection *con;
|
|
struct list_head list_head; /* links for connection lists */
|
|
|
|
struct kref kref;
|
|
bool front_is_vmalloc;
|
|
bool more_to_follow;
|
|
bool needs_out_seq;
|
|
int front_max;
|
|
unsigned long ack_stamp; /* tx: when we were acked */
|
|
|
|
struct ceph_msgpool *pool;
|
|
};
|
|
|
|
/*
 * Position within a message's data payload.  struct ceph_connection
 * keeps one for the outgoing message (out_msg_pos) and one for the
 * incoming message (in_msg_pos).
 */
struct ceph_msg_pos {
	int page, page_pos;  /* which page; offset in page */
	int data_pos;        /* offset in data payload */
	bool did_page_crc;   /* true if we've calculated crc for current page */
};
|
|
|
|
/*
 * ceph connection fault delay defaults, for exponential backoff.
 * Both values are expressed in jiffies (multiples of HZ): half a second
 * for the base interval and five minutes for the maximum.
 */
#define BASE_DELAY_INTERVAL	(HZ/2)
#define MAX_DELAY_INTERVAL	(5 * 60 * HZ)
|
|
|
|
/*
|
|
* A single connection with another host.
|
|
*
|
|
* We maintain a queue of outgoing messages, and some session state to
|
|
* ensure that we can preserve the lossless, ordered delivery of
|
|
* messages in the case of a TCP disconnect.
|
|
*/
|
|
struct ceph_connection {
|
|
void *private;
|
|
|
|
const struct ceph_connection_operations *ops;
|
|
|
|
struct ceph_messenger *msgr;
|
|
|
|
atomic_t sock_state;
|
|
struct socket *sock;
|
|
struct ceph_entity_addr peer_addr; /* peer address */
|
|
struct ceph_entity_addr peer_addr_for_me;
|
|
|
|
unsigned long flags;
|
|
unsigned long state;
|
|
const char *error_msg; /* error message, if any */
|
|
|
|
struct ceph_entity_name peer_name; /* peer name */
|
|
|
|
unsigned peer_features;
|
|
u32 connect_seq; /* identify the most recent connection
|
|
attempt for this connection, client */
|
|
u32 peer_global_seq; /* peer's global seq for this connection */
|
|
|
|
int auth_retry; /* true if we need a newer authorizer */
|
|
void *auth_reply_buf; /* where to put the authorizer reply */
|
|
int auth_reply_buf_len;
|
|
|
|
struct mutex mutex;
|
|
|
|
/* out queue */
|
|
struct list_head out_queue;
|
|
struct list_head out_sent; /* sending or sent but unacked */
|
|
u64 out_seq; /* last message queued for send */
|
|
|
|
u64 in_seq, in_seq_acked; /* last message received, acked */
|
|
|
|
/* connection negotiation temps */
|
|
char in_banner[CEPH_BANNER_MAX_LEN];
|
|
struct ceph_msg_connect out_connect;
|
|
struct ceph_msg_connect_reply in_reply;
|
|
struct ceph_entity_addr actual_peer_addr;
|
|
|
|
/* message out temps */
|
|
struct ceph_msg *out_msg; /* sending message (== tail of
|
|
out_sent) */
|
|
bool out_msg_done;
|
|
struct ceph_msg_pos out_msg_pos;
|
|
|
|
struct kvec out_kvec[8], /* sending header/footer data */
|
|
*out_kvec_cur;
|
|
int out_kvec_left; /* kvec's left in out_kvec */
|
|
int out_skip; /* skip this many bytes */
|
|
int out_kvec_bytes; /* total bytes left */
|
|
bool out_kvec_is_msg; /* kvec refers to out_msg */
|
|
int out_more; /* there is more data after the kvecs */
|
|
__le64 out_temp_ack; /* for writing an ack */
|
|
|
|
/* message in temps */
|
|
struct ceph_msg_header in_hdr;
|
|
struct ceph_msg *in_msg;
|
|
struct ceph_msg_pos in_msg_pos;
|
|
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
|
|
|
|
char in_tag; /* protocol control byte */
|
|
int in_base_pos; /* bytes read */
|
|
__le64 in_temp_ack; /* for reading an ack */
|
|
|
|
struct delayed_work work; /* send|recv work */
|
|
unsigned long delay; /* current delay interval */
|
|
};
|
|
|
|
|
|
/* Address helpers (implemented outside this header). */
extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
extern int ceph_parse_ips(const char *c, const char *end,
			  struct ceph_entity_addr *addr,
			  int max_count, int *count);

/* Messenger subsystem setup, teardown and flush. */
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);
|
|
|
|
extern void ceph_messenger_init(struct ceph_messenger *msgr,
|
|
struct ceph_entity_addr *myaddr,
|
|
u32 supported_features,
|
|
u32 required_features,
|
|
bool nocrc);
|
|
|
|
extern void ceph_con_init(struct ceph_connection *con, void *private,
|
|
const struct ceph_connection_operations *ops,
|
|
struct ceph_messenger *msgr);
|
|
extern void ceph_con_open(struct ceph_connection *con,
|
|
__u8 entity_type, __u64 entity_num,
|
|
struct ceph_entity_addr *addr);
|
|
extern bool ceph_con_opened(struct ceph_connection *con);
|
|
extern void ceph_con_close(struct ceph_connection *con);
|
|
extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
|
|
|
|
extern void ceph_msg_revoke(struct ceph_msg *msg);
|
|
extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
|
|
|
|
extern void ceph_con_keepalive(struct ceph_connection *con);
|
|
|
|
extern void ceph_msg_data_set_pages(struct ceph_msg *msg, struct page **pages,
|
|
size_t length, size_t alignment);
|
|
extern void ceph_msg_data_set_pagelist(struct ceph_msg *msg,
|
|
struct ceph_pagelist *pagelist);
|
|
extern void ceph_msg_data_set_bio(struct ceph_msg *msg, struct bio *bio);
|
|
extern void ceph_msg_data_set_trail(struct ceph_msg *msg,
|
|
struct ceph_pagelist *trail);
|
|
|
|
extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
|
|
bool can_fail);
|
|
extern void ceph_msg_kfree(struct ceph_msg *m);
|
|
|
|
|
|
static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
|
|
{
|
|
kref_get(&msg->kref);
|
|
return msg;
|
|
}
|
|
extern void ceph_msg_last_put(struct kref *kref);
|
|
static inline void ceph_msg_put(struct ceph_msg *msg)
|
|
{
|
|
kref_put(&msg->kref, ceph_msg_last_put);
|
|
}
|
|
|
|
/* NOTE(review): presumably dumps the message contents for debugging; the implementation is not visible in this header -- confirm. */
extern void ceph_msg_dump(struct ceph_msg *msg);
|
|
|
|
#endif
|