123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409 |
- /* SPDX-License-Identifier: GPL-2.0-or-later */
- /*
- * RDMA Transport Layer
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
- #ifndef RTRS_PRI_H
- #define RTRS_PRI_H
- #include <linux/uuid.h>
- #include <rdma/rdma_cm.h>
- #include <rdma/ib_verbs.h>
- #include <rdma/ib.h>
- #include "rtrs.h"
/*
 * RTRS protocol version. RTRS_PROTO_VER_STRING is the stringified
 * "<major>.<minor>" form built from the two numeric macros.
 */
#define RTRS_PROTO_VER_MAJOR	2
#define RTRS_PROTO_VER_MINOR	0

#define RTRS_PROTO_VER_STRING \
	__stringify(RTRS_PROTO_VER_MAJOR) "." __stringify(RTRS_PROTO_VER_MINOR)
/*
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
 * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65535 and it depends on the system.
 *
 * NOTE(review): MIN_CHUNK_SIZE in this header is 8192 (2^13), not 4096;
 * confirm which minimum chunk size the reasoning above should use.
 */
#define MAX_SESS_QUEUE_DEPTH 65535
/*
 * Bit layout constants for the 32-bit immediate value built by
 * rtrs_to_imm(): MAX_IMM_TYPE_BITS bits of type sit above
 * MAX_IMM_PAYL_BITS bits of payload. The *_MASK values select that many
 * low-order bits.
 */
enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_PAYL_BITS = 28,

	MAX_IMM_TYPE_MASK = (1 << MAX_IMM_TYPE_BITS) - 1,
	MAX_IMM_PAYL_MASK = (1 << MAX_IMM_PAYL_BITS) - 1,
};
/*
 * Type codes carried in the type bits of an immediate value.
 * RTRS_LAST_IMM is one past the highest assigned code; rtrs_to_imm()
 * checks at build time that it fits in MAX_IMM_TYPE_BITS.
 */
enum rtrs_imm_type {
	/* I/O */
	RTRS_IO_REQ_IMM		= 0,	/* client to server */
	RTRS_IO_RSP_IMM		= 1,	/* server to client */
	RTRS_IO_RSP_W_INV_IMM	= 2,	/* server to client */

	/* HB: HeartBeat */
	RTRS_HB_MSG_IMM		= 8,
	RTRS_HB_ACK_IMM		= 9,

	RTRS_LAST_IMM,
};
- enum {
- SERVICE_CON_QUEUE_DEPTH = 512,
- MAX_PATHS_NUM = 128,
- MIN_CHUNK_SIZE = 8192,
- RTRS_HB_INTERVAL_MS = 5000,
- RTRS_HB_MISSED_MAX = 5,
- RTRS_MAGIC = 0x1BBD,
- RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
- };
struct rtrs_ib_dev;

/**
 * struct rtrs_rdma_dev_pd_ops - callbacks operating on a struct rtrs_ib_dev
 * @alloc:	takes no arguments and returns a struct rtrs_ib_dev pointer
 * @free:	receives a struct rtrs_ib_dev pointer, returns nothing
 * @init:	receives a struct rtrs_ib_dev pointer, returns an int status
 * @deinit:	receives a struct rtrs_ib_dev pointer, returns nothing
 *
 * NOTE(review): presumably invoked by the device/PD pool code in rtrs.c;
 * confirm call order and which callbacks may be NULL.
 */
struct rtrs_rdma_dev_pd_ops {
	struct rtrs_ib_dev	*(*alloc)(void);
	void			(*free)(struct rtrs_ib_dev *dev);
	int			(*init)(struct rtrs_ib_dev *dev);
	void			(*deinit)(struct rtrs_ib_dev *dev);
};
- struct rtrs_rdma_dev_pd {
- struct mutex mutex;
- struct list_head list;
- enum ib_pd_flags pd_flags;
- const struct rtrs_rdma_dev_pd_ops *ops;
- };
- struct rtrs_ib_dev {
- struct ib_device *ib_dev;
- struct ib_pd *ib_pd;
- struct kref ref;
- struct list_head entry;
- struct rtrs_rdma_dev_pd *pool;
- };
- struct rtrs_con {
- struct rtrs_path *path;
- struct ib_qp *qp;
- struct ib_cq *cq;
- struct rdma_cm_id *cm_id;
- unsigned int cid;
- int nr_cqe;
- atomic_t wr_cnt;
- atomic_t sq_wr_avail;
- };
- struct rtrs_path {
- struct list_head entry;
- struct sockaddr_storage dst_addr;
- struct sockaddr_storage src_addr;
- char sessname[NAME_MAX];
- uuid_t uuid;
- struct rtrs_con **con;
- unsigned int con_num;
- unsigned int irq_con_num;
- unsigned int recon_cnt;
- unsigned int signal_interval;
- struct rtrs_ib_dev *dev;
- int dev_ref;
- struct ib_cqe *hb_cqe;
- void (*hb_err_handler)(struct rtrs_con *con);
- struct workqueue_struct *hb_wq;
- struct delayed_work hb_dwork;
- unsigned int hb_interval_ms;
- unsigned int hb_missed_cnt;
- unsigned int hb_missed_max;
- ktime_t hb_last_sent;
- ktime_t hb_cur_latency;
- };
- /* rtrs information unit */
- struct rtrs_iu {
- struct ib_cqe cqe;
- dma_addr_t dma_addr;
- void *buf;
- size_t size;
- enum dma_data_direction direction;
- };
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:	Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:	Server additional info response to the client
 * @RTRS_MSG_WRITE:	Client writes data per RDMA to server
 * @RTRS_MSG_READ:	Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:	Server refreshed rkey for rbuf
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};
/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 *
 * Each flag occupies its own bit, so the values can be OR-ed together.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F	= 1 << 0,
	RTRS_MSG_NEW_RKEY_F	= 1 << 1,
};
- /**
- * struct rtrs_sg_desc - RDMA-Buffer entry description
- * @addr: Address of RDMA destination buffer
- * @key: Authorization rkey to write to the buffer
- * @len: Size of the buffer
- */
- struct rtrs_sg_desc {
- __le64 addr;
- __le32 key;
- __le32 len;
- };
- /**
- * struct rtrs_msg_conn_req - Client connection request to the server
- * @magic: RTRS magic
- * @version: RTRS protocol version
- * @cid: Current connection id
- * @cid_num: Number of connections per session
- * @recon_cnt: Reconnections counter
- * @sess_uuid: UUID of a session (path)
- * @paths_uuid: UUID of a group of sessions (paths)
- *
- * NOTE: max size 56 bytes, see man rdma_connect().
- */
- struct rtrs_msg_conn_req {
- /* Is set to 0 by cma.c in case of AF_IB, do not touch that.
- * see https://www.spinics.net/lists/linux-rdma/msg22397.html
- */
- u8 __cma_version;
- /* On sender side that should be set to 0, or cma_save_ip_info()
- * extract garbage and will fail.
- */
- u8 __ip_version;
- __le16 magic;
- __le16 version;
- __le16 cid;
- __le16 cid_num;
- __le16 recon_cnt;
- uuid_t sess_uuid;
- uuid_t paths_uuid;
- u8 first_conn : 1;
- u8 reserved_bits : 7;
- u8 reserved[11];
- };
- /**
- * struct rtrs_msg_conn_rsp - Server connection response to the client
- * @magic: RTRS magic
- * @version: RTRS protocol version
- * @errno: If rdma_accept() then 0, if rdma_reject() indicates error
- * @queue_depth: max inflight messages (queue-depth) in this session
- * @max_io_size: max io size server supports
- * @max_hdr_size: max msg header size server supports
- *
- * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
- */
- struct rtrs_msg_conn_rsp {
- __le16 magic;
- __le16 version;
- __le16 errno;
- __le16 queue_depth;
- __le32 max_io_size;
- __le32 max_hdr_size;
- __le32 flags;
- u8 reserved[36];
- };
- /**
- * struct rtrs_msg_info_req
- * @type: @RTRS_MSG_INFO_REQ
- * @pathname: Path name chosen by client
- */
- struct rtrs_msg_info_req {
- __le16 type;
- u8 pathname[NAME_MAX];
- u8 reserved[15];
- };
- /**
- * struct rtrs_msg_info_rsp
- * @type: @RTRS_MSG_INFO_RSP
- * @sg_cnt: Number of @desc entries
- * @desc: RDMA buffers where the client can write to server
- */
- struct rtrs_msg_info_rsp {
- __le16 type;
- __le16 sg_cnt;
- u8 reserved[4];
- struct rtrs_sg_desc desc[];
- };
- /**
- * struct rtrs_msg_rkey_rsp
- * @type: @RTRS_MSG_RKEY_RSP
- * @buf_id: RDMA buf_id of the new rkey
- * @rkey: new remote key for RDMA buffers id from server
- */
- struct rtrs_msg_rkey_rsp {
- __le16 type;
- __le16 buf_id;
- __le32 rkey;
- };
- /**
- * struct rtrs_msg_rdma_read - RDMA data transfer request from client
- * @type: always @RTRS_MSG_READ
- * @usr_len: length of user payload
- * @sg_cnt: number of @desc entries
- * @desc: RDMA buffers where the server can write the result to
- */
- struct rtrs_msg_rdma_read {
- __le16 type;
- __le16 usr_len;
- __le16 flags;
- __le16 sg_cnt;
- struct rtrs_sg_desc desc[];
- };
- /**
- * struct_msg_rdma_write - Message transferred to server with RDMA-Write
- * @type: always @RTRS_MSG_WRITE
- * @usr_len: length of user payload
- */
- struct rtrs_msg_rdma_write {
- __le16 type;
- __le16 usr_len;
- };
- /**
- * struct_msg_rdma_hdr - header for read or write request
- * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
- */
- struct rtrs_msg_rdma_hdr {
- __le16 type;
- };
- /* rtrs.c */
- struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
- struct ib_device *dev, enum dma_data_direction,
- void (*done)(struct ib_cq *cq, struct ib_wc *wc));
- void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
- int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
- int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
- struct ib_send_wr *head);
- int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
- struct ib_sge *sge, unsigned int num_sge,
- u32 rkey, u64 rdma_addr, u32 imm_data,
- enum ib_send_flags flags,
- struct ib_send_wr *head,
- struct ib_send_wr *tail);
- int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
- int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
- u32 max_send_sge, int cq_vector, int nr_cqe,
- u32 max_send_wr, u32 max_recv_wr,
- enum ib_poll_context poll_ctx);
- void rtrs_cq_qp_destroy(struct rtrs_con *con);
- void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
- unsigned int interval_ms, unsigned int missed_max,
- void (*err_handler)(struct rtrs_con *con),
- struct workqueue_struct *wq);
- void rtrs_start_hb(struct rtrs_path *path);
- void rtrs_stop_hb(struct rtrs_path *path);
- void rtrs_send_hb_ack(struct rtrs_path *path);
- void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
- struct rtrs_rdma_dev_pd *pool);
- void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);
- struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
- struct rtrs_rdma_dev_pd *pool);
- int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
- static inline u32 rtrs_to_imm(u32 type, u32 payload)
- {
- BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
- BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
- return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
- (payload & MAX_IMM_PAYL_MASK);
- }
- static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
- {
- *payload = imm & MAX_IMM_PAYL_MASK;
- *type = imm >> MAX_IMM_PAYL_BITS;
- }
- static inline u32 rtrs_to_io_req_imm(u32 addr)
- {
- return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
- }
- static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
- {
- enum rtrs_imm_type type;
- u32 payload;
- /* 9 bits for errno, 19 bits for msg_id */
- payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
- type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;
- return rtrs_to_imm(type, payload);
- }
- static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
- {
- /* 9 bits for errno, 19 bits for msg_id */
- *msg_id = payload & 0x7ffff;
- *errno = -(int)((payload >> 19) & 0x1ff);
- }
/*
 * STAT_STORE_FUNC - generate a sysfs store handler named <set_value>_store.
 *
 * @type must have a member called kobj_stats; the handler recovers the
 * enclosing @type object from the kobject with container_of(). Writing "1"
 * calls reset(stats, true), writing "0" calls reset(stats, false), and any
 * other input yields -EINVAL. A non-zero result from reset() is returned
 * as is; otherwise the handler returns @count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
	bool enable;							\
	int ret;							\
									\
	if (sysfs_streq(buf, "1"))					\
		enable = true;						\
	else if (sysfs_streq(buf, "0"))					\
		enable = false;						\
	else								\
		return -EINVAL;						\
									\
	ret = reset(stats, enable);					\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
/*
 * STAT_SHOW_FUNC - generate a sysfs show handler named <get_value>_show.
 *
 * The handler recovers the enclosing @type object (via its kobj_stats
 * member) from the kobject and returns whatever print(stats, page) returns.
 */
#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	return print(container_of(kobj, type, kobj_stats), page);	\
}
/*
 * STAT_ATTR - define a read/write sysfs attribute for one statistic.
 *
 * Expands to <stat>_store() (via STAT_STORE_FUNC), <stat>_show() (via
 * STAT_SHOW_FUNC) and a struct kobj_attribute named <stat>_attr that is
 * initialised with __ATTR_RW(stat). The expansion has no trailing
 * semicolon; the user of the macro supplies it.
 */
#define STAT_ATTR(type, stat, print, reset)				\
	STAT_STORE_FUNC(type, stat, reset)				\
	STAT_SHOW_FUNC(type, stat, print)				\
	static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
- #endif /* RTRS_PRI_H */
|