rtrs-pri.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  /* SPDX-License-Identifier: GPL-2.0-or-later */
  /*
   * RDMA Transport Layer
   *
   * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
   * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
   * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
   */
  9. #ifndef RTRS_PRI_H
  10. #define RTRS_PRI_H
  11. #include <linux/uuid.h>
  12. #include <rdma/rdma_cm.h>
  13. #include <rdma/ib_verbs.h>
  14. #include <rdma/ib.h>
  15. #include "rtrs.h"
  16. #define RTRS_PROTO_VER_MAJOR 2
  17. #define RTRS_PROTO_VER_MINOR 0
  18. #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
  19. __stringify(RTRS_PROTO_VER_MINOR)
  /*
   * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
   * and the minimum chunk size is 4096 (2^12).
   * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
   * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
   * Therefore the practical max value of sess_queue_depth is
   * somewhere between 1 and 65535 and it depends on the system.
   *
   * NOTE(review): MIN_CHUNK_SIZE in this header is 8192 (2^13), not 4096;
   * confirm which minimum chunk size this derivation is meant to use.
   */
  #define MAX_SESS_QUEUE_DEPTH 65535
  /**
   * enum rtrs_imm_const - field widths and masks of the 32-bit immediate value
   * @MAX_IMM_TYPE_BITS: width of the type field
   * @MAX_IMM_TYPE_MASK: mask with the low MAX_IMM_TYPE_BITS bits set (0xf)
   * @MAX_IMM_PAYL_BITS: width of the payload field
   * @MAX_IMM_PAYL_MASK: mask with the low MAX_IMM_PAYL_BITS bits set (0x0fffffff)
   *
   * The two widths add up to 32.
   */
  enum rtrs_imm_const {
      MAX_IMM_TYPE_BITS = 4,
      MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),

      MAX_IMM_PAYL_BITS = 28,
      MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
  };
  /**
   * enum rtrs_imm_type - values carried in the type field of an immediate
   * @RTRS_IO_REQ_IMM:       IO request, client to server
   * @RTRS_IO_RSP_IMM:       IO response, server to client
   * @RTRS_IO_RSP_W_INV_IMM: IO response, server to client; selected by
   *                         rtrs_to_io_rsp_imm() when its w_inval argument
   *                         is true
   * @RTRS_HB_MSG_IMM:       heartbeat message
   * @RTRS_HB_ACK_IMM:       heartbeat acknowledgement
   * @RTRS_LAST_IMM:         one past the highest value in use (10)
   *
   * Values 3..7 are not assigned here; the heartbeat types start at 8.
   */
  enum rtrs_imm_type {
      RTRS_IO_REQ_IMM       = 0, /* client to server */
      RTRS_IO_RSP_IMM       = 1, /* server to client */
      RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

      RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
      RTRS_HB_ACK_IMM = 9,

      RTRS_LAST_IMM,
  };
  43. enum {
  44. SERVICE_CON_QUEUE_DEPTH = 512,
  45. MAX_PATHS_NUM = 128,
  46. MIN_CHUNK_SIZE = 8192,
  47. RTRS_HB_INTERVAL_MS = 5000,
  48. RTRS_HB_MISSED_MAX = 5,
  49. RTRS_MAGIC = 0x1BBD,
  50. RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
  51. };
  struct rtrs_ib_dev;

  /**
   * struct rtrs_rdma_dev_pd_ops - callbacks operating on struct rtrs_ib_dev
   * @alloc:  takes no arguments and returns a struct rtrs_ib_dev pointer
   * @free:   takes a device, returns nothing
   * @init:   takes a device, returns an int status
   * @deinit: takes a device, returns nothing
   *
   * NOTE(review): when these are invoked, and whether any of them may be
   * left NULL, is decided by the pool code, which is not part of this header.
   */
  struct rtrs_rdma_dev_pd_ops {
      struct rtrs_ib_dev *(*alloc)(void);
      void (*free)(struct rtrs_ib_dev *dev);
      int (*init)(struct rtrs_ib_dev *dev);
      void (*deinit)(struct rtrs_ib_dev *dev);
  };
  59. struct rtrs_rdma_dev_pd {
  60. struct mutex mutex;
  61. struct list_head list;
  62. enum ib_pd_flags pd_flags;
  63. const struct rtrs_rdma_dev_pd_ops *ops;
  64. };
  65. struct rtrs_ib_dev {
  66. struct ib_device *ib_dev;
  67. struct ib_pd *ib_pd;
  68. struct kref ref;
  69. struct list_head entry;
  70. struct rtrs_rdma_dev_pd *pool;
  71. };
  72. struct rtrs_con {
  73. struct rtrs_path *path;
  74. struct ib_qp *qp;
  75. struct ib_cq *cq;
  76. struct rdma_cm_id *cm_id;
  77. unsigned int cid;
  78. int nr_cqe;
  79. atomic_t wr_cnt;
  80. atomic_t sq_wr_avail;
  81. };
  82. struct rtrs_path {
  83. struct list_head entry;
  84. struct sockaddr_storage dst_addr;
  85. struct sockaddr_storage src_addr;
  86. char sessname[NAME_MAX];
  87. uuid_t uuid;
  88. struct rtrs_con **con;
  89. unsigned int con_num;
  90. unsigned int irq_con_num;
  91. unsigned int recon_cnt;
  92. unsigned int signal_interval;
  93. struct rtrs_ib_dev *dev;
  94. int dev_ref;
  95. struct ib_cqe *hb_cqe;
  96. void (*hb_err_handler)(struct rtrs_con *con);
  97. struct workqueue_struct *hb_wq;
  98. struct delayed_work hb_dwork;
  99. unsigned int hb_interval_ms;
  100. unsigned int hb_missed_cnt;
  101. unsigned int hb_missed_max;
  102. ktime_t hb_last_sent;
  103. ktime_t hb_cur_latency;
  104. };
  105. /* rtrs information unit */
  106. struct rtrs_iu {
  107. struct ib_cqe cqe;
  108. dma_addr_t dma_addr;
  109. void *buf;
  110. size_t size;
  111. enum dma_data_direction direction;
  112. };
  /**
   * enum rtrs_msg_types - RTRS message types, see also rtrs/README
   * @RTRS_MSG_INFO_REQ:  Client additional info request to the server
   * @RTRS_MSG_INFO_RSP:  Server additional info response to the client
   * @RTRS_MSG_WRITE:     Client writes data per RDMA to server
   * @RTRS_MSG_READ:      Client requests data transfer from server
   * @RTRS_MSG_RKEY_RSP:  Server refreshed rkey for rbuf
   *
   * The enumerators are numbered consecutively starting at 0.
   */
  enum rtrs_msg_types {
      RTRS_MSG_INFO_REQ,
      RTRS_MSG_INFO_RSP,
      RTRS_MSG_WRITE,
      RTRS_MSG_READ,
      RTRS_MSG_RKEY_RSP,
  };
  /**
   * enum rtrs_msg_flags - RTRS message flags.
   * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
   * @RTRS_MSG_NEW_RKEY_F:   Send refreshed rkey in response.
   *
   * Each flag is a distinct bit, so the values can be OR-ed together.
   */
  enum rtrs_msg_flags {
      RTRS_MSG_NEED_INVAL_F = 1 << 0,
      RTRS_MSG_NEW_RKEY_F   = 1 << 1,
  };
  137. /**
  138. * struct rtrs_sg_desc - RDMA-Buffer entry description
  139. * @addr: Address of RDMA destination buffer
  140. * @key: Authorization rkey to write to the buffer
  141. * @len: Size of the buffer
  142. */
  143. struct rtrs_sg_desc {
  144. __le64 addr;
  145. __le32 key;
  146. __le32 len;
  147. };
  148. /**
  149. * struct rtrs_msg_conn_req - Client connection request to the server
  150. * @magic: RTRS magic
  151. * @version: RTRS protocol version
  152. * @cid: Current connection id
  153. * @cid_num: Number of connections per session
  154. * @recon_cnt: Reconnections counter
  155. * @sess_uuid: UUID of a session (path)
  156. * @paths_uuid: UUID of a group of sessions (paths)
  157. *
  158. * NOTE: max size 56 bytes, see man rdma_connect().
  159. */
  160. struct rtrs_msg_conn_req {
  161. /* Is set to 0 by cma.c in case of AF_IB, do not touch that.
  162. * see https://www.spinics.net/lists/linux-rdma/msg22397.html
  163. */
  164. u8 __cma_version;
  165. /* On sender side that should be set to 0, or cma_save_ip_info()
  166. * extract garbage and will fail.
  167. */
  168. u8 __ip_version;
  169. __le16 magic;
  170. __le16 version;
  171. __le16 cid;
  172. __le16 cid_num;
  173. __le16 recon_cnt;
  174. uuid_t sess_uuid;
  175. uuid_t paths_uuid;
  176. u8 first_conn : 1;
  177. u8 reserved_bits : 7;
  178. u8 reserved[11];
  179. };
  180. /**
  181. * struct rtrs_msg_conn_rsp - Server connection response to the client
  182. * @magic: RTRS magic
  183. * @version: RTRS protocol version
  184. * @errno: If rdma_accept() then 0, if rdma_reject() indicates error
  185. * @queue_depth: max inflight messages (queue-depth) in this session
  186. * @max_io_size: max io size server supports
  187. * @max_hdr_size: max msg header size server supports
  188. *
  189. * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
  190. */
  191. struct rtrs_msg_conn_rsp {
  192. __le16 magic;
  193. __le16 version;
  194. __le16 errno;
  195. __le16 queue_depth;
  196. __le32 max_io_size;
  197. __le32 max_hdr_size;
  198. __le32 flags;
  199. u8 reserved[36];
  200. };
  201. /**
  202. * struct rtrs_msg_info_req
  203. * @type: @RTRS_MSG_INFO_REQ
  204. * @pathname: Path name chosen by client
  205. */
  206. struct rtrs_msg_info_req {
  207. __le16 type;
  208. u8 pathname[NAME_MAX];
  209. u8 reserved[15];
  210. };
  211. /**
  212. * struct rtrs_msg_info_rsp
  213. * @type: @RTRS_MSG_INFO_RSP
  214. * @sg_cnt: Number of @desc entries
  215. * @desc: RDMA buffers where the client can write to server
  216. */
  217. struct rtrs_msg_info_rsp {
  218. __le16 type;
  219. __le16 sg_cnt;
  220. u8 reserved[4];
  221. struct rtrs_sg_desc desc[];
  222. };
  223. /**
  224. * struct rtrs_msg_rkey_rsp
  225. * @type: @RTRS_MSG_RKEY_RSP
  226. * @buf_id: RDMA buf_id of the new rkey
  227. * @rkey: new remote key for RDMA buffers id from server
  228. */
  229. struct rtrs_msg_rkey_rsp {
  230. __le16 type;
  231. __le16 buf_id;
  232. __le32 rkey;
  233. };
  234. /**
  235. * struct rtrs_msg_rdma_read - RDMA data transfer request from client
  236. * @type: always @RTRS_MSG_READ
  237. * @usr_len: length of user payload
  238. * @sg_cnt: number of @desc entries
  239. * @desc: RDMA buffers where the server can write the result to
  240. */
  241. struct rtrs_msg_rdma_read {
  242. __le16 type;
  243. __le16 usr_len;
  244. __le16 flags;
  245. __le16 sg_cnt;
  246. struct rtrs_sg_desc desc[];
  247. };
  248. /**
  249. * struct_msg_rdma_write - Message transferred to server with RDMA-Write
  250. * @type: always @RTRS_MSG_WRITE
  251. * @usr_len: length of user payload
  252. */
  253. struct rtrs_msg_rdma_write {
  254. __le16 type;
  255. __le16 usr_len;
  256. };
  257. /**
  258. * struct_msg_rdma_hdr - header for read or write request
  259. * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
  260. */
  261. struct rtrs_msg_rdma_hdr {
  262. __le16 type;
  263. };
  264. /* rtrs.c */
  265. struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
  266. struct ib_device *dev, enum dma_data_direction,
  267. void (*done)(struct ib_cq *cq, struct ib_wc *wc));
  268. void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
  269. int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
  270. int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
  271. struct ib_send_wr *head);
  272. int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
  273. struct ib_sge *sge, unsigned int num_sge,
  274. u32 rkey, u64 rdma_addr, u32 imm_data,
  275. enum ib_send_flags flags,
  276. struct ib_send_wr *head,
  277. struct ib_send_wr *tail);
  278. int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
  279. int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
  280. u32 max_send_sge, int cq_vector, int nr_cqe,
  281. u32 max_send_wr, u32 max_recv_wr,
  282. enum ib_poll_context poll_ctx);
  283. void rtrs_cq_qp_destroy(struct rtrs_con *con);
  /*
   * Heartbeat (HB) control for a path. The parameters of rtrs_init_hb()
   * correspond by type to the hb_* members of struct rtrs_path.
   * Defined in rtrs.c.
   */
  void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
                    unsigned int interval_ms, unsigned int missed_max,
                    void (*err_handler)(struct rtrs_con *con),
                    struct workqueue_struct *wq);
  void rtrs_start_hb(struct rtrs_path *path);
  void rtrs_stop_hb(struct rtrs_path *path);
  void rtrs_send_hb_ack(struct rtrs_path *path);
  /*
   * Device/PD pool (struct rtrs_rdma_dev_pd) management and lookup of
   * struct rtrs_ib_dev entries. Defined in rtrs.c.
   *
   * NOTE(review): the meaning of the int returned by rtrs_ib_dev_put() is
   * not visible from this header - check the definition before relying on it.
   */
  void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
                             struct rtrs_rdma_dev_pd *pool);
  void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);
  struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
                                              struct rtrs_rdma_dev_pd *pool);
  int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
  297. static inline u32 rtrs_to_imm(u32 type, u32 payload)
  298. {
  299. BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
  300. BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
  301. return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
  302. (payload & MAX_IMM_PAYL_MASK);
  303. }
  304. static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
  305. {
  306. *payload = imm & MAX_IMM_PAYL_MASK;
  307. *type = imm >> MAX_IMM_PAYL_BITS;
  308. }
  309. static inline u32 rtrs_to_io_req_imm(u32 addr)
  310. {
  311. return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
  312. }
  313. static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
  314. {
  315. enum rtrs_imm_type type;
  316. u32 payload;
  317. /* 9 bits for errno, 19 bits for msg_id */
  318. payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
  319. type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;
  320. return rtrs_to_imm(type, payload);
  321. }
  322. static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
  323. {
  324. /* 9 bits for errno, 19 bits for msg_id */
  325. *msg_id = payload & 0x7ffff;
  326. *errno = -(int)((payload >> 19) & 0x1ff);
  327. }
  /*
   * STAT_STORE_FUNC - define a sysfs store handler named <set_value>_store().
   * @type:      structure type that embeds the kobject in a member called
   *             kobj_stats
   * @set_value: name prefix of the generated function
   * @reset:     function or macro called as reset(stats, bool), returning int
   *
   * The generated handler calls reset(stats, true) when the written string
   * is "1" and reset(stats, false) when it is "0" (compared with
   * sysfs_streq()). Any other input yields -EINVAL. A non-zero result of
   * reset() is returned as is; on success the handler returns count.
   */
  #define STAT_STORE_FUNC(type, set_value, reset)                        \
  static ssize_t set_value##_store(struct kobject *kobj,                 \
                                   struct kobj_attribute *attr,          \
                                   const char *buf, size_t count)        \
  {                                                                      \
      int ret = -EINVAL;                                                 \
      type *stats = container_of(kobj, type, kobj_stats);                \
                                                                         \
      if (sysfs_streq(buf, "1"))                                         \
          ret = reset(stats, true);                                      \
      else if (sysfs_streq(buf, "0"))                                    \
          ret = reset(stats, false);                                     \
      if (ret)                                                           \
          return ret;                                                    \
                                                                         \
      return count;                                                      \
  }
  /*
   * STAT_SHOW_FUNC - define a sysfs show handler named <get_value>_show().
   * @type:      structure type that embeds the kobject in a member called
   *             kobj_stats
   * @get_value: name prefix of the generated function
   * @print:     function or macro called as print(stats, page); its result
   *             is returned unchanged as the handler's ssize_t result
   */
  #define STAT_SHOW_FUNC(type, get_value, print)                         \
  static ssize_t get_value##_show(struct kobject *kobj,                  \
                                  struct kobj_attribute *attr,           \
                                  char *page)                            \
  {                                                                      \
      type *stats = container_of(kobj, type, kobj_stats);                \
                                                                         \
      return print(stats, page);                                         \
  }
  /*
   * STAT_ATTR - define a read/write sysfs attribute for one statistic.
   * @type:  structure type that embeds the kobject (member kobj_stats)
   * @stat:  attribute name; also the prefix of the generated functions
   * @print: passed to STAT_SHOW_FUNC()
   * @reset: passed to STAT_STORE_FUNC()
   *
   * Expands to <stat>_store(), <stat>_show() and a
   * struct kobj_attribute <stat>_attr initialised with __ATTR_RW(stat).
   * The expansion ends without a semicolon; the user supplies it.
   */
  #define STAT_ATTR(type, stat, print, reset)                            \
  STAT_STORE_FUNC(type, stat, reset)                                     \
  STAT_SHOW_FUNC(type, stat, print)                                      \
  static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
  358. #endif /* RTRS_PRI_H */