iscsi_iser.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. /*
  2. * iSER transport for the Open iSCSI Initiator & iSER transport internals
  3. *
  4. * Copyright (C) 2004 Dmitry Yusupov
  5. * Copyright (C) 2004 Alex Aizman
  6. * Copyright (C) 2005 Mike Christie
  7. * based on code maintained by open-iscsi@googlegroups.com
  8. *
  9. * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
  10. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
  11. * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  12. *
  13. * This software is available to you under a choice of one of two
  14. * licenses. You may choose to be licensed under the terms of the GNU
  15. * General Public License (GPL) Version 2, available from the file
  16. * COPYING in the main directory of this source tree, or the
  17. * OpenIB.org BSD license below:
  18. *
  19. * Redistribution and use in source and binary forms, with or
  20. * without modification, are permitted provided that the following
  21. * conditions are met:
  22. *
  23. * - Redistributions of source code must retain the above
  24. * copyright notice, this list of conditions and the following
  25. * disclaimer.
  26. *
  27. * - Redistributions in binary form must reproduce the above
  28. * copyright notice, this list of conditions and the following
  29. * disclaimer in the documentation and/or other materials
  30. * provided with the distribution.
  31. *
  32. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  33. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  34. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  35. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  36. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  37. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  38. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  39. * SOFTWARE.
  40. */
  41. #ifndef __ISCSI_ISER_H__
  42. #define __ISCSI_ISER_H__
  43. #include <linux/types.h>
  44. #include <linux/net.h>
  45. #include <linux/printk.h>
  46. #include <scsi/libiscsi.h>
  47. #include <scsi/scsi_transport_iscsi.h>
  48. #include <scsi/scsi_cmnd.h>
  49. #include <scsi/scsi_device.h>
  50. #include <scsi/iser.h>
  51. #include <linux/interrupt.h>
  52. #include <linux/wait.h>
  53. #include <linux/sched.h>
  54. #include <linux/list.h>
  55. #include <linux/slab.h>
  56. #include <linux/dma-mapping.h>
  57. #include <linux/mutex.h>
  58. #include <linux/mempool.h>
  59. #include <linux/uio.h>
  60. #include <linux/socket.h>
  61. #include <linux/in.h>
  62. #include <linux/in6.h>
  63. #include <rdma/ib_verbs.h>
  64. #include <rdma/rdma_cm.h>
  /*
   * Driver identity strings.  PFX is DRV_NAME followed by ": " and is the
   * prefix used by the iser_dbg/iser_warn/iser_info/iser_err log macros.
   */
  #define DRV_NAME    "iser"
  #define PFX         DRV_NAME ": "
  #define DRV_VER     "1.6"
  /*
   * iser_dbg - print a KERN_DEBUG message tagged with PFX and the name of
   * the calling function.  Nothing is printed unless iser_debug_level is
   * greater than 2.
   */
  #define iser_dbg(fmt, ...)                                          \
      do {                                                            \
          if (unlikely(iser_debug_level > 2))                         \
              printk(KERN_DEBUG PFX "%s: " fmt,                       \
                     __func__, ##__VA_ARGS__);                        \
      } while (0)
  /*
   * iser_warn - pr_warn() a message tagged with PFX and the name of the
   * calling function.  Nothing is printed unless iser_debug_level is
   * greater than 0.
   */
  #define iser_warn(fmt, ...)                                         \
      do {                                                            \
          if (unlikely(iser_debug_level > 0))                         \
              pr_warn(PFX "%s: " fmt, __func__, ##__VA_ARGS__);       \
      } while (0)
  /*
   * iser_info - pr_info() a message tagged with PFX and the name of the
   * calling function.  Nothing is printed unless iser_debug_level is
   * greater than 1.
   */
  #define iser_info(fmt, ...)                                         \
      do {                                                            \
          if (unlikely(iser_debug_level > 1))                         \
              pr_info(PFX "%s: " fmt, __func__, ##__VA_ARGS__);       \
      } while (0)
  /*
   * iser_err - pr_err() a message tagged with PFX and the name of the
   * calling function.  Unlike the other log macros it is not gated by
   * iser_debug_level.
   */
  #define iser_err(fmt, ...) \
      pr_err(PFX "%s: " fmt, __func__, ##__VA_ARGS__)
  /*
   * SG table sizes: the I/O size in bytes (sectors * SECTOR_SIZE) shifted
   * down by ilog2(SZ_4K), i.e. counted in SZ_4K units.
   *
   * Default support is 512KB I/O size (1024 sectors).
   */
  #define ISER_DEF_MAX_SECTORS        1024
  #define ISCSI_ISER_DEF_SG_TABLESIZE \
      ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K))

  /* Maximum support is 16MB I/O size (32768 sectors) */
  #define ISCSI_ISER_MAX_SG_TABLESIZE \
      ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K))

  /*
   * ISER_DEF_XMIT_CMDS_MAX is the larger of ISCSI_DEF_XMIT_CMDS_MAX and
   * ISER_DEF_XMIT_CMDS_DEFAULT; the per-LUN command default follows it.
   */
  #define ISER_DEF_XMIT_CMDS_DEFAULT  512
  #if ISCSI_DEF_XMIT_CMDS_MAX <= ISER_DEF_XMIT_CMDS_DEFAULT
  #define ISER_DEF_XMIT_CMDS_MAX      ISER_DEF_XMIT_CMDS_DEFAULT
  #else
  #define ISER_DEF_XMIT_CMDS_MAX      ISCSI_DEF_XMIT_CMDS_MAX
  #endif
  #define ISER_DEF_CMD_PER_LUN        ISER_DEF_XMIT_CMDS_MAX
  /* QP settings */

  /*
   * Maximal bounds on miscellaneous (non SCSI-command) PDUs:
   *   RX: NOOP_IN(2), ASYNC_EVENT(2)
   *   TX: NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1)
   */
  #define ISER_MAX_RX_MISC_PDUS       4
  #define ISER_MAX_TX_MISC_PDUS       6

  #define ISER_QP_MAX_RECV_DTOS       (ISER_DEF_XMIT_CMDS_MAX)

  /*
   * The max TX (send) WR supported by the iSER QP is defined by
   *
   *     max_send_wr = T * (1 + D) + C
   *
   * where T is the number of commands, D is how many inflight dataouts we
   * expect to have at max for a SCSI command and C is the room reserved for
   * the miscellaneous PDUs above.  The tx posting & completion handling code
   * supports an -EAGAIN scheme where tx is suspended till the QP has room
   * for more send WRs.  D=8 comes from 64K/8K.
   */
  #define ISER_INFLIGHT_DATAOUTS      8

  #define ISER_QP_MAX_REQ_DTOS        (ISER_DEF_XMIT_CMDS_MAX *       \
                                       (1 + ISER_INFLIGHT_DATAOUTS) + \
                                       ISER_MAX_TX_MISC_PDUS +        \
                                       ISER_MAX_RX_MISC_PDUS)

  /* Max registration work requests per command */
  #define ISER_MAX_REG_WR_PER_CMD     5

  /* For Signature we don't support DATAOUTs so no need to make room for them */
  #define ISER_QP_SIG_MAX_REQ_DTOS    (ISER_DEF_XMIT_CMDS_MAX *        \
                                       (1 + ISER_MAX_REG_WR_PER_CMD) + \
                                       ISER_MAX_TX_MISC_PDUS +         \
                                       ISER_MAX_RX_MISC_PDUS)

  /*
   * Inverse of the ISER_QP_MAX_REQ_DTOS formula: the number of commands that
   * fit in a send queue of @send_wr work requests.  The argument is
   * parenthesized so that any expression (e.g. one using ?: or |) is
   * evaluated as a whole before the misc PDU room is subtracted.
   */
  #define ISER_GET_MAX_XMIT_CMDS(send_wr) (((send_wr)               \
                                            - ISER_MAX_TX_MISC_PDUS \
                                            - ISER_MAX_RX_MISC_PDUS) / \
                                           (1 + ISER_INFLIGHT_DATAOUTS))
  /*
   * Constant PDU length calculations.
   *
   * ISER_HEADERS_LEN is the iSER control header plus the iSCSI header.  A
   * regular receive buffer adds a ISER_RECV_DATA_SEG_LEN byte data segment
   * to that; the login receive buffer adds ISCSI_DEF_MAX_RECV_SEG_LEN.
   */
  #define ISER_HEADERS_LEN \
      (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))

  #define ISER_RECV_DATA_SEG_LEN      128
  #define ISER_RX_PAYLOAD_SIZE        (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)

  #define ISER_RX_LOGIN_SIZE          (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)

  /* Length of an object name string */
  #define ISER_OBJECT_NAME_SIZE       64
  /* Logical states of an iSER connection; ISER_CONN_STATES_NUM counts them */
  enum iser_conn_state {
      ISER_CONN_INIT = 0,        /* descriptor allocd, no conn          */
      ISER_CONN_PENDING,         /* in the process of being established */
      ISER_CONN_UP,              /* up and running                      */
      ISER_CONN_TERMINATING,     /* in the process of being terminated  */
      ISER_CONN_DOWN,            /* shut down                           */
      ISER_CONN_STATES_NUM
  };
  /* Status values of an iSER task, starting at 0 for a freshly set up task */
  enum iser_task_status {
      ISER_TASK_STATUS_INIT      = 0,
      ISER_TASK_STATUS_STARTED   = 1,
      ISER_TASK_STATUS_COMPLETED = 2
  };
  /*
   * Data transfer direction, seen from the initiator.  ISER_DIRS_NUM is the
   * number of directions and is used to size per-direction arrays.
   */
  enum iser_data_dir {
      ISER_DIR_IN  = 0,          /* to initiator   */
      ISER_DIR_OUT = 1,          /* from initiator */
      ISER_DIRS_NUM
  };
  /**
   * struct iser_data_buf - iSER data buffer
   *
   * @sg:           pointer to the sg list
   * @size:         number of entries in this sg list
   * @data_len:     total buffer length in bytes
   * @dma_nents:    value returned by dma_map_sg
   */
  struct iser_data_buf {
      struct scatterlist  *sg;
      int                 size;
      unsigned long       data_len;
      int                 dma_nents;
  };
  /* Forward declarations of types that are only referenced by pointer here */
  struct iscsi_endpoint;
  struct iscsi_iser_task;
  struct iser_device;
  struct iser_reg_resources;
  172. /**
  173. * struct iser_mem_reg - iSER memory registration info
  174. *
  175. * @sge: memory region sg element
  176. * @rkey: memory region remote key
  177. * @desc: pointer to fast registration context
  178. */
  179. struct iser_mem_reg {
  180. struct ib_sge sge;
  181. u32 rkey;
  182. struct iser_fr_desc *desc;
  183. };
  /* Kind of PDU carried by a TX descriptor: control, SCSI command or dataout */
  enum iser_desc_type {
      ISCSI_TX_CONTROL      = 0,
      ISCSI_TX_SCSI_COMMAND = 1,
      ISCSI_TX_DATAOUT      = 2
  };
  189. /**
  190. * struct iser_tx_desc - iSER TX descriptor
  191. *
  192. * @iser_header: iser header
  193. * @iscsi_header: iscsi header
  194. * @type: command/control/dataout
  195. * @dma_addr: header buffer dma_address
  196. * @tx_sg: sg[0] points to iser/iscsi headers
  197. * sg[1] optionally points to either of immediate data
  198. * unsolicited data-out or control
  199. * @num_sge: number sges used on this TX task
  200. * @cqe: completion handler
  201. * @mapped: Is the task header mapped
  202. * @reg_wr: registration WR
  203. * @send_wr: send WR
  204. * @inv_wr: invalidate WR
  205. */
  206. struct iser_tx_desc {
  207. struct iser_ctrl iser_header;
  208. struct iscsi_hdr iscsi_header;
  209. enum iser_desc_type type;
  210. u64 dma_addr;
  211. struct ib_sge tx_sg[2];
  212. int num_sge;
  213. struct ib_cqe cqe;
  214. bool mapped;
  215. struct ib_reg_wr reg_wr;
  216. struct ib_send_wr send_wr;
  217. struct ib_send_wr inv_wr;
  218. };
  219. #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
  220. sizeof(u64) + sizeof(struct ib_sge) + \
  221. sizeof(struct ib_cqe)))
  222. /**
  223. * struct iser_rx_desc - iSER RX descriptor
  224. *
  225. * @iser_header: iser header
  226. * @iscsi_header: iscsi header
  227. * @data: received data segment
  228. * @dma_addr: receive buffer dma address
  229. * @rx_sg: ib_sge of receive buffer
  230. * @cqe: completion handler
  231. * @pad: for sense data TODO: Modify to maximum sense length supported
  232. */
  233. struct iser_rx_desc {
  234. struct iser_ctrl iser_header;
  235. struct iscsi_hdr iscsi_header;
  236. char data[ISER_RECV_DATA_SEG_LEN];
  237. u64 dma_addr;
  238. struct ib_sge rx_sg;
  239. struct ib_cqe cqe;
  240. char pad[ISER_RX_PAD_SIZE];
  241. } __packed;
  242. /**
  243. * struct iser_login_desc - iSER login descriptor
  244. *
  245. * @req: pointer to login request buffer
  246. * @rsp: pointer to login response buffer
  247. * @req_dma: DMA address of login request buffer
  248. * @rsp_dma: DMA address of login response buffer
  249. * @sge: IB sge for login post recv
  250. * @cqe: completion handler
  251. */
  252. struct iser_login_desc {
  253. void *req;
  254. void *rsp;
  255. u64 req_dma;
  256. u64 rsp_dma;
  257. struct ib_sge sge;
  258. struct ib_cqe cqe;
  259. } __packed;
  /* Connection types, forward-declared ahead of their definitions below */
  struct ib_conn;
  struct iser_conn;
  262. /**
  263. * struct iser_device - iSER device handle
  264. *
  265. * @ib_device: RDMA device
  266. * @pd: Protection Domain for this device
  267. * @mr: Global DMA memory region
  268. * @event_handler: IB events handle routine
  269. * @ig_list: entry in devices list
  270. * @refcount: Reference counter, dominated by open iser connections
  271. */
  272. struct iser_device {
  273. struct ib_device *ib_device;
  274. struct ib_pd *pd;
  275. struct ib_event_handler event_handler;
  276. struct list_head ig_list;
  277. int refcount;
  278. };
  279. /**
  280. * struct iser_reg_resources - Fast registration resources
  281. *
  282. * @mr: memory region
  283. * @sig_mr: signature memory region
  284. * @mr_valid: is mr valid indicator
  285. */
  286. struct iser_reg_resources {
  287. struct ib_mr *mr;
  288. struct ib_mr *sig_mr;
  289. u8 mr_valid:1;
  290. };
  291. /**
  292. * struct iser_fr_desc - Fast registration descriptor
  293. *
  294. * @list: entry in connection fastreg pool
  295. * @rsc: data buffer registration resources
  296. * @sig_protected: is region protected indicator
  297. * @all_list: first and last list members
  298. */
  299. struct iser_fr_desc {
  300. struct list_head list;
  301. struct iser_reg_resources rsc;
  302. bool sig_protected;
  303. struct list_head all_list;
  304. };
  305. /**
  306. * struct iser_fr_pool - connection fast registration pool
  307. *
  308. * @list: list of fastreg descriptors
  309. * @lock: protects fastreg pool
  310. * @size: size of the pool
  311. * @all_list: first and last list members
  312. */
  313. struct iser_fr_pool {
  314. struct list_head list;
  315. spinlock_t lock;
  316. int size;
  317. struct list_head all_list;
  318. };
  319. /**
  320. * struct ib_conn - Infiniband related objects
  321. *
  322. * @cma_id: rdma_cm connection maneger handle
  323. * @qp: Connection Queue-pair
  324. * @cq: Connection completion queue
  325. * @cq_size: The number of max outstanding completions
  326. * @device: reference to iser device
  327. * @fr_pool: connection fast registration pool
  328. * @pi_support: Indicate device T10-PI support
  329. * @reg_cqe: completion handler
  330. */
  331. struct ib_conn {
  332. struct rdma_cm_id *cma_id;
  333. struct ib_qp *qp;
  334. struct ib_cq *cq;
  335. u32 cq_size;
  336. struct iser_device *device;
  337. struct iser_fr_pool fr_pool;
  338. bool pi_support;
  339. struct ib_cqe reg_cqe;
  340. };
  341. /**
  342. * struct iser_conn - iSER connection context
  343. *
  344. * @ib_conn: connection RDMA resources
  345. * @iscsi_conn: link to matching iscsi connection
  346. * @ep: transport handle
  347. * @state: connection logical state
  348. * @qp_max_recv_dtos: maximum number of data outs, corresponds
  349. * to max number of post recvs
  350. * @max_cmds: maximum cmds allowed for this connection
  351. * @name: connection peer portal
  352. * @release_work: deffered work for release job
  353. * @state_mutex: protects iser onnection state
  354. * @stop_completion: conn_stop completion
  355. * @ib_completion: RDMA cleanup completion
  356. * @up_completion: connection establishment completed
  357. * (state is ISER_CONN_UP)
  358. * @conn_list: entry in ig conn list
  359. * @login_desc: login descriptor
  360. * @rx_descs: rx buffers array (cyclic buffer)
  361. * @num_rx_descs: number of rx descriptors
  362. * @scsi_sg_tablesize: scsi host sg_tablesize
  363. * @pages_per_mr: maximum pages available for registration
  364. * @snd_w_inv: connection uses remote invalidation
  365. */
  366. struct iser_conn {
  367. struct ib_conn ib_conn;
  368. struct iscsi_conn *iscsi_conn;
  369. struct iscsi_endpoint *ep;
  370. enum iser_conn_state state;
  371. unsigned qp_max_recv_dtos;
  372. u16 max_cmds;
  373. char name[ISER_OBJECT_NAME_SIZE];
  374. struct work_struct release_work;
  375. struct mutex state_mutex;
  376. struct completion stop_completion;
  377. struct completion ib_completion;
  378. struct completion up_completion;
  379. struct list_head conn_list;
  380. struct iser_login_desc login_desc;
  381. struct iser_rx_desc *rx_descs;
  382. u32 num_rx_descs;
  383. unsigned short scsi_sg_tablesize;
  384. unsigned short pages_per_mr;
  385. bool snd_w_inv;
  386. };
  387. /**
  388. * struct iscsi_iser_task - iser task context
  389. *
  390. * @desc: TX descriptor
  391. * @iser_conn: link to iser connection
  392. * @status: current task status
  393. * @sc: link to scsi command
  394. * @command_sent: indicate if command was sent
  395. * @dir: iser data direction
  396. * @rdma_reg: task rdma registration desc
  397. * @data: iser data buffer desc
  398. * @prot: iser protection buffer desc
  399. */
  400. struct iscsi_iser_task {
  401. struct iser_tx_desc desc;
  402. struct iser_conn *iser_conn;
  403. enum iser_task_status status;
  404. struct scsi_cmnd *sc;
  405. int command_sent;
  406. int dir[ISER_DIRS_NUM];
  407. struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
  408. struct iser_data_buf data[ISER_DIRS_NUM];
  409. struct iser_data_buf prot[ISER_DIRS_NUM];
  410. };
  411. /**
  412. * struct iser_global - iSER global context
  413. *
  414. * @device_list_mutex: protects device_list
  415. * @device_list: iser devices global list
  416. * @connlist_mutex: protects connlist
  417. * @connlist: iser connections global list
  418. * @desc_cache: kmem cache for tx dataout
  419. */
  420. struct iser_global {
  421. struct mutex device_list_mutex;
  422. struct list_head device_list;
  423. struct mutex connlist_mutex;
  424. struct list_head connlist;
  425. struct kmem_cache *desc_cache;
  426. };
  427. extern struct iser_global ig;
  428. extern int iser_debug_level;
  429. extern bool iser_pi_enable;
  430. extern unsigned int iser_max_sectors;
  431. extern bool iser_always_reg;
  /*
   * PDU send/receive entry points.  Declarations only; the definitions are
   * not part of this header.
   */
  int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task);

  int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task);

  int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task,
                         struct iscsi_data *hdr);

  void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                       char *rx_data, int rx_data_len);
  /* Connection life cycle (declarations only) */
  void iser_conn_init(struct iser_conn *iser_conn);

  void iser_conn_release(struct iser_conn *iser_conn);

  int iser_conn_terminate(struct iser_conn *iser_conn);

  void iser_release_work(struct work_struct *work);

  /* Error reporting helper taking a work completion and a type string */
  void iser_err_comp(struct ib_wc *wc, const char *type);

  /*
   * Handlers sharing the (cq, wc) signature.
   * NOTE(review): presumably installed as ib_cqe completion callbacks --
   * confirm against the .c files.
   */
  void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);

  void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);

  void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);

  void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);

  void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);

  void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);

  /* Per-task RDMA state setup/teardown and RX descriptor release */
  void iser_task_rdma_init(struct iscsi_iser_task *task);

  void iser_task_rdma_finalize(struct iscsi_iser_task *task);

  void iser_free_rx_descriptors(struct iser_conn *iser_conn);
  457. void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
  458. struct iser_data_buf *mem,
  459. enum iser_data_dir cmd_dir);
  460. int iser_reg_mem_fastreg(struct iscsi_iser_task *task,
  461. enum iser_data_dir dir,
  462. bool all_imm);
  463. void iser_unreg_mem_fastreg(struct iscsi_iser_task *task,
  464. enum iser_data_dir dir);
  465. int iser_connect(struct iser_conn *iser_conn,
  466. struct sockaddr *src_addr,
  467. struct sockaddr *dst_addr,
  468. int non_blocking);
  469. int iser_post_recvl(struct iser_conn *iser_conn);
  470. int iser_post_recvm(struct iser_conn *iser_conn,
  471. struct iser_rx_desc *rx_desc);
  472. int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc);
  473. int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
  474. enum iser_data_dir iser_dir,
  475. enum dma_data_direction dma_dir);
  476. void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
  477. enum iser_data_dir iser_dir,
  478. enum dma_data_direction dma_dir);
  479. int iser_initialize_task_headers(struct iscsi_task *task,
  480. struct iser_tx_desc *tx_desc);
  481. int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
  482. struct iscsi_session *session);
  483. int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
  484. unsigned cmds_max,
  485. unsigned int size);
  486. void iser_free_fastreg_pool(struct ib_conn *ib_conn);
  487. u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
  488. enum iser_data_dir cmd_dir, sector_t *sector);
  489. static inline struct iser_conn *
  490. to_iser_conn(struct ib_conn *ib_conn)
  491. {
  492. return container_of(ib_conn, struct iser_conn, ib_conn);
  493. }
  494. static inline struct iser_rx_desc *
  495. iser_rx(struct ib_cqe *cqe)
  496. {
  497. return container_of(cqe, struct iser_rx_desc, cqe);
  498. }
  499. static inline struct iser_tx_desc *
  500. iser_tx(struct ib_cqe *cqe)
  501. {
  502. return container_of(cqe, struct iser_tx_desc, cqe);
  503. }
  504. static inline struct iser_login_desc *
  505. iser_login(struct ib_cqe *cqe)
  506. {
  507. return container_of(cqe, struct iser_login_desc, cqe);
  508. }
  509. #endif