siw_qp.c

// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <[email protected]> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/llist.h>
#include <asm/barrier.h>
#include <net/tcp.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
	[SIW_QP_STATE_IDLE] = "IDLE",
	[SIW_QP_STATE_RTR] = "RTR",
	[SIW_QP_STATE_RTS] = "RTS",
	[SIW_QP_STATE_CLOSING] = "CLOSING",
	[SIW_QP_STATE_TERMINATE] = "TERMINATE",
	[SIW_QP_STATE_ERROR] = "ERROR"
};

/*
 * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
 * per-RDMAP message basis. Please keep the order of the initializer. All MPA
 * lengths are initialized to the minimum packet size.
 */
struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
	{ /* RDMAP_RDMA_WRITE */
	  .hdr_len = sizeof(struct iwarp_rdma_write),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_WRITE),
	  .rx_data = siw_proc_write },
	{ /* RDMAP_RDMA_READ_REQ */
	  .hdr_len = sizeof(struct iwarp_rdma_rreq),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_REQ),
	  .rx_data = siw_proc_rreq },
	{ /* RDMAP_RDMA_READ_RESP */
	  .hdr_len = sizeof(struct iwarp_rdma_rresp),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
				 cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_RDMA_READ_RESP),
	  .rx_data = siw_proc_rresp },
	{ /* RDMAP_SEND */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE */
	  .hdr_len = sizeof(struct iwarp_send),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_SEND_SE_INVAL */
	  .hdr_len = sizeof(struct iwarp_send_inv),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_SEND_SE_INVAL),
	  .rx_data = siw_proc_send },
	{ /* RDMAP_TERMINATE */
	  .hdr_len = sizeof(struct iwarp_terminate),
	  .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
	  .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
				 cpu_to_be16(RDMAP_VERSION << 6) |
				 cpu_to_be16(RDMAP_TERMINATE),
	  .rx_data = siw_proc_terminate }
};

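/*
 * Receive-path note (sketch, not part of the original dispatch code, which
 * lives outside this file, likely in siw_qp_rx.c): incoming frames are
 * parsed for their RDMAP opcode and handed to the matching rx_data handler
 * of the table above, roughly along the lines of
 *
 *	enum rdma_opcode op = __rdmap_get_opcode(&srx->hdr.ctrl);
 *	int rv = iwarp_pktinfo[op].rx_data(qp);
 *
 * 'srx', 'qp' and 'rv' are illustrative names only.
 */
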
void siw_qp_llp_data_ready(struct sock *sk)
{
	struct siw_qp *qp;

	read_lock(&sk->sk_callback_lock);

	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
		goto done;

	qp = sk_to_qp(sk);

	if (likely(!qp->rx_stream.rx_suspend &&
		   down_read_trylock(&qp->state_lock))) {
		read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };

		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
			/*
			 * Implements data receive operation during
			 * socket callback. TCP gracefully catches
			 * the case where there is nothing to receive
			 * (not calling siw_tcp_rx_data() then).
			 */
			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);

		up_read(&qp->state_lock);
	} else {
		siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
			   qp->rx_stream.rx_suspend);
	}
done:
	read_unlock(&sk->sk_callback_lock);
}

void siw_qp_llp_close(struct siw_qp *qp)
{
	siw_dbg_qp(qp, "enter llp close, state = %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);

	down_write(&qp->state_lock);

	qp->rx_stream.rx_suspend = 1;
	qp->tx_ctx.tx_suspend = 1;
	qp->attrs.sk = NULL;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_RTS:
	case SIW_QP_STATE_RTR:
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_ERROR;
		break;
	/*
	 * SIW_QP_STATE_CLOSING:
	 *
	 * This is a forced close. Shall the QP be moved to
	 * ERROR or IDLE?
	 */
	case SIW_QP_STATE_CLOSING:
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
			qp->attrs.state = SIW_QP_STATE_ERROR;
		else
			qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	default:
		siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
			   siw_qp_state_to_string[qp->attrs.state]);
		break;
	}
	siw_sq_flush(qp);
	siw_rq_flush(qp);

	/*
	 * Dereference closing CEP
	 */
	if (qp->cep) {
		siw_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);

	siw_dbg_qp(qp, "llp close exit: state %s\n",
		   siw_qp_state_to_string[qp->attrs.state]);
}

/*
 * socket callback routine informing about newly available send space.
 * Function schedules SQ work for processing SQ items.
 */
void siw_qp_llp_write_space(struct sock *sk)
{
	struct siw_cep *cep;

	read_lock(&sk->sk_callback_lock);
	cep = sk_to_cep(sk);
	if (cep) {
		cep->sk_write_space(sk);

		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
			(void)siw_sq_start(cep->qp);
	}

	read_unlock(&sk->sk_callback_lock);
}

static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
	if (irq_size) {
		irq_size = roundup_pow_of_two(irq_size);
		qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
		if (!qp->irq) {
			qp->attrs.irq_size = 0;
			return -ENOMEM;
		}
	}
	if (orq_size) {
		orq_size = roundup_pow_of_two(orq_size);
		qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
		if (!qp->orq) {
			qp->attrs.orq_size = 0;
			qp->attrs.irq_size = 0;
			vfree(qp->irq);
			return -ENOMEM;
		}
	}
	qp->attrs.irq_size = irq_size;
	qp->attrs.orq_size = orq_size;
	siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
	return 0;
}

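/*
 * Sizing note: both read queues are rounded up to a power of two, so a
 * requested IRD of e.g. 5 ends up as 8 IRQ entries. Presumably this keeps
 * the free-running get/put counters cheap to wrap; entries are addressed
 * as qp->irq[qp->irq_get % qp->attrs.irq_size] (see siw_activate_tx()).
 */
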
static int siw_qp_enable_crc(struct siw_qp *qp)
{
	struct siw_rx_stream *c_rx = &qp->rx_stream;
	struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
	int size;

	if (siw_crypto_shash == NULL)
		return -ENOENT;

	size = crypto_shash_descsize(siw_crypto_shash) +
		sizeof(struct shash_desc);

	c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
	if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
		kfree(c_tx->mpa_crc_hd);
		kfree(c_rx->mpa_crc_hd);
		c_tx->mpa_crc_hd = NULL;
		c_rx->mpa_crc_hd = NULL;
		return -ENOMEM;
	}
	c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
	c_rx->mpa_crc_hd->tfm = siw_crypto_shash;

	return 0;
}

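/*
 * CRC note: each direction gets its own shash_desc (descsize plus header
 * bytes) wrapping the shared siw_crypto_shash transform, which is assumed
 * to be the crc32c shash allocated at module load (outside this file).
 * The descriptors are driven via crypto_shash_init()/_update()/_final(),
 * as done for the TERMINATE frame in siw_send_terminate() below.
 */
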
/*
 * Send a non-signalled READ or WRITE to the peer side as negotiated
 * with the MPAv2 P2P setup protocol. The work request is only created
 * as a current active WR and does not consume Send Queue space.
 *
 * Caller must hold QP state lock.
 */
int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
{
	struct siw_wqe *wqe = tx_wqe(qp);
	unsigned long flags;
	int rv = 0;

	spin_lock_irqsave(&qp->sq_lock, flags);

	if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
		spin_unlock_irqrestore(&qp->sq_lock, flags);
		return -EIO;
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);

	wqe->wr_status = SIW_WR_QUEUED;
	wqe->sqe.flags = 0;
	wqe->sqe.num_sge = 1;
	wqe->sqe.sge[0].length = 0;
	wqe->sqe.sge[0].laddr = 0;
	wqe->sqe.sge[0].lkey = 0;
	/*
	 * While it must not be checked for inbound zero length
	 * READ/WRITE, some HW may treat STag 0 specially.
	 */
	wqe->sqe.rkey = 1;
	wqe->sqe.raddr = 0;
	wqe->processed = 0;

	if (ctrl & MPA_V2_RDMA_WRITE_RTR)
		wqe->sqe.opcode = SIW_OP_WRITE;
	else if (ctrl & MPA_V2_RDMA_READ_RTR) {
		struct siw_sqe *rreq = NULL;

		wqe->sqe.opcode = SIW_OP_READ;

		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size)
			rreq = orq_get_free(qp);
		if (rreq) {
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else
			rv = -EIO;

		spin_unlock(&qp->orq_lock);
	} else
		rv = -EINVAL;

	if (rv)
		wqe->wr_status = SIW_WR_IDLE;

	spin_unlock_irqrestore(&qp->sq_lock, flags);

	if (!rv)
		rv = siw_sq_start(qp);

	return rv;
}

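/*
 * Usage sketch (assumed caller pattern from the connection manager, e.g.
 * siw_cm.c): in MPAv2 peer-to-peer mode the connection initiator must emit
 * a first RDMA operation right after the MPA handshake, so once the QP is
 * in RTS the CM would call, roughly,
 *
 *	if (mpa_p2p_mode & (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR))
 *		rv = siw_qp_mpa_rts(qp, mpa_p2p_mode);
 *
 * 'mpa_p2p_mode' is an illustrative name for the negotiated RTR control bits.
 */
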
/*
 * Map memory access error to DDP tagged error
 */
enum ddp_ecode siw_tagged_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return DDP_ECODE_T_INVALID_STAG;
	case E_BASE_BOUNDS:
		return DDP_ECODE_T_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return DDP_ECODE_T_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		/*
		 * RFC 5041 (DDP) lacks an ecode for insufficient access
		 * permissions. 'Invalid STag' seems to be the closest
		 * match though.
		 */
		return DDP_ECODE_T_INVALID_STAG;
	default:
		WARN_ON(1);
		return DDP_ECODE_T_INVALID_STAG;
	}
}

/*
 * Map memory access error to RDMAP protection error
 */
enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
{
	switch (state) {
	case E_STAG_INVALID:
		return RDMAP_ECODE_INVALID_STAG;
	case E_BASE_BOUNDS:
		return RDMAP_ECODE_BASE_BOUNDS;
	case E_PD_MISMATCH:
		return RDMAP_ECODE_STAG_NOT_ASSOC;
	case E_ACCESS_PERM:
		return RDMAP_ECODE_ACCESS_RIGHTS;
	default:
		return RDMAP_ECODE_UNSPECIFIED;
	}
}

void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
			u8 ecode, int in_tx)
{
	if (!qp->term_info.valid) {
		memset(&qp->term_info, 0, sizeof(qp->term_info));
		qp->term_info.layer = layer;
		qp->term_info.etype = etype;
		qp->term_info.ecode = ecode;
		qp->term_info.in_tx = in_tx;
		qp->term_info.valid = 1;
	}
	siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
		   layer, etype, ecode, in_tx ? "yes" : "no");
}

/*
 * Send a TERMINATE message, as defined in RFCs 5040/5041/5044/6581.
 * Sending TERMINATE messages is best effort - such messages
 * can only be sent if the QP is still connected and it does
 * not have another outbound message in-progress, i.e. the
 * TERMINATE message must not interfere with an incomplete current
 * transmit operation.
 */
void siw_send_terminate(struct siw_qp *qp)
{
	struct kvec iov[3];
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
	struct iwarp_terminate *term = NULL;
	union iwarp_hdr *err_hdr = NULL;
	struct socket *s = qp->attrs.sk;
	struct siw_rx_stream *srx = &qp->rx_stream;
	union iwarp_hdr *rx_hdr = &srx->hdr;
	u32 crc = 0;
	int num_frags, len_terminate, rv;

	if (!qp->term_info.valid)
		return;

	qp->term_info.valid = 0;

	if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
		siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
			   tx_type(tx_wqe(qp)));
		return;
	}
	if (!s && qp->cep)
		/* QP not yet in RTS. Take socket from connection end point */
		s = qp->cep->sock;

	if (!s) {
		siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
		return;
	}

	term = kzalloc(sizeof(*term), GFP_KERNEL);
	if (!term)
		return;

	term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
	term->ddp_mo = 0;
	term->ddp_msn = cpu_to_be32(1);

	iov[0].iov_base = term;
	iov[0].iov_len = sizeof(*term);

	if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
	    ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
	     (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
		err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
		if (!err_hdr) {
			kfree(term);
			return;
		}
	}
	memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
	       sizeof(struct iwarp_ctrl));

	__rdmap_term_set_layer(term, qp->term_info.layer);
	__rdmap_term_set_etype(term, qp->term_info.etype);
	__rdmap_term_set_ecode(term, qp->term_info.ecode);

	switch (qp->term_info.layer) {
	case TERM_ERROR_LAYER_RDMAP:
		if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
			/* No additional DDP/RDMAP header to be included */
			break;

		if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
			/*
			 * Complete RDMAP frame will get attached, and
			 * DDP segment length is valid
			 */
			term->flag_m = 1;
			term->flag_d = 1;
			term->flag_r = 1;

			if (qp->term_info.in_tx) {
				struct iwarp_rdma_rreq *rreq;
				struct siw_wqe *wqe = tx_wqe(qp);

				/* Inbound RREQ error, detected during
				 * RRESP creation. Take state from
				 * current TX work queue element to
				 * reconstruct peer's RREQ.
				 */
				rreq = (struct iwarp_rdma_rreq *)err_hdr;

				memcpy(&rreq->ctrl,
				       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
				       sizeof(struct iwarp_ctrl));

				rreq->rsvd = 0;
				rreq->ddp_qn =
					htonl(RDMAP_UNTAGGED_QN_RDMA_READ);

				/* Provide RREQ's MSN as kept aside */
				rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);

				rreq->ddp_mo = htonl(wqe->processed);
				rreq->sink_stag = htonl(wqe->sqe.rkey);
				rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
				rreq->read_size = htonl(wqe->sqe.sge[0].length);
				rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
				rreq->source_to =
					cpu_to_be64(wqe->sqe.sge[0].laddr);

				iov[1].iov_base = rreq;
				iov[1].iov_len = sizeof(*rreq);

				rx_hdr = (union iwarp_hdr *)rreq;
			} else {
				/* Take RDMAP/DDP information from
				 * current (failed) inbound frame.
				 */
				iov[1].iov_base = rx_hdr;
				if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
				    RDMAP_RDMA_READ_REQ)
					iov[1].iov_len =
						sizeof(struct iwarp_rdma_rreq);
				else /* SEND type */
					iov[1].iov_len =
						sizeof(struct iwarp_send);
			}
		} else {
			/* Do not report DDP hdr information if packet
			 * layout is unknown
			 */
			if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
			    (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
				break;

			iov[1].iov_base = rx_hdr;

			/* Only DDP frame will get attached */
			if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
				iov[1].iov_len =
					sizeof(struct iwarp_rdma_write);
			else
				iov[1].iov_len = sizeof(struct iwarp_send);

			term->flag_m = 1;
			term->flag_d = 1;
		}
		term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
		break;

	case TERM_ERROR_LAYER_DDP:
		/* Report error encountered while DDP processing.
		 * This can only happen as a result of inbound
		 * DDP processing
		 */

		/* Do not report DDP hdr information if packet
		 * layout is unknown
		 */
		if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
		    ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
		     (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
			break;

		iov[1].iov_base = rx_hdr;

		if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
			iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
		else
			iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);

		term->flag_m = 1;
		term->flag_d = 1;
		break;

	default:
		break;
	}
	if (term->flag_m || term->flag_d || term->flag_r) {
		iov[2].iov_base = &crc;
		iov[2].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
		num_frags = 3;
	} else {
		iov[1].iov_base = &crc;
		iov[1].iov_len = sizeof(crc);
		len_terminate = sizeof(*term) + MPA_CRC_SIZE;
		num_frags = 2;
	}

	/* Adjust DDP Segment Length parameter, if valid */
	if (term->flag_m) {
		u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
		enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);

		real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
		rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
	}

	term->ctrl.mpa_len =
		cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
	if (qp->tx_ctx.mpa_crc_hd) {
		crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
		if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
					(u8 *)iov[0].iov_base,
					iov[0].iov_len))
			goto out;

		if (num_frags == 3) {
			if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
						(u8 *)iov[1].iov_base,
						iov[1].iov_len))
				goto out;
		}
		crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
	}

	rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
	siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
		   rv == len_terminate ? "success" : "failure",
		   __rdmap_term_layer(term), __rdmap_term_etype(term),
		   __rdmap_term_ecode(term), rv);
out:
	kfree(term);
	kfree(err_hdr);
}

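/*
 * Wire layout recap (derived from the assembly above): the TERMINATE frame
 * goes out as iov[0] = terminate header, optionally iov[1] = a copy of the
 * offending DDP/RDMAP header (when flag_m/flag_d/flag_r are set), and a
 * trailing 4-byte CRC field, which is only computed when MPA CRC is
 * enabled and is transmitted as zero otherwise.
 */
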
/*
 * Handle all attrs other than state
 */
static void siw_qp_modify_nonstate(struct siw_qp *qp,
				   struct siw_qp_attrs *attrs,
				   enum siw_qp_attr_mask mask)
{
	if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
		if (attrs->flags & SIW_RDMA_BIND_ENABLED)
			qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;

		if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
			qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;

		if (attrs->flags & SIW_RDMA_READ_ENABLED)
			qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
		else
			qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
	}
}

static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
				      struct siw_qp_attrs *attrs,
				      enum siw_qp_attr_mask mask)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_RTS:
		if (attrs->flags & SIW_MPA_CRC) {
			rv = siw_qp_enable_crc(qp);
			if (rv)
				break;
		}
		if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
			siw_dbg_qp(qp, "no socket\n");
			rv = -EINVAL;
			break;
		}
		if (!(mask & SIW_QP_ATTR_MPA)) {
			siw_dbg_qp(qp, "no MPA\n");
			rv = -EINVAL;
			break;
		}
		/*
		 * Initialize iWARP TX state
		 */
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
		qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;

		/*
		 * Initialize iWARP RX state
		 */
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
		qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;

		/*
		 * init IRD free queue, caller has already checked
		 * limits.
		 */
		rv = siw_qp_readq_init(qp, attrs->irq_size,
				       attrs->orq_size);
		if (rv)
			break;

		qp->attrs.sk = attrs->sk;
		qp->attrs.state = SIW_QP_STATE_RTS;

		siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
			   attrs->flags & SIW_MPA_CRC ? "y" : "n",
			   qp->attrs.orq_size, qp->attrs.irq_size);
		break;

	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		if (qp->cep) {
			siw_cep_put(qp->cep);
			qp->cep = NULL;
		}
		break;

	default:
		break;
	}
	return rv;
}

static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
				     struct siw_qp_attrs *attrs)
{
	int drop_conn = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_CLOSING:
		/*
		 * Verbs: move to IDLE if SQ and ORQ are empty.
		 * Move to ERROR otherwise. But first of all we must
		 * close the connection. So we keep CLOSING or ERROR
		 * as a transient state, schedule connection drop work
		 * and wait for the socket state change upcall to
		 * come back closed.
		 */
		if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
			qp->attrs.state = SIW_QP_STATE_CLOSING;
		} else {
			qp->attrs.state = SIW_QP_STATE_ERROR;
			siw_sq_flush(qp);
		}
		siw_rq_flush(qp);

		drop_conn = 1;
		break;

	case SIW_QP_STATE_TERMINATE:
		qp->attrs.state = SIW_QP_STATE_TERMINATE;

		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
				   RDMAP_ETYPE_CATASTROPHIC,
				   RDMAP_ECODE_UNSPECIFIED, 1);
		drop_conn = 1;
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * This is an emergency close.
		 *
		 * Any in progress transmit operation will get
		 * cancelled.
		 * This will likely result in a protocol failure,
		 * if a TX operation is in transit. The caller
		 * could unconditionally wait to give the current
		 * operation a chance to complete.
		 * Esp., how to handle the non-empty IRQ case?
		 * The peer was asking for data transfer at a valid
		 * point in time.
		 */
		siw_sq_flush(qp);
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;
		drop_conn = 1;
		break;

	default:
		break;
	}
	return drop_conn;
}

static void siw_qp_nextstate_from_term(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	switch (attrs->state) {
	case SIW_QP_STATE_ERROR:
		siw_rq_flush(qp);
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);
		break;

	default:
		break;
	}
}

static int siw_qp_nextstate_from_close(struct siw_qp *qp,
				       struct siw_qp_attrs *attrs)
{
	int rv = 0;

	switch (attrs->state) {
	case SIW_QP_STATE_IDLE:
		WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
		qp->attrs.state = SIW_QP_STATE_IDLE;
		break;

	case SIW_QP_STATE_CLOSING:
		/*
		 * The LLP may have already moved the QP to CLOSING
		 * due to a graceful peer close init
		 */
		break;

	case SIW_QP_STATE_ERROR:
		/*
		 * QP was moved to CLOSING by LLP event
		 * not yet seen by user.
		 */
		qp->attrs.state = SIW_QP_STATE_ERROR;

		if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
			siw_sq_flush(qp);

		siw_rq_flush(qp);
		break;

	default:
		siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
			   siw_qp_state_to_string[qp->attrs.state],
			   siw_qp_state_to_string[attrs->state]);

		rv = -ECONNABORTED;
	}
	return rv;
}

/*
 * Caller must hold qp->state_lock
 */
int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
		  enum siw_qp_attr_mask mask)
{
	int drop_conn = 0, rv = 0;

	if (!mask)
		return 0;

	siw_dbg_qp(qp, "state: %s => %s\n",
		   siw_qp_state_to_string[qp->attrs.state],
		   siw_qp_state_to_string[attrs->state]);

	if (mask != SIW_QP_ATTR_STATE)
		siw_qp_modify_nonstate(qp, attrs, mask);

	if (!(mask & SIW_QP_ATTR_STATE))
		return 0;

	switch (qp->attrs.state) {
	case SIW_QP_STATE_IDLE:
	case SIW_QP_STATE_RTR:
		rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
		break;

	case SIW_QP_STATE_RTS:
		drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
		break;

	case SIW_QP_STATE_TERMINATE:
		siw_qp_nextstate_from_term(qp, attrs);
		break;

	case SIW_QP_STATE_CLOSING:
		siw_qp_nextstate_from_close(qp, attrs);
		break;

	default:
		break;
	}
	if (drop_conn)
		siw_qp_cm_drop(qp, 0);

	return rv;
}

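/*
 * Usage sketch (assumed caller pattern, e.g. from the verbs or CM layer):
 * a QP is forced into error state by taking the state lock, as required
 * above, and passing only the state attribute:
 *
 *	struct siw_qp_attrs qp_attrs = { .state = SIW_QP_STATE_ERROR };
 *
 *	down_write(&qp->state_lock);
 *	siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE);
 *	up_write(&qp->state_lock);
 */
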
void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
{
	rreq->id = sqe->id;
	rreq->opcode = sqe->opcode;
	rreq->sge[0].laddr = sqe->sge[0].laddr;
	rreq->sge[0].length = sqe->sge[0].length;
	rreq->sge[0].lkey = sqe->sge[0].lkey;
	rreq->sge[1].lkey = sqe->sge[1].lkey;
	rreq->flags = sqe->flags | SIW_WQE_VALID;
	rreq->num_sge = 1;
}

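/*
 * The ORQ entry is a private copy of the outbound READ request, marked
 * SIW_WQE_VALID so the slot counts as occupied until the matching READ
 * RESPONSE completes it (or siw_sq_flush() flushes it with
 * SIW_WC_WR_FLUSH_ERR). Only sge[0] describes the local sink buffer, hence
 * num_sge is forced to 1; sge[1].lkey is carried along as well, apparently
 * to preserve extra per-READ bookkeeping kept in the otherwise unused
 * second SGE slot (an assumption; its consumer lives outside this file).
 */
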
static int siw_activate_tx_from_sq(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int rv = 1;

	sqe = sq_get_next(qp);
	if (!sqe)
		return 0;

	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* First copy SQE to kernel private memory */
	memcpy(&wqe->sqe, sqe, sizeof(*sqe));

	if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
		rv = -EINVAL;
		goto out;
	}
	if (wqe->sqe.flags & SIW_WQE_INLINE) {
		if (wqe->sqe.opcode != SIW_OP_SEND &&
		    wqe->sqe.opcode != SIW_OP_WRITE) {
			rv = -EINVAL;
			goto out;
		}
		if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
		wqe->sqe.sge[0].lkey = 0;
		wqe->sqe.num_sge = 1;
	}
	if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
		/* A READ cannot be fenced */
		if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
			     wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV)) {
			siw_dbg_qp(qp, "cannot fence read\n");
			rv = -EINVAL;
			goto out;
		}
		spin_lock(&qp->orq_lock);

		if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);

	} else if (wqe->sqe.opcode == SIW_OP_READ ||
		   wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
		struct siw_sqe *rreq;

		if (unlikely(!qp->attrs.orq_size)) {
			/* We negotiated not to send READ req's */
			rv = -EINVAL;
			goto out;
		}
		wqe->sqe.num_sge = 1;

		spin_lock(&qp->orq_lock);
		rreq = orq_get_free(qp);
		if (rreq) {
			/*
			 * Make an immediate copy in ORQ to be ready
			 * to process loopback READ reply
			 */
			siw_read_to_orq(rreq, &wqe->sqe);
			qp->orq_put++;
		} else {
			qp->tx_ctx.orq_fence = 1;
			rv = 0;
		}
		spin_unlock(&qp->orq_lock);
	}

	/* Clear SQE, can be re-used by application */
	smp_store_mb(sqe->flags, 0);
	qp->sq_get++;
out:
	if (unlikely(rv < 0)) {
		siw_dbg_qp(qp, "error %d\n", rv);
		wqe->wr_status = SIW_WR_IDLE;
	}
	return rv;
}

/*
 * Must be called with SQ locked.
 * To avoid complete SQ starvation by constant inbound READ requests,
 * the active IRQ will not be served after qp->irq_burst, if the
 * SQ has pending work.
 */
int siw_activate_tx(struct siw_qp *qp)
{
	struct siw_sqe *irqe;
	struct siw_wqe *wqe = tx_wqe(qp);

	if (!qp->attrs.irq_size)
		return siw_activate_tx_from_sq(qp);

	irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];

	if (!(irqe->flags & SIW_WQE_VALID))
		return siw_activate_tx_from_sq(qp);

	/*
	 * Avoid local WQE processing starvation in case
	 * of constant inbound READ request stream
	 */
	if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
		qp->irq_burst = 0;
		return siw_activate_tx_from_sq(qp);
	}
	memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
	wqe->wr_status = SIW_WR_QUEUED;

	/* start READ RESPONSE */
	wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
	wqe->sqe.flags = 0;
	if (irqe->num_sge) {
		wqe->sqe.num_sge = 1;
		wqe->sqe.sge[0].length = irqe->sge[0].length;
		wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
		wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
	} else {
		wqe->sqe.num_sge = 0;
	}

	/* Retain original RREQ's message sequence number for
	 * potential error reporting cases.
	 */
	wqe->sqe.sge[1].length = irqe->sge[1].length;

	wqe->sqe.rkey = irqe->rkey;
	wqe->sqe.raddr = irqe->raddr;

	wqe->processed = 0;
	qp->irq_get++;

	/* mark current IRQ entry free */
	smp_store_mb(irqe->flags, 0);

	return 1;
}

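/*
 * Fairness note: while both inbound READ work (IRQ) and local SQ work are
 * pending, qp->irq_burst counts consecutive IRQ activations; once it
 * reaches SIW_IRQ_MAXBURST_SQ_ACTIVE (defined outside this file), the next
 * activation is diverted to the SQ and the counter restarts, so a constant
 * stream of peer READ requests cannot starve local sends indefinitely.
 */
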
/*
 * Check if current CQ state qualifies for calling CQ completion
 * handler. Must be called with CQ lock held.
 */
static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
{
	u32 cq_notify;

	if (!cq->base_cq.comp_handler)
		return false;

	/* Read application shared notification state */
	cq_notify = READ_ONCE(cq->notify->flags);

	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
	     (flags & SIW_WQE_SOLICITED))) {
		/*
		 * CQ notification is one-shot: Since the
		 * current CQE causes user notification,
		 * the CQ gets disarmed and must be re-armed
		 * by the user for a new notification.
		 */
		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);

		return true;
	}
	return false;
}

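/*
 * Re-arm note (assumption, the arming code lives in the verbs layer,
 * likely siw_verbs.c): the consumer re-arms the CQ by requesting
 * notification again, which is expected to set SIW_NOTIFY_NEXT_COMPLETION
 * or SIW_NOTIFY_SOLICITED in cq->notify->flags. A typical kernel consumer
 * pattern would be
 *
 *	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 *	while (ib_poll_cq(cq, 1, &wc) > 0)
 *		...;
 */
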
int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
		     enum siw_wc_status status)
{
	struct siw_cq *cq = qp->scq;
	int rv = 0;

	if (cq) {
		u32 sqe_flags = sqe->flags;
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;

			cqe->id = sqe->id;
			cqe->opcode = sqe->opcode;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res))
				cqe->base_qp = &qp->base_qp;
			else
				cqe->qp_id = qp_id(qp);

			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
			/* recycle SQE */
			smp_store_mb(sqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, sqe_flags);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* recycle SQE */
		smp_store_mb(sqe->flags, 0);
	}
	return rv;
}

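/*
 * Completion-path note: a CQ ring slot is free while cqe->flags reads 0.
 * The producer fills the CQE first and only then publishes it with
 * WRITE_ONCE(SIW_WQE_VALID); smp_store_mb() on the SQE flags presumably
 * orders that publication against handing the SQE slot back to the
 * application. A still-valid CQE at cq_put means CQ overflow, reported as
 * -ENOMEM plus an IB_EVENT_CQ_ERR event. siw_rqe_complete() below follows
 * the same pattern for receive completions.
 */
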
int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
		     u32 inval_stag, enum siw_wc_status status)
{
	struct siw_cq *cq = qp->rcq;
	int rv = 0;

	if (cq) {
		struct siw_cqe *cqe;
		u32 idx;
		unsigned long flags;

		spin_lock_irqsave(&cq->lock, flags);

		idx = cq->cq_put % cq->num_cqe;
		cqe = &cq->queue[idx];

		if (!READ_ONCE(cqe->flags)) {
			bool notify;
			u8 cqe_flags = SIW_WQE_VALID;

			cqe->id = rqe->id;
			cqe->opcode = SIW_OP_RECEIVE;
			cqe->status = status;
			cqe->imm_data = 0;
			cqe->bytes = bytes;

			if (rdma_is_kernel_res(&cq->base_cq.res)) {
				cqe->base_qp = &qp->base_qp;
				if (inval_stag) {
					cqe_flags |= SIW_WQE_REM_INVAL;
					cqe->inval_stag = inval_stag;
				}
			} else {
				cqe->qp_id = qp_id(qp);
			}
			/* mark CQE valid for application */
			WRITE_ONCE(cqe->flags, cqe_flags);
			/* recycle RQE */
			smp_store_mb(rqe->flags, 0);

			cq->cq_put++;
			notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);

			spin_unlock_irqrestore(&cq->lock, flags);

			if (notify) {
				siw_dbg_cq(cq, "Call completion handler\n");
				cq->base_cq.comp_handler(&cq->base_cq,
						cq->base_cq.cq_context);
			}
		} else {
			spin_unlock_irqrestore(&cq->lock, flags);
			rv = -ENOMEM;
			siw_cq_event(cq, IB_EVENT_CQ_ERR);
		}
	} else {
		/* recycle RQE */
		smp_store_mb(rqe->flags, 0);
	}
	return rv;
}

/*
 * siw_sq_flush()
 *
 * Flush SQ and ORQ entries to CQ.
 *
 * Must be called with QP state write lock held.
 * Therefore, SQ and ORQ lock must not be taken.
 */
void siw_sq_flush(struct siw_qp *qp)
{
	struct siw_sqe *sqe;
	struct siw_wqe *wqe = tx_wqe(qp);
	int async_event = 0;

	/*
	 * Start with completing any work currently on the ORQ
	 */
	while (qp->attrs.orq_size) {
		sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->orq_get++;
	}
	/*
	 * Flush an in-progress WQE if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
			   tx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, tx_type(wqe));

		if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
		    ((tx_type(wqe) != SIW_OP_READ &&
		      tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
		     wqe->wr_status == SIW_WR_QUEUED))
			/*
			 * An in-progress Read Request is already in
			 * the ORQ
			 */
			siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
					 SIW_WC_WR_FLUSH_ERR);

		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Send Queue
	 */
	while (qp->attrs.sq_size) {
		sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
		if (!READ_ONCE(sqe->flags))
			break;

		async_event = 1;
		if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			/*
			 * Shall IB_EVENT_SQ_DRAINED be suppressed if work
			 * completion fails?
			 */
			break;

		WRITE_ONCE(sqe->flags, 0);
		qp->sq_get++;
	}
	if (async_event)
		siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
}

/*
 * siw_rq_flush()
 *
 * Flush recv queue entries to CQ. Also
 * takes care of pending active tagged and untagged
 * inbound transfers, which have target memory
 * referenced.
 *
 * Must be called with QP state write lock held.
 * Therefore, RQ lock must not be taken.
 */
void siw_rq_flush(struct siw_qp *qp)
{
	struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;

	/*
	 * Flush an in-progress untagged operation if present
	 */
	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
			   rx_type(wqe), wqe->wr_status);

		siw_wqe_put_mem(wqe, rx_type(wqe));

		if (rx_type(wqe) == SIW_OP_RECEIVE) {
			siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
					 0, SIW_WC_WR_FLUSH_ERR);
		} else if (rx_type(wqe) != SIW_OP_READ &&
			   rx_type(wqe) != SIW_OP_READ_RESPONSE &&
			   rx_type(wqe) != SIW_OP_WRITE) {
			siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
		}
		wqe->wr_status = SIW_WR_IDLE;
	}
	wqe = &qp->rx_tagged.wqe_active;

	if (wqe->wr_status != SIW_WR_IDLE) {
		siw_wqe_put_mem(wqe, rx_type(wqe));
		wqe->wr_status = SIW_WR_IDLE;
	}
	/*
	 * Flush the Receive Queue
	 */
	while (qp->attrs.rq_size) {
		struct siw_rqe *rqe =
			&qp->recvq[qp->rq_get % qp->attrs.rq_size];

		if (!READ_ONCE(rqe->flags))
			break;

		if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
			break;

		WRITE_ONCE(rqe->flags, 0);
		qp->rq_get++;
	}
}

int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
{
	int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
			  GFP_KERNEL);

	if (!rv) {
		kref_init(&qp->ref);
		qp->sdev = sdev;
		siw_dbg_qp(qp, "new QP\n");
	}
	return rv;
}

void siw_free_qp(struct kref *ref)
{
	struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
	struct siw_device *sdev = qp->sdev;
	unsigned long flags;

	if (qp->cep)
		siw_cep_put(qp->cep);

	found = xa_erase(&sdev->qp_xa, qp_id(qp));
	WARN_ON(found != qp);
	spin_lock_irqsave(&sdev->lock, flags);
	list_del(&qp->devq);
	spin_unlock_irqrestore(&sdev->lock, flags);

	vfree(qp->sendq);
	vfree(qp->recvq);
	vfree(qp->irq);
	vfree(qp->orq);

	siw_put_tx_cpu(qp->tx_cpu);

	complete(&qp->qp_free);
	atomic_dec(&sdev->num_qp);
}