// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>

#include <asm-generic/barrier.h>
#include <asm/bitops.h>

#include <rdma/ib_cm.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
				       struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);

/* Wait for outstanding transport work to finish. ib_drain_qp
 * handles the drains in the wrong order for us, so open code
 * them here.
 */
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rdma_cm_id *id = ep->re_id;

	/* Wait for rpcrdma_post_recvs() to leave its critical
	 * section.
	 */
	if (atomic_inc_return(&ep->re_receiving) > 1)
		wait_for_completion(&ep->re_done);

	/* Flush Receives, then wait for deferred Reply work
	 * to complete.
	 */
	ib_drain_rq(id->qp);

	/* Deferred Reply processing might have scheduled
	 * local invalidations.
	 */
	ib_drain_sq(id->qp);

	rpcrdma_ep_put(ep);
}

/* Ensure xprt_force_disconnect() is invoked exactly once when a
 * connection is closed or lost. (The important thing is it needs
 * to be invoked "at least" once).
 */
void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
{
	if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
		xprt_force_disconnect(ep->re_xprt);
}

/**
 * rpcrdma_flush_disconnect - Disconnect on flushed completion
 * @r_xprt: transport to disconnect
 * @wc: work completion entry
 *
 * Must be called in process context.
 */
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc)
{
	if (wc->status != IB_WC_SUCCESS)
		rpcrdma_force_disconnect(r_xprt->rx_ep);
}

/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue
 * @wc: WCE for a completed Send WR
 *
 */
static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_sendctx *sc =
		container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
	struct rpcrdma_xprt *r_xprt = cq->cq_context;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_send(wc, &sc->sc_cid);
	rpcrdma_sendctx_put_locked(r_xprt, sc);
	rpcrdma_flush_disconnect(r_xprt, wc);
}

/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue
 * @wc: WCE for a completed Receive WR
 *
 */
static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);
	struct rpcrdma_xprt *r_xprt = cq->cq_context;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_receive(wc, &rep->rr_cid);
	--r_xprt->rx_ep->re_receive_count;
	if (wc->status != IB_WC_SUCCESS)
		goto out_flushed;

	/* status == SUCCESS means all fields in wc are trustworthy */
	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
				   rdmab_addr(rep->rr_rdmabuf),
				   wc->byte_len, DMA_FROM_DEVICE);

	rpcrdma_reply_handler(rep);
	return;

out_flushed:
	rpcrdma_flush_disconnect(r_xprt, wc);
	rpcrdma_rep_put(&r_xprt->rx_buf, rep);
}
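
/* Absorb the peer's RDMA-CM private message, if any, to adjust this
 * connection's inline thresholds. Without a valid private message,
 * the RPC-over-RDMA Version One defaults apply. Thresholds are only
 * ever lowered here, never raised.
 */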
static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
				      struct rdma_conn_param *param)
{
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
	}

	if (rsize < ep->re_inline_recv)
		ep->re_inline_recv = rsize;
	if (wsize < ep->re_inline_send)
		ep->re_inline_send = wsize;

	rpcrdma_set_max_header_sizes(ep);
}

/**
 * rpcrdma_cm_event_handler - Handle RDMA CM events
 * @id: rdma_cm_id on which an event has occurred
 * @event: details of the event
 *
 * Called with @id's mutex held. Returns 1 if caller should
 * destroy @id, otherwise 0.
 */
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
	struct rpcrdma_ep *ep = id->context;

	might_sleep();

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ep->re_async_rc = 0;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ep->re_async_rc = -EPROTO;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ep->re_async_rc = -ENETUNREACH;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		pr_info("rpcrdma: removing device %s for %pISpc\n",
			ep->re_id->device->name, sap);
		fallthrough;
	case RDMA_CM_EVENT_ADDR_CHANGE:
		ep->re_connect_status = -ENODEV;
		goto disconnected;
	case RDMA_CM_EVENT_ESTABLISHED:
		rpcrdma_ep_get(ep);
		ep->re_connect_status = 1;
		rpcrdma_update_cm_private(ep, &event->param.conn);
		trace_xprtrdma_inline_thresh(ep);
		wake_up_all(&ep->re_connect_wait);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		ep->re_connect_status = -ENOTCONN;
		goto wake_connect_worker;
	case RDMA_CM_EVENT_UNREACHABLE:
		ep->re_connect_status = -ENETUNREACH;
		goto wake_connect_worker;
	case RDMA_CM_EVENT_REJECTED:
		ep->re_connect_status = -ECONNREFUSED;
		if (event->status == IB_CM_REJ_STALE_CONN)
			ep->re_connect_status = -ENOTCONN;
wake_connect_worker:
		wake_up_all(&ep->re_connect_wait);
		return 0;
	case RDMA_CM_EVENT_DISCONNECTED:
		ep->re_connect_status = -ECONNABORTED;
disconnected:
		rpcrdma_force_disconnect(ep);
		return rpcrdma_ep_put(ep);
	default:
		break;
	}

	return 0;
}
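
/* Create an rdma_cm_id for this endpoint and synchronously resolve
 * the server's address and a route to it. Each resolution step is
 * bounded by RDMA_RESOLVE_TIMEOUT; on any failure the id is
 * destroyed and an ERR_PTR is returned.
 */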
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
					    struct rpcrdma_ep *ep)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ep->re_done);

	id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return id;

	ep->re_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr,
			       RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout);
	if (rc < 0)
		goto out;

	rc = ep->re_async_rc;
	if (rc)
		goto out;

	ep->re_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout);
	if (rc < 0)
		goto out;
	rc = ep->re_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
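
/* kref release callback: destroy the endpoint's QP, free its
 * completion queues, deallocate its protection domain, free the
 * rpcrdma_ep itself, and drop the module reference taken in
 * rpcrdma_ep_create().
 */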
static void rpcrdma_ep_destroy(struct kref *kref)
{
	struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref);

	if (ep->re_id->qp) {
		rdma_destroy_qp(ep->re_id);
		ep->re_id->qp = NULL;
	}

	if (ep->re_attr.recv_cq)
		ib_free_cq(ep->re_attr.recv_cq);
	ep->re_attr.recv_cq = NULL;
	if (ep->re_attr.send_cq)
		ib_free_cq(ep->re_attr.send_cq);
	ep->re_attr.send_cq = NULL;

	if (ep->re_pd)
		ib_dealloc_pd(ep->re_pd);
	ep->re_pd = NULL;

	kfree(ep);
	module_put(THIS_MODULE);
}

static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep)
{
	kref_get(&ep->re_kref);
}

/* Returns:
 *     %0 if @ep still has a positive kref count, or
 *     %1 if @ep was destroyed successfully.
 */
static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep)
{
	return kref_put(&ep->re_kref, rpcrdma_ep_destroy);
}
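
/* Allocate and initialize a new rpcrdma_ep for @r_xprt: create and
 * resolve an rdma_cm_id, size the QP and completion queues using
 * the attributes filled in by frwr_query_device(), build the
 * RDMA-CM private message that advertises the inline thresholds,
 * allocate a PD, and create the QP. On success, r_xprt->rx_ep
 * points to the new endpoint.
 */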
static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_connect_private *pmsg;
	struct ib_device *device;
	struct rdma_cm_id *id;
	struct rpcrdma_ep *ep;
	int rc;

	ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS);
	if (!ep)
		return -ENOTCONN;
	ep->re_xprt = &r_xprt->rx_xprt;
	kref_init(&ep->re_kref);

	id = rpcrdma_create_id(r_xprt, ep);
	if (IS_ERR(id)) {
		kfree(ep);
		return PTR_ERR(id);
	}
	__module_get(THIS_MODULE);
	device = id->device;
	ep->re_id = id;
	reinit_completion(&ep->re_done);

	ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
	ep->re_inline_send = xprt_rdma_max_inline_write;
	ep->re_inline_recv = xprt_rdma_max_inline_read;
	rc = frwr_query_device(ep, device);
	if (rc)
		goto out_destroy;

	r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);

	ep->re_attr.srq = NULL;
	ep->re_attr.cap.max_inline_data = 0;
	ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->re_attr.qp_type = IB_QPT_RC;
	ep->re_attr.port_num = ~0;

	ep->re_send_batch = ep->re_max_requests >> 3;
	ep->re_send_count = ep->re_send_batch;
	init_waitqueue_head(&ep->re_connect_wait);

	ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt,
					      ep->re_attr.cap.max_send_wr,
					      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->re_attr.send_cq)) {
		rc = PTR_ERR(ep->re_attr.send_cq);
		ep->re_attr.send_cq = NULL;
		goto out_destroy;
	}

	ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt,
					      ep->re_attr.cap.max_recv_wr,
					      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->re_attr.recv_cq)) {
		rc = PTR_ERR(ep->re_attr.recv_cq);
		ep->re_attr.recv_cq = NULL;
		goto out_destroy;
	}
	ep->re_receive_count = 0;

	/* Initialize cma parameters */
	memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg = &ep->re_cm_private;
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv);
	ep->re_remote_cma.private_data = pmsg;
	ep->re_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->re_remote_cma.initiator_depth = 0;
	ep->re_remote_cma.responder_resources =
		min_t(int, U8_MAX, device->attrs.max_qp_rd_atom);

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->re_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->re_remote_cma.flow_control = 0;
	ep->re_remote_cma.rnr_retry_count = 0;

	ep->re_pd = ib_alloc_pd(device, 0);
	if (IS_ERR(ep->re_pd)) {
		rc = PTR_ERR(ep->re_pd);
		ep->re_pd = NULL;
		goto out_destroy;
	}

	rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr);
	if (rc)
		goto out_destroy;

	r_xprt->rx_ep = ep;
	return 0;

out_destroy:
	rpcrdma_ep_put(ep);
	rdma_destroy_id(id);
	return rc;
}

/**
 * rpcrdma_xprt_connect - Connect an unconnected transport
 * @r_xprt: controlling transport instance
 *
 * Returns 0 on success or a negative errno.
 */
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_ep *ep;
	int rc;

	rc = rpcrdma_ep_create(r_xprt);
	if (rc)
		return rc;
	ep = r_xprt->rx_ep;

	xprt_clear_connected(xprt);
	rpcrdma_reset_cwnd(r_xprt);

	/* Bump the ep's reference count while there are
	 * outstanding Receives.
	 */
	rpcrdma_ep_get(ep);
	rpcrdma_post_recvs(r_xprt, 1, true);

	rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
	if (rc)
		goto out;

	if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
		xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	wait_event_interruptible(ep->re_connect_wait,
				 ep->re_connect_status != 0);
	if (ep->re_connect_status <= 0) {
		rc = ep->re_connect_status;
		goto out;
	}

	rc = rpcrdma_sendctxs_create(r_xprt);
	if (rc) {
		rc = -ENOTCONN;
		goto out;
	}

	rc = rpcrdma_reqs_setup(r_xprt);
	if (rc) {
		rc = -ENOTCONN;
		goto out;
	}
	rpcrdma_mrs_create(r_xprt);
	frwr_wp_create(r_xprt);

out:
	trace_xprtrdma_connect(r_xprt, rc);
	return rc;
}

/**
 * rpcrdma_xprt_disconnect - Disconnect underlying transport
 * @r_xprt: controlling transport instance
 *
 * Caller serializes. Either the transport send lock is held,
 * or we're being called to destroy the transport.
 *
 * On return, @r_xprt is completely divested of all hardware
 * resources and prepared for the next ->connect operation.
 */
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rdma_cm_id *id;
	int rc;

	if (!ep)
		return;

	id = ep->re_id;
	rc = rdma_disconnect(id);
	trace_xprtrdma_disconnect(r_xprt, rc);

	rpcrdma_xprt_drain(r_xprt);
	rpcrdma_reps_unmap(r_xprt);
	rpcrdma_reqs_reset(r_xprt);
	rpcrdma_mrs_destroy(r_xprt);
	rpcrdma_sendctxs_destroy(r_xprt);

	if (rpcrdma_ep_put(ep))
		rdma_destroy_id(id);

	r_xprt->rx_ep = NULL;
}

/* Fixed-size circular FIFO queue. This implementation is wait-free and
 * lock-free.
 *
 * Consumer is the code path that posts Sends. This path dequeues a
 * sendctx for use by a Send operation. Multiple consumer threads
 * are serialized by the RPC transport lock, which allows only one
 * ->send_request call at a time.
 *
 * Producer is the code path that handles Send completions. This path
 * enqueues a sendctx that has been completed. Multiple producer
 * threads are serialized by the ib_poll_cq() function.
 */

/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
 * queue activity, and rpcrdma_xprt_drain has flushed all remaining
 * Send requests.
 */
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned long i;

	if (!buf->rb_sc_ctxs)
		return;
	for (i = 0; i <= buf->rb_sc_last; i++)
		kfree(buf->rb_sc_ctxs[i]);
	kfree(buf->rb_sc_ctxs);
	buf->rb_sc_ctxs = NULL;
}
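
/* Allocate one send context with an SGE array sized to the device's
 * max_send_sge, and assign it a completion ID used by the Send
 * completion tracepoints.
 */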
static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
{
	struct rpcrdma_sendctx *sc;

	sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge),
		     XPRTRDMA_GFP_FLAGS);
	if (!sc)
		return NULL;

	sc->sc_cqe.done = rpcrdma_wc_send;
	sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id;
	sc->sc_cid.ci_completion_id =
		atomic_inc_return(&ep->re_completion_ids);
	return sc;
}
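
/* Populate the sendctx circular queue for a newly connected
 * transport: re_max_requests entries plus RPCRDMA_MAX_BC_REQUESTS
 * entries for backchannel traffic.
 */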
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;
	unsigned long i;

	/* Maximum number of concurrent outstanding Send WRs. Capping
	 * the circular queue size stops Send Queue overflow by causing
	 * the ->send_request call to fail temporarily before too many
	 * Sends are posted.
	 */
	i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS;
	buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS);
	if (!buf->rb_sc_ctxs)
		return -ENOMEM;

	buf->rb_sc_last = i - 1;
	for (i = 0; i <= buf->rb_sc_last; i++) {
		sc = rpcrdma_sendctx_create(r_xprt->rx_ep);
		if (!sc)
			return -ENOMEM;

		buf->rb_sc_ctxs[i] = sc;
	}

	buf->rb_sc_head = 0;
	buf->rb_sc_tail = 0;
	return 0;
}

/* The sendctx queue is not guaranteed to have a size that is a
 * power of two, thus the helpers in circ_buf.h cannot be used.
 * The other option is to use modulus (%), which can be expensive.
 */
static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
					  unsigned long item)
{
	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
}

/**
 * rpcrdma_sendctx_get_locked - Acquire a send context
 * @r_xprt: controlling transport instance
 *
 * Returns pointer to a free send completion context; or NULL if
 * the queue is empty.
 *
 * Usage: Called to acquire an SGE array before preparing a Send WR.
 *
 * The caller serializes calls to this function (per transport), and
 * provides an effective memory barrier that flushes the new value
 * of rb_sc_head.
 */
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;
	unsigned long next_head;

	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);

	if (next_head == READ_ONCE(buf->rb_sc_tail))
		goto out_emptyq;

	/* ORDER: item must be accessed _before_ head is updated */
	sc = buf->rb_sc_ctxs[next_head];

	/* Releasing the lock in the caller acts as a memory
	 * barrier that flushes rb_sc_head.
	 */
	buf->rb_sc_head = next_head;

	return sc;

out_emptyq:
	/* The queue is "empty" if there have not been enough Send
	 * completions recently. This is a sign the Send Queue is
	 * backing up. Cause the caller to pause and try again.
	 */
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	r_xprt->rx_stats.empty_sendctx_q++;
	return NULL;
}

/**
 * rpcrdma_sendctx_put_locked - Release a send context
 * @r_xprt: controlling transport instance
 * @sc: send context to release
 *
 * Usage: Called from Send completion to return a sendctx
 * to the queue.
 *
 * The caller serializes calls to this function (per transport).
 */
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
				       struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned long next_tail;

	/* Unmap SGEs of previously completed but unsignaled
	 * Sends by walking up the queue until @sc is found.
	 */
	next_tail = buf->rb_sc_tail;
	do {
		next_tail = rpcrdma_sendctx_next(buf, next_tail);

		/* ORDER: item must be accessed _before_ tail is updated */
		rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
	} while (buf->rb_sc_ctxs[next_tail] != sc);

	/* Paired with READ_ONCE */
	smp_store_release(&buf->rb_sc_tail, next_tail);

	xprt_write_space(&r_xprt->rx_xprt);
}
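
/* Allocate a batch of MRs and add them to the transport's free and
 * all-MRs lists. One MR is needed per RDMA segment, so the loop
 * tries to provide enough MRs for a single full-sized I/O.
 */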
static void
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_device *device = ep->re_id->device;
	unsigned int count;

	/* Try to allocate enough to perform one full-sized I/O */
	for (count = 0; count < ep->re_max_rdma_segs; count++) {
		struct rpcrdma_mr *mr;
		int rc;

		mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS,
				  ibdev_to_node(device));
		if (!mr)
			break;

		rc = frwr_mr_init(r_xprt, mr);
		if (rc) {
			kfree(mr);
			break;
		}

		spin_lock(&buf->rb_lock);
		rpcrdma_mr_push(mr, &buf->rb_mrs);
		list_add(&mr->mr_all, &buf->rb_all_mrs);
		spin_unlock(&buf->rb_lock);
	}

	r_xprt->rx_stats.mrs_allocated += count;
	trace_xprtrdma_createmrs(r_xprt, count);
}
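
/* Workqueue worker that replenishes the MR free list in process
 * context, then kicks the transport so waiting senders can retry.
 */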
static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_mrs_create(r_xprt);
	xprt_write_space(&r_xprt->rx_xprt);
}

/**
 * rpcrdma_mrs_refresh - Wake the MR refresh worker
 * @r_xprt: controlling transport instance
 *
 */
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;

	/* If there is no underlying connection, it's no use
	 * to wake the refresh worker.
	 */
	if (ep->re_connect_status != 1)
		return;
	queue_work(system_highpri_wq, &buf->rb_refresh_worker);
}

/**
 * rpcrdma_req_create - Allocate an rpcrdma_req object
 * @r_xprt: controlling r_xprt
 * @size: initial size, in bytes, of send and receive buffers
 *
 * Returns an allocated and fully initialized rpcrdma_req or NULL.
 */
struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
				       size_t size)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS);
	if (req == NULL)
		goto out1;

	req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE);
	if (!req->rl_sendbuf)
		goto out2;

	req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE);
	if (!req->rl_recvbuf)
		goto out3;

	INIT_LIST_HEAD(&req->rl_free_mrs);
	INIT_LIST_HEAD(&req->rl_registered);
	spin_lock(&buffer->rb_lock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_lock);
	return req;

out3:
	rpcrdma_regbuf_free(req->rl_sendbuf);
out2:
	kfree(req);
out1:
	return NULL;
}

/**
 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req object to set up
 *
 * Returns zero on success, and a negative errno on failure.
 */
int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_regbuf *rb;
	size_t maxhdrsize;

	/* Compute maximum header buffer size in bytes */
	maxhdrsize = rpcrdma_fixed_maxsz + 3 +
		     r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz;
	maxhdrsize *= sizeof(__be32);
	rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
				  DMA_TO_DEVICE);
	if (!rb)
		goto out;

	if (!__rpcrdma_regbuf_dma_map(r_xprt, rb))
		goto out_free;

	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
	return 0;

out_free:
	rpcrdma_regbuf_free(rb);
out:
	return -ENOMEM;
}

/* ASSUMPTION: the rb_allreqs list is stable for the duration,
 * and thus can be walked without holding rb_lock. E.g., the
 * caller is holding the transport send lock to exclude
 * device removal or disconnection.
 */
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	int rc;

	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
		rc = rpcrdma_req_setup(r_xprt, req);
		if (rc)
			return rc;
	}
	return 0;
}
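
/* Return an rpcrdma_req to its unconnected state: clear its credit
 * accounting, free the DMA-mapped header buffer, unmap the send and
 * receive buffers, and reset the req's MRs via frwr_reset(), so the
 * req can be set up again on the next connection.
 */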
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
	/* Credits are valid for only one connection */
	req->rl_slot.rq_cong = 0;

	rpcrdma_regbuf_free(req->rl_rdmabuf);
	req->rl_rdmabuf = NULL;

	rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
	rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);

	frwr_reset(req);
}

/* ASSUMPTION: the rb_allreqs list is stable for the duration,
 * and thus can be walked without holding rb_lock. E.g., the
 * caller is holding the transport send lock to exclude
 * device removal or disconnection.
 */
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	list_for_each_entry(req, &buf->rb_allreqs, rl_all)
		rpcrdma_req_reset(req);
}
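
/* Allocate an rpcrdma_rep and the Receive buffer it describes,
 * initialize its Receive WR, and add it to the rb_all_reps list.
 * @temp marks a rep that is to be released after a single use.
 */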
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
				       bool temp)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;

	rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
					       DMA_FROM_DEVICE);
	if (!rep->rr_rdmabuf)
		goto out_free;

	rep->rr_cid.ci_completion_id =
		atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);

	xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
		     rdmab_length(rep->rr_rdmabuf));
	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	rep->rr_temp = temp;

	spin_lock(&buf->rb_lock);
	list_add(&rep->rr_all, &buf->rb_all_reps);
	spin_unlock(&buf->rb_lock);
	return rep;

out_free:
	kfree(rep);
out:
	return NULL;
}
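
/* rpcrdma_rep_free() releases the rep's Receive buffer and the rep
 * itself; rpcrdma_rep_destroy() additionally removes the rep from
 * the rb_all_reps list first.
 */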
static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
{
	rpcrdma_regbuf_free(rep->rr_rdmabuf);
	kfree(rep);
}

static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;

	spin_lock(&buf->rb_lock);
	list_del(&rep->rr_all);
	spin_unlock(&buf->rb_lock);

	rpcrdma_rep_free(rep);
}
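
/* Remove and return the first rep on the free-rep llist, or NULL
 * if none are available.
 */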
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
	struct llist_node *node;

	/* Calls to llist_del_first are required to be serialized */
	node = llist_del_first(&buf->rb_free_reps);
	if (!node)
		return NULL;
	return llist_entry(node, struct rpcrdma_rep, rr_node);
}

/**
 * rpcrdma_rep_put - Release rpcrdma_rep back to free list
 * @buf: buffer pool
 * @rep: rep to release
 *
 */
void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
{
	llist_add(&rep->rr_node, &buf->rb_free_reps);
}

/* Caller must ensure the QP is quiescent (RQ is drained) before
 * invoking this function, to guarantee rb_all_reps is not
 * changing.
 */
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;

	list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
		rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
		rep->rr_temp = true;	/* Mark this rep for destruction */
	}
}
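
/* Release every rep on the rb_all_reps list, dropping rb_lock
 * around each rpcrdma_rep_free() call.
 */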
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_rep *rep;

	spin_lock(&buf->rb_lock);
	while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
					       struct rpcrdma_rep,
					       rr_all)) != NULL) {
		list_del(&rep->rr_all);
		spin_unlock(&buf->rb_lock);

		rpcrdma_rep_free(rep);

		spin_lock(&buf->rb_lock);
	}
	spin_unlock(&buf->rb_lock);
}

/**
 * rpcrdma_buffer_create - Create initial set of req/rep objects
 * @r_xprt: transport instance to (re)initialize
 *
 * Returns zero on success, otherwise a negative errno.
 */
int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int i, rc;

	buf->rb_bc_srv_max_requests = 0;
	spin_lock_init(&buf->rb_lock);
	INIT_LIST_HEAD(&buf->rb_mrs);
	INIT_LIST_HEAD(&buf->rb_all_mrs);
	INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	INIT_LIST_HEAD(&buf->rb_all_reps);

	rc = -ENOMEM;
	for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_req_create(r_xprt,
					 RPCRDMA_V1_DEF_INLINE_SIZE * 2);
		if (!req)
			goto out;
		list_add(&req->rl_list, &buf->rb_send_bufs);
	}

	init_llist_head(&buf->rb_free_reps);

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/**
 * rpcrdma_req_destroy - Destroy an rpcrdma_req object
 * @req: unused object to be destroyed
 *
 * Relies on caller holding the transport send lock to protect
 * removing req->rl_all from buf->rb_all_reqs safely.
 */
void rpcrdma_req_destroy(struct rpcrdma_req *req)
{
	struct rpcrdma_mr *mr;

	list_del(&req->rl_all);

	while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) {
		struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;

		spin_lock(&buf->rb_lock);
		list_del(&mr->mr_all);
		spin_unlock(&buf->rb_lock);

		frwr_mr_release(mr);
	}

	rpcrdma_regbuf_free(req->rl_recvbuf);
	rpcrdma_regbuf_free(req->rl_sendbuf);
	rpcrdma_regbuf_free(req->rl_rdmabuf);
	kfree(req);
}

/**
 * rpcrdma_mrs_destroy - Release all of a transport's MRs
 * @r_xprt: controlling transport instance
 *
 * Relies on caller holding the transport send lock to protect
 * removing mr->mr_list from req->rl_free_mrs safely.
 */
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr;

	cancel_work_sync(&buf->rb_refresh_worker);

	spin_lock(&buf->rb_lock);
	while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
					      struct rpcrdma_mr,
					      mr_all)) != NULL) {
		list_del(&mr->mr_list);
		list_del(&mr->mr_all);
		spin_unlock(&buf->rb_lock);

		frwr_mr_release(mr);

		spin_lock(&buf->rb_lock);
	}
	spin_unlock(&buf->rb_lock);
}

/**
 * rpcrdma_buffer_destroy - Release all hw resources
 * @buf: root control block for resources
 *
 * ORDERING: relies on a prior rpcrdma_xprt_drain :
 * - No more Send or Receive completions can occur
 * - All MRs, reps, and reqs are returned to their free lists
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	rpcrdma_reps_destroy(buf);

	while (!list_empty(&buf->rb_send_bufs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
		list_del(&req->rl_list);
		rpcrdma_req_destroy(req);
	}
}

/**
 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
 * @r_xprt: controlling transport
 *
 * Returns an initialized rpcrdma_mr or NULL if no free
 * rpcrdma_mr objects are available.
 */
struct rpcrdma_mr *
rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr;

	spin_lock(&buf->rb_lock);
	mr = rpcrdma_mr_pop(&buf->rb_mrs);
	spin_unlock(&buf->rb_lock);
	return mr;
}

/**
 * rpcrdma_reply_put - Put reply buffers back into pool
 * @buffers: buffer pool
 * @req: object to return
 *
 */
void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
	if (req->rl_reply) {
		rpcrdma_rep_put(buffers, req->rl_reply);
		req->rl_reply = NULL;
	}
}

/**
 * rpcrdma_buffer_get - Get a request buffer
 * @buffers: Buffer pool from which to obtain a buffer
 *
 * Returns a fresh rpcrdma_req, or NULL if none are available.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	req = list_first_entry_or_null(&buffers->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
	if (req)
		list_del_init(&req->rl_list);
	spin_unlock(&buffers->rb_lock);
	return req;
}

/**
 * rpcrdma_buffer_put - Put request/reply buffers back into pool
 * @buffers: buffer pool
 * @req: object to return
 *
 */
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
	rpcrdma_reply_put(buffers, req);

	spin_lock(&buffers->rb_lock);
	list_add(&req->rl_list, &buffers->rb_send_bufs);
	spin_unlock(&buffers->rb_lock);
}

/* Returns a pointer to an rpcrdma_regbuf object, or NULL.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via frwr_map.
 */
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
	if (!rb)
		return NULL;
	rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
	if (!rb->rg_data) {
		kfree(rb);
		return NULL;
	}

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;
	return rb;
}

/**
 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
 * @rb: regbuf to reallocate
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns true if reallocation was successful. If false is
 * returned, @rb is left untouched.
 */
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
{
	void *buf;

	buf = kmalloc(size, flags);
	if (!buf)
		return false;

	rpcrdma_regbuf_dma_unmap(rb);
	kfree(rb->rg_data);

	rb->rg_data = buf;
	rb->rg_iov.length = size;
	return true;
}

/**
 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
 * @r_xprt: controlling transport instance
 * @rb: regbuf to be mapped
 *
 * Returns true if the buffer is now DMA mapped to @r_xprt's device
 */
bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
			      struct rpcrdma_regbuf *rb)
{
	struct ib_device *device = r_xprt->rx_ep->re_id->device;

	if (rb->rg_direction == DMA_NONE)
		return false;

	rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
					    rdmab_length(rb), rb->rg_direction);
	if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
		trace_xprtrdma_dma_maperr(rdmab_addr(rb));
		return false;
	}

	rb->rg_device = device;
	rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey;
	return true;
}
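
/* Reverse __rpcrdma_regbuf_dma_map(): a no-op when @rb is NULL or
 * is not currently DMA mapped.
 */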
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
{
	if (!rb)
		return;

	if (!rpcrdma_regbuf_is_mapped(rb))
		return;

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
			    rb->rg_direction);
	rb->rg_device = NULL;
}
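
/* DMA unmap, then free, a regbuf and its backing data buffer.
 * Safe to call with @rb == NULL.
 */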
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
{
	rpcrdma_regbuf_dma_unmap(rb);
	if (rb)
		kfree(rb->rg_data);
	kfree(rb);
}

/**
 * rpcrdma_post_recvs - Refill the Receive Queue
 * @r_xprt: controlling transport instance
 * @needed: current credit grant
 * @temp: mark Receive buffers to be deleted after one use
 *
 */
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_recv_wr *wr, *bad_wr;
	struct rpcrdma_rep *rep;
	int count, rc;

	rc = 0;
	count = 0;

	if (likely(ep->re_receive_count > needed))
		goto out;
	needed -= ep->re_receive_count;
	if (!temp)
		needed += RPCRDMA_MAX_RECV_BATCH;

	if (atomic_inc_return(&ep->re_receiving) > 1)
		goto out;

	/* fast path: all needed reps can be found on the free list */
	wr = NULL;
	while (needed) {
		rep = rpcrdma_rep_get_locked(buf);
		if (rep && rep->rr_temp) {
			rpcrdma_rep_destroy(rep);
			continue;
		}
		if (!rep)
			rep = rpcrdma_rep_create(r_xprt, temp);
		if (!rep)
			break;
		if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
			rpcrdma_rep_put(buf, rep);
			break;
		}

		rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
		trace_xprtrdma_post_recv(rep);
		rep->rr_recv_wr.next = wr;
		wr = &rep->rr_recv_wr;
		--needed;
		++count;
	}
	if (!wr)
		goto out;

	rc = ib_post_recv(ep->re_id->qp, wr,
			  (const struct ib_recv_wr **)&bad_wr);
	if (rc) {
		trace_xprtrdma_post_recvs_err(r_xprt, rc);
		for (wr = bad_wr; wr;) {
			struct rpcrdma_rep *rep;

			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
			wr = wr->next;
			rpcrdma_rep_put(buf, rep);
			--count;
		}
	}
	if (atomic_dec_return(&ep->re_receiving) > 0)
		complete(&ep->re_done);

out:
	trace_xprtrdma_post_recvs(r_xprt, count);
	ep->re_receive_count += count;
	return;
}