/* transport.c */
  1. // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  2. /*
  3. * Copyright (c) 2014-2017 Oracle. All rights reserved.
  4. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  5. *
  6. * This software is available to you under a choice of one of two
  7. * licenses. You may choose to be licensed under the terms of the GNU
  8. * General Public License (GPL) Version 2, available from the file
  9. * COPYING in the main directory of this source tree, or the BSD-type
  10. * license below:
  11. *
  12. * Redistribution and use in source and binary forms, with or without
  13. * modification, are permitted provided that the following conditions
  14. * are met:
  15. *
  16. * Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * Neither the name of the Network Appliance, Inc. nor the names of
  25. * its contributors may be used to endorse or promote products
  26. * derived from this software without specific prior written
  27. * permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40. */
  41. /*
  42. * transport.c
  43. *
  44. * This file contains the top-level implementation of an RPC RDMA
  45. * transport.
  46. *
  47. * Naming convention: functions beginning with xprt_ are part of the
  48. * transport switch. All others are RPC RDMA internal.
  49. */
  50. #include <linux/module.h>
  51. #include <linux/slab.h>
  52. #include <linux/seq_file.h>
  53. #include <linux/smp.h>
  54. #include <linux/sunrpc/addr.h>
  55. #include <linux/sunrpc/svc_rdma.h>
  56. #include "xprt_rdma.h"
  57. #include <trace/events/rpcrdma.h>
/*
 * tunables — adjustable at runtime via /proc/sys/sunrpc/* when
 * CONFIG_SUNRPC_DEBUG is enabled; otherwise fixed at the defaults below.
 */
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
static struct xprt_class xprt_rdma;

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)

/* Clamp values for the proc_dointvec_minmax handlers below */
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
/* Write-only sink for the obsolete rdma_inline_write_padding tunable */
static unsigned int dummy;

static struct ctl_table_header *sunrpc_table_header;

static struct ctl_table xr_tunables_table[] = {
	{
		.procname	= "rdma_slot_table_entries",
		.data		= &xprt_rdma_slot_table_entries,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_slot_table_size,
		.extra2		= &max_slot_table_size
	},
	{
		.procname	= "rdma_max_inline_read",
		.data		= &xprt_rdma_max_inline_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_inline_size,
		.extra2		= &max_inline_size,
	},
	{
		.procname	= "rdma_max_inline_write",
		.data		= &xprt_rdma_max_inline_write,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_inline_size,
		.extra2		= &max_inline_size,
	},
	{
		/* Retained for ABI compatibility; value is ignored (see dummy) */
		.procname	= "rdma_inline_write_padding",
		.data		= &dummy,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &max_padding,
	},
	{
		.procname	= "rdma_memreg_strategy",
		.data		= &xprt_rdma_memreg_strategy,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_memreg,
		.extra2		= &max_memreg,
	},
	{
		.procname	= "rdma_pad_optimize",
		.data		= &xprt_rdma_pad_optimize,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ },
};

static struct ctl_table sunrpc_table[] = {
	{
		.procname	= "sunrpc",
		.mode		= 0555,
		.child		= xr_tunables_table
	},
	{ },
};

#endif

static const struct rpc_xprt_ops xprt_rdma_procs;
  142. static void
  143. xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
  144. {
  145. struct sockaddr_in *sin = (struct sockaddr_in *)sap;
  146. char buf[20];
  147. snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
  148. xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
  149. xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
  150. }
  151. static void
  152. xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
  153. {
  154. struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
  155. char buf[40];
  156. snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
  157. xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
  158. xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
  159. }
  160. void
  161. xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
  162. {
  163. char buf[128];
  164. switch (sap->sa_family) {
  165. case AF_INET:
  166. xprt_rdma_format_addresses4(xprt, sap);
  167. break;
  168. case AF_INET6:
  169. xprt_rdma_format_addresses6(xprt, sap);
  170. break;
  171. default:
  172. pr_err("rpcrdma: Unrecognized address family\n");
  173. return;
  174. }
  175. (void)rpc_ntop(sap, buf, sizeof(buf));
  176. xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
  177. snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
  178. xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
  179. snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
  180. xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
  181. xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
  182. }
  183. void
  184. xprt_rdma_free_addresses(struct rpc_xprt *xprt)
  185. {
  186. unsigned int i;
  187. for (i = 0; i < RPC_DISPLAY_MAX; i++)
  188. switch (i) {
  189. case RPC_DISPLAY_PROTO:
  190. case RPC_DISPLAY_NETID:
  191. continue;
  192. default:
  193. kfree(xprt->address_strings[i]);
  194. }
  195. }
/**
 * xprt_rdma_connect_worker - establish connection in the background
 * @work: worker thread context
 *
 * Requester holds the xprt's send lock to prevent activity on this
 * transport while a fresh connection is being established. RPC tasks
 * sleep on the xprt's pending queue waiting for connect to complete.
 */
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
						   rx_connect_worker.work);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	unsigned int pflags = current->flags;
	int rc;

	/* If this transport carries swap traffic, allow the connect
	 * path to dip into memory reserves so it cannot deadlock on
	 * allocation. */
	if (atomic_read(&xprt->swapper))
		current->flags |= PF_MEMALLOC;

	rc = rpcrdma_xprt_connect(r_xprt);
	xprt_clear_connecting(xprt);
	if (!rc) {
		xprt->connect_cookie++;
		xprt->stat.connect_count++;
		xprt->stat.connect_time += (long)jiffies -
					   xprt->stat.connect_start;
		xprt_set_connected(xprt);
		/* Wake waiters with -EAGAIN so they retry their
		 * sends on the fresh connection. */
		rc = -EAGAIN;
	} else
		rpcrdma_xprt_disconnect(r_xprt);

	/* Release the connect lock before waking tasks, so woken
	 * tasks can immediately take it. */
	xprt_unlock_connect(xprt, r_xprt);
	xprt_wake_pending_tasks(xprt, rc);
	current_restore_flags(pflags, PF_MEMALLOC);
}
  229. /**
  230. * xprt_rdma_inject_disconnect - inject a connection fault
  231. * @xprt: transport context
  232. *
  233. * If @xprt is connected, disconnect it to simulate spurious
  234. * connection loss. Caller must hold @xprt's send lock to
  235. * ensure that data structures and hardware resources are
  236. * stable during the rdma_disconnect() call.
  237. */
  238. static void
  239. xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
  240. {
  241. struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
  242. trace_xprtrdma_op_inject_dsc(r_xprt);
  243. rdma_disconnect(r_xprt->rx_ep->re_id);
  244. }
/**
 * xprt_rdma_destroy - Full tear down of transport
 * @xprt: doomed transport context
 *
 * Caller guarantees there will be no more calls to us with
 * this @xprt.
 */
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	/* Stop the connect worker before tearing down what it uses */
	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);

	rpcrdma_xprt_disconnect(r_xprt);
	rpcrdma_buffer_destroy(&r_xprt->rx_buf);

	xprt_rdma_free_addresses(xprt);
	xprt_free(xprt);

	/* Drops the reference taken in xprt_setup_rdma() */
	module_put(THIS_MODULE);
}
/* 60 second timeout, no retries */
static const struct rpc_timeout xprt_rdma_default_timeout = {
	.to_initval = 60 * HZ,
	.to_maxval = 60 * HZ,
};
/**
 * xprt_setup_rdma - Set up transport to use RDMA
 *
 * @args: rpc transport arguments
 *
 * Returns a new rpc_xprt on success, or an ERR_PTR. On any
 * failure, everything acquired so far is released in reverse
 * order before returning.
 */
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;
	struct sockaddr *sap;
	int rc;

	if (args->addrlen > sizeof(xprt->addr))
		return ERR_PTR(-EBADF);

	/* Pin the module until xprt_rdma_destroy() runs */
	if (!try_module_get(THIS_MODULE))
		return ERR_PTR(-EIO);

	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0,
			  xprt_rdma_slot_table_entries);
	if (!xprt) {
		module_put(THIS_MODULE);
		return ERR_PTR(-ENOMEM);
	}

	xprt->timeout = &xprt_rdma_default_timeout;
	xprt->connect_timeout = xprt->timeout->to_initval;
	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->resvport = 0;		/* privileged port not needed */
	xprt->ops = &xprt_rdma_procs;

	/*
	 * Set up RDMA-specific connect data.
	 */
	sap = args->dstaddr;

	/* Ensure xprt->addr holds valid server TCP (not RDMA)
	 * address, for any side protocols which peek at it */
	xprt->prot = IPPROTO_TCP;
	xprt->xprt_class = &xprt_rdma;
	xprt->addrlen = args->addrlen;
	memcpy(&xprt->addr, sap, xprt->addrlen);

	if (rpc_get_port(sap))
		xprt_set_bound(xprt);
	xprt_rdma_format_addresses(xprt, sap);

	new_xprt = rpcx_to_rdmax(xprt);
	rc = rpcrdma_buffer_create(new_xprt);
	if (rc) {
		/* Unwind everything acquired above, newest first */
		xprt_rdma_free_addresses(xprt);
		xprt_free(xprt);
		module_put(THIS_MODULE);
		return ERR_PTR(rc);
	}

	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
			  xprt_rdma_connect_worker);

	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
	return xprt;
}
/**
 * xprt_rdma_close - close a transport connection
 * @xprt: transport context
 *
 * Called during autoclose or device removal.
 *
 * Caller holds @xprt's send lock to prevent activity on this
 * transport while the connection is torn down.
 */
void xprt_rdma_close(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	rpcrdma_xprt_disconnect(r_xprt);

	xprt->reestablish_timeout = 0;	/* reconnect without delay */
	/* Bumping the cookie marks in-flight requests as stale */
	++xprt->connect_cookie;
	xprt_disconnect_done(xprt);
}
  341. /**
  342. * xprt_rdma_set_port - update server port with rpcbind result
  343. * @xprt: controlling RPC transport
  344. * @port: new port value
  345. *
  346. * Transport connect status is unchanged.
  347. */
  348. static void
  349. xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
  350. {
  351. struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
  352. char buf[8];
  353. rpc_set_port(sap, port);
  354. kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
  355. snprintf(buf, sizeof(buf), "%u", port);
  356. xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
  357. kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
  358. snprintf(buf, sizeof(buf), "%4hx", port);
  359. xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
  360. }
  361. /**
  362. * xprt_rdma_timer - invoked when an RPC times out
  363. * @xprt: controlling RPC transport
  364. * @task: RPC task that timed out
  365. *
  366. * Invoked when the transport is still connected, but an RPC
  367. * retransmit timeout occurs.
  368. *
  369. * Since RDMA connections don't have a keep-alive, forcibly
  370. * disconnect and retry to connect. This drives full
  371. * detection of the network path, and retransmissions of
  372. * all pending RPCs.
  373. */
  374. static void
  375. xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
  376. {
  377. xprt_force_disconnect(xprt);
  378. }
/**
 * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection
 * @xprt: controlling transport instance
 * @connect_timeout: reconnect timeout after client disconnects
 * @reconnect_timeout: reconnect timeout after server disconnects
 *
 * Timeouts are only ever shortened, never lengthened. The whole
 * update is done under @xprt's transport_lock.
 */
static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt,
					  unsigned long connect_timeout,
					  unsigned long reconnect_timeout)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout);

	spin_lock(&xprt->transport_lock);

	if (connect_timeout < xprt->connect_timeout) {
		struct rpc_timeout to;
		unsigned long initval;

		/* Copy the current timeout, then override its initial
		 * and maximum values; the private copy lives in
		 * r_xprt so it survives after this function returns. */
		to = *xprt->timeout;
		initval = connect_timeout;
		/* Never go below twice the initial reestablish timeout */
		if (initval < RPCRDMA_INIT_REEST_TO << 1)
			initval = RPCRDMA_INIT_REEST_TO << 1;
		to.to_initval = initval;
		to.to_maxval = initval;
		r_xprt->rx_timeout = to;
		xprt->timeout = &r_xprt->rx_timeout;
		xprt->connect_timeout = connect_timeout;
	}

	if (reconnect_timeout < xprt->max_reconnect_timeout)
		xprt->max_reconnect_timeout = reconnect_timeout;

	spin_unlock(&xprt->transport_lock);
}
/**
 * xprt_rdma_connect - schedule an attempt to reconnect
 * @xprt: transport state
 * @task: RPC scheduler context (unused)
 *
 * The actual connect is performed by xprt_rdma_connect_worker()
 * on system_long_wq, possibly after a backoff delay.
 */
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned long delay;

	/* The worker releases this lock via xprt_unlock_connect() */
	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));

	delay = 0;
	/* Delay only if the previous connection attempt failed */
	if (ep && ep->re_connect_status != 0) {
		delay = xprt_reconnect_delay(xprt);
		xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
	}
	trace_xprtrdma_op_connect(r_xprt, delay);
	queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay);
}
/**
 * xprt_rdma_alloc_slot - allocate an rpc_rqst
 * @xprt: controlling RPC transport
 * @task: RPC task requesting a fresh rpc_rqst
 *
 * tk_status values:
 *	%0 if task->tk_rqstp points to a fresh rpc_rqst
 *	%-ENOMEM if no rpc_rqst is available; queued on backlog
 */
static void
xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_req *req;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (!req)
		goto out_sleep;
	task->tk_rqstp = &req->rl_slot;
	task->tk_status = 0;
	return;

out_sleep:
	/* No slot available: park @task on the backlog queue; it is
	 * woken by xprt_rdma_free_slot() when a slot is released. */
	task->tk_status = -ENOMEM;
	xprt_add_backlog(xprt, task);
}
  455. /**
  456. * xprt_rdma_free_slot - release an rpc_rqst
  457. * @xprt: controlling RPC transport
  458. * @rqst: rpc_rqst to release
  459. *
  460. */
  461. static void
  462. xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
  463. {
  464. struct rpcrdma_xprt *r_xprt =
  465. container_of(xprt, struct rpcrdma_xprt, rx_xprt);
  466. rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
  467. if (!xprt_wake_up_backlog(xprt, rqst)) {
  468. memset(rqst, 0, sizeof(*rqst));
  469. rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
  470. }
  471. }
  472. static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
  473. struct rpcrdma_regbuf *rb, size_t size,
  474. gfp_t flags)
  475. {
  476. if (unlikely(rdmab_length(rb) < size)) {
  477. if (!rpcrdma_regbuf_realloc(rb, size, flags))
  478. return false;
  479. r_xprt->rx_stats.hardway_register_count += size;
  480. }
  481. return true;
  482. }
  483. /**
  484. * xprt_rdma_allocate - allocate transport resources for an RPC
  485. * @task: RPC task
  486. *
  487. * Return values:
  488. * 0: Success; rq_buffer points to RPC buffer to use
  489. * ENOMEM: Out of memory, call again later
  490. * EIO: A permanent error occurred, do not retry
  491. */
  492. static int
  493. xprt_rdma_allocate(struct rpc_task *task)
  494. {
  495. struct rpc_rqst *rqst = task->tk_rqstp;
  496. struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
  497. struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
  498. gfp_t flags = rpc_task_gfp_mask();
  499. if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
  500. flags))
  501. goto out_fail;
  502. if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
  503. flags))
  504. goto out_fail;
  505. rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
  506. rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
  507. return 0;
  508. out_fail:
  509. return -ENOMEM;
  510. }
/**
 * xprt_rdma_free - release resources allocated by xprt_rdma_allocate
 * @task: RPC task
 *
 * Caller guarantees rqst->rq_buffer is non-NULL.
 */
static void
xprt_rdma_free(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	/* MRs are normally unmapped by reply handling; any still
	 * registered here mean the RPC terminated abnormally. */
	if (unlikely(!list_empty(&req->rl_registered))) {
		trace_xprtrdma_mrs_zap(task);
		frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req);
	}

	/* XXX: If the RPC is completing because of a signal and
	 * not because a reply was received, we ought to ensure
	 * that the Send completion has fired, so that memory
	 * involved with the Send is not still visible to the NIC.
	 */
}
/**
 * xprt_rdma_send_request - marshal and send an RPC request
 * @rqst: RPC message in rq_snd_buf
 *
 * Caller holds the transport's write lock.
 *
 * Returns:
 *	%0 if the RPC message has been sent
 *	%-ENOTCONN if the caller should reconnect and call again
 *	%-EAGAIN if the caller should call again
 *	%-ENOBUFS if the caller should call again after a delay
 *	%-EMSGSIZE if encoding ran out of buffer space. The request
 *		was not sent. Do not try to send this message again.
 *	%-EIO if an I/O error occurred. The request was not sent.
 *		Do not try to send this message again.
 */
static int
xprt_rdma_send_request(struct rpc_rqst *rqst)
{
	struct rpc_xprt *xprt = rqst->rq_xprt;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	int rc = 0;

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	/* Backchannel replies have no rq_buffer; divert them */
	if (unlikely(!rqst->rq_buffer))
		return xprt_rdma_bc_send_reply(rqst);
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

	if (!xprt_connected(xprt))
		return -ENOTCONN;

	if (!xprt_request_get_cong(xprt, rqst))
		return -EBADSLT;

	rc = rpcrdma_marshal_req(r_xprt, rqst);
	if (rc < 0)
		goto failed_marshal;

	/* Must suppress retransmit to maintain credits */
	if (rqst->rq_connect_cookie == xprt->connect_cookie)
		goto drop_connection;
	rqst->rq_xtime = ktime_get();

	if (frwr_send(r_xprt, req))
		goto drop_connection;

	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;

	/* An RPC with no reply will throw off credit accounting,
	 * so drop the connection to reset the credit grant.
	 */
	if (!rpc_reply_expected(rqst->rq_task))
		goto drop_connection;
	return 0;

failed_marshal:
	/* Only -ENOTCONN falls through to the disconnect path */
	if (rc != -ENOTCONN)
		return rc;
drop_connection:
	xprt_rdma_close(xprt);
	return -ENOTCONN;
}
/* Emit this transport's statistics in /proc/self/mountstats format.
 * The fields are positional; their order must not change, or
 * userspace parsers (e.g. mountstats) will misread them.
 */
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	long idle_time = 0;

	if (xprt_connected(xprt))
		idle_time = (long)(jiffies - xprt->last_used) / HZ;

	seq_puts(seq, "\txprt:\trdma ");
	/* Generic rpc_xprt counters */
	seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ",
		   0,	/* need a local port? */
		   xprt->stat.bind_count,
		   xprt->stat.connect_count,
		   xprt->stat.connect_time / HZ,
		   idle_time,
		   xprt->stat.sends,
		   xprt->stat.recvs,
		   xprt->stat.bad_xids,
		   xprt->stat.req_u,
		   xprt->stat.bklog_u);
	/* RDMA chunk and marshaling counters */
	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ",
		   r_xprt->rx_stats.read_chunk_count,
		   r_xprt->rx_stats.write_chunk_count,
		   r_xprt->rx_stats.reply_chunk_count,
		   r_xprt->rx_stats.total_rdma_request,
		   r_xprt->rx_stats.total_rdma_reply,
		   r_xprt->rx_stats.pullup_copy_count,
		   r_xprt->rx_stats.fixup_copy_count,
		   r_xprt->rx_stats.hardway_register_count,
		   r_xprt->rx_stats.failed_marshal_count,
		   r_xprt->rx_stats.bad_reply_count,
		   r_xprt->rx_stats.nomsg_call_count);
	/* MR lifecycle and send/reply coordination counters */
	seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
		   r_xprt->rx_stats.mrs_recycled,
		   r_xprt->rx_stats.mrs_orphaned,
		   r_xprt->rx_stats.mrs_allocated,
		   r_xprt->rx_stats.local_inv_needed,
		   r_xprt->rx_stats.empty_sendctx_q,
		   r_xprt->rx_stats.reply_waits_for_send);
}
  624. static int
  625. xprt_rdma_enable_swap(struct rpc_xprt *xprt)
  626. {
  627. return 0;
  628. }
  629. static void
  630. xprt_rdma_disable_swap(struct rpc_xprt *xprt)
  631. {
  632. }
/*
 * Plumbing for rpc transport switch and kernel module
 */
static const struct rpc_xprt_ops xprt_rdma_procs = {
	.reserve_xprt		= xprt_reserve_xprt_cong,
	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
	.alloc_slot		= xprt_rdma_alloc_slot,
	.free_slot		= xprt_rdma_free_slot,
	.release_request	= xprt_release_rqst_cong, /* ditto */
	.wait_for_reply_request	= xprt_wait_for_reply_request_def, /* ditto */
	.timer			= xprt_rdma_timer,
	.rpcbind		= rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
	.set_port		= xprt_rdma_set_port,
	.connect		= xprt_rdma_connect,
	.buf_alloc		= xprt_rdma_allocate,
	.buf_free		= xprt_rdma_free,
	.send_request		= xprt_rdma_send_request,
	.close			= xprt_rdma_close,
	.destroy		= xprt_rdma_destroy,
	.set_connect_timeout	= xprt_rdma_set_connect_timeout,
	.print_stats		= xprt_rdma_print_stats,
	.enable_swap		= xprt_rdma_enable_swap,
	.disable_swap		= xprt_rdma_disable_swap,
	.inject_disconnect	= xprt_rdma_inject_disconnect,
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	.bc_setup		= xprt_rdma_bc_setup,
	.bc_maxpayload		= xprt_rdma_bc_maxpayload,
	.bc_num_slots		= xprt_rdma_bc_max_slots,
	.bc_free_rqst		= xprt_rdma_bc_free_rqst,
	.bc_destroy		= xprt_rdma_bc_destroy,
#endif
};
/* Registration record for the "rdma" transport class */
static struct xprt_class xprt_rdma = {
	.list			= LIST_HEAD_INIT(xprt_rdma.list),
	.name			= "rdma",
	.owner			= THIS_MODULE,
	.ident			= XPRT_TRANSPORT_RDMA,
	.setup			= xprt_setup_rdma,
	.netid			= { "rdma", "rdma6", "" },
};
/* Module-exit teardown: drop the sysctl table, then unregister
 * both the forward and backchannel transport classes.
 */
void xprt_rdma_cleanup(void)
{
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (sunrpc_table_header) {
		unregister_sysctl_table(sunrpc_table_header);
		sunrpc_table_header = NULL;
	}
#endif

	xprt_unregister_transport(&xprt_rdma);
	xprt_unregister_transport(&xprt_rdma_bc);
}
  684. int xprt_rdma_init(void)
  685. {
  686. int rc;
  687. rc = xprt_register_transport(&xprt_rdma);
  688. if (rc)
  689. return rc;
  690. rc = xprt_register_transport(&xprt_rdma_bc);
  691. if (rc) {
  692. xprt_unregister_transport(&xprt_rdma);
  693. return rc;
  694. }
  695. #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  696. if (!sunrpc_table_header)
  697. sunrpc_table_header = register_sysctl_table(sunrpc_table);
  698. #endif
  699. return 0;
  700. }