// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/net/sunrpc/svcsock.c
 *
 * These are the RPC server socket internals.
 *
 * The server scheduling algorithm does not always distribute the load
 * evenly when servicing a single client. May need to modify the
 * svc_xprt_enqueue procedure...
 *
 * TCP support is largely untested and may be a little slow. The problem
 * is that we currently do two separate recvfrom's, one for the 4-byte
 * record length, and the second for the actual record. This could possibly
 * be improved by always reading a minimum size of around 100 bytes and
 * tucking any superfluous bytes away in a temporary store. Still, that
 * leaves write requests out in the rain. An alternative may be to peek at
 * the first skb in the queue, and if it matches the next TCP sequence
 * number, to extract the record marker. Yuck.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <[email protected]>
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>

#include <trace/events/sunrpc.h>

#include "socklib.h"
#include "sunrpc.h"

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
					 int flags);
static int		svc_udp_recvfrom(struct svc_rqst *);
static int		svc_udp_sendto(struct svc_rqst *);
static void		svc_sock_detach(struct svc_xprt *);
static void		svc_tcp_sock_detach(struct svc_xprt *);
static void		svc_sock_free(struct svc_xprt *);

static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
					  struct net *, struct sockaddr *,
					  int, int);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];
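/*
 * NFSD sockets take their locks in a different context than user
 * space sockets do.  Reclassifying them under their own lockdep keys
 * avoids false-positive lock-dependency warnings when lock debugging
 * is enabled.
 */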
static void svc_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
		return;

	switch (sk->sk_family) {
	case AF_INET:
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD",
					      &svc_slock_key[0],
					      "sk_xprt.xpt_lock-AF_INET-NFSD",
					      &svc_key[0]);
		break;

	case AF_INET6:
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD",
					      &svc_slock_key[1],
					      "sk_xprt.xpt_lock-AF_INET6-NFSD",
					      &svc_key[1]);
		break;

	default:
		BUG();
	}
}
#else
static void svc_reclassify_socket(struct socket *sock)
{
}
#endif

/**
 * svc_tcp_release_ctxt - Release transport-related resources
 * @xprt: the transport which owned the context
 * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
 *
 */
static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
}

/**
 * svc_udp_release_ctxt - Release transport-related resources
 * @xprt: the transport which owned the context
 * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
 *
 */
static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
	struct sk_buff *skb = ctxt;

	if (skb)
		consume_skb(skb);
}

union svc_pktinfo_u {
	struct in_pktinfo pkti;
	struct in6_pktinfo pkti6;
};
#define SVC_PKTINFO_SPACE \
	CMSG_SPACE(sizeof(union svc_pktinfo_u))
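/*
 * Fill in a control message carrying IP_PKTINFO/IPV6_PKTINFO so that
 * a UDP reply leaves from the same local address on which the request
 * arrived; this matters on multihomed hosts.
 */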
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);

	switch (svsk->sk_sk->sk_family) {
	case AF_INET: {
			struct in_pktinfo *pki = CMSG_DATA(cmh);

			cmh->cmsg_level = SOL_IP;
			cmh->cmsg_type = IP_PKTINFO;
			pki->ipi_ifindex = 0;
			pki->ipi_spec_dst.s_addr =
				svc_daddr_in(rqstp)->sin_addr.s_addr;
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;

	case AF_INET6: {
			struct in6_pktinfo *pki = CMSG_DATA(cmh);
			struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);

			cmh->cmsg_level = SOL_IPV6;
			cmh->cmsg_type = IPV6_PKTINFO;
			pki->ipi6_ifindex = daddr->sin6_scope_id;
			pki->ipi6_addr = daddr->sin6_addr;
			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
		}
		break;
	}
}

static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
				   unsigned int length)
{
	return 0;
}

/*
 * Report socket names for nfsdfs
 */
static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
{
	const struct sock *sk = svsk->sk_sk;
	const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
							"udp" : "tcp";
	int len;

	switch (sk->sk_family) {
	case PF_INET:
		len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
				proto_name,
				&inet_sk(sk)->inet_rcv_saddr,
				inet_sk(sk)->inet_num);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case PF_INET6:
		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
				proto_name,
				&sk->sk_v6_rcv_saddr,
				inet_sk(sk)->inet_num);
		break;
#endif
	default:
		len = snprintf(buf, remaining, "*unknown-%d*\n",
				sk->sk_family);
	}

	if (len >= remaining) {
		*buf = '\0';
		return -ENAMETOOLONG;
	}
	return len;
}

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
{
	struct bvec_iter bi = {
		.bi_size	= size + seek,
	};
	struct bio_vec bv;

	bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
	for_each_bvec(bv, bvec, bi, bi)
		flush_dcache_page(bv.bv_page);
}
#else
static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
				  size_t seek)
{
}
#endif

/*
 * Read from @rqstp's transport socket. The incoming message fills whole
 * pages in @rqstp's rq_pages array until the last page of the message
 * has been received into a partial page.
 */
static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
				size_t seek)
{
	struct svc_sock *svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct bio_vec *bvec = rqstp->rq_bvec;
	struct msghdr msg = { NULL };
	unsigned int i;
	ssize_t len;
	size_t t;

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);

	for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
		bvec[i].bv_page = rqstp->rq_pages[i];
		bvec[i].bv_len = PAGE_SIZE;
		bvec[i].bv_offset = 0;
	}
	rqstp->rq_respages = &rqstp->rq_pages[i];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen);
	if (seek) {
		iov_iter_advance(&msg.msg_iter, seek);
		buflen -= seek;
	}
	len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
	if (len > 0)
		svc_flush_bvec(bvec, len, seek);

	/* If we read a full record, then assume there may be more
	 * data to read (stream based sockets only!)
	 */
	if (len == buflen)
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);

	return len;
}

/*
 * Set socket snd and rcv buffer lengths
 */
static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
{
	unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
	struct socket *sock = svsk->sk_sock;

	nreqs = min(nreqs, INT_MAX / 2 / max_mesg);

	lock_sock(sock->sk);
	sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
	sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
	sock->sk->sk_write_space(sock->sk);
	release_sock(sock->sk);
}
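/*
 * Mark the request "secure" if it arrived from a privileged port,
 * i.e. one that traditionally only privileged processes may bind.
 */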
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
	if (svc_port_is_privileged(svc_addr(rqstp)))
		set_bit(RQ_SECURE, &rqstp->rq_flags);
	else
		clear_bit(RQ_SECURE, &rqstp->rq_flags);
}

/*
 * INET callback when data has been received on the socket.
 */
static void svc_data_ready(struct sock *sk)
{
	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;

	if (svsk) {
		/* Refer to svc_setup_socket() for details. */
		rmb();
		svsk->sk_odata(sk);
		trace_svcsock_data_ready(&svsk->sk_xprt, 0);
		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
			svc_xprt_enqueue(&svsk->sk_xprt);
	}
}

/*
 * INET callback when space is newly available on the socket.
 */
static void svc_write_space(struct sock *sk)
{
	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);

	if (svsk) {
		/* Refer to svc_setup_socket() for details. */
		rmb();
		trace_svcsock_write_space(&svsk->sk_xprt, 0);
		svsk->sk_owspace(sk);
		svc_xprt_enqueue(&svsk->sk_xprt);
	}
}
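/*
 * A listener can always accept a new connection.  For a connected
 * socket, report write space unless the network layer has flagged
 * SOCK_NOSPACE.
 */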
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
		return 1;
	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}

static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

	sock_no_linger(svsk->sk_sock->sk);
}

/*
 * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
 */
static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in_pktinfo *pki = CMSG_DATA(cmh);
	struct sockaddr_in *daddr = svc_daddr_in(rqstp);

	if (cmh->cmsg_type != IP_PKTINFO)
		return 0;

	daddr->sin_family = AF_INET;
	daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr;

	return 1;
}

/*
 * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
 */
static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
				     struct cmsghdr *cmh)
{
	struct in6_pktinfo *pki = CMSG_DATA(cmh);
	struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);

	if (cmh->cmsg_type != IPV6_PKTINFO)
		return 0;

	daddr->sin6_family = AF_INET6;
	daddr->sin6_addr = pki->ipi6_addr;
	daddr->sin6_scope_id = pki->ipi6_ifindex;

	return 1;
}

/*
 * Copy the UDP datagram's destination address to the rqstp structure.
 * The 'destination' address in this case is the address to which the
 * peer sent the datagram, i.e. our local address. For multihomed
 * hosts, this can change from msg to msg. Note that only the IP
 * address changes, the port number should remain the same.
 */
static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
				    struct cmsghdr *cmh)
{
	switch (cmh->cmsg_level) {
	case SOL_IP:
		return svc_udp_get_dest_address4(rqstp, cmh);
	case SOL_IPV6:
		return svc_udp_get_dest_address6(rqstp, cmh);
	}

	return 0;
}

/**
 * svc_udp_recvfrom - Receive a datagram from a UDP socket.
 * @rqstp: request structure into which to receive an RPC Call
 *
 * Called in a loop when XPT_DATA has been set.
 *
 * Returns:
 *	On success, the number of bytes in a received RPC Call, or
 *	%0 if a complete RPC Call message was not ready to return
 */
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	struct sk_buff *skb;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	struct msghdr msg = {
		.msg_name = svc_addr(rqstp),
		.msg_control = cmh,
		.msg_controllen = sizeof(buffer),
		.msg_flags = MSG_DONTWAIT,
	};
	size_t len;
	int err;

	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
	    /* udp sockets need large rcvbuf as all pending
	     * requests are still in that buffer.  sndbuf must
	     * also be large enough that there is enough space
	     * for one reply per thread.  We count all threads
	     * rather than threads in a particular pool, which
	     * provides an upper bound on the number of threads
	     * which will access the socket.
	     */
	    svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
			     0, 0, MSG_PEEK | MSG_DONTWAIT);
	if (err < 0)
		goto out_recv_err;
	skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
	if (!skb)
		goto out_recv_err;

	len = svc_addr_len(svc_addr(rqstp));
	rqstp->rq_addrlen = len;
	if (skb->tstamp == 0) {
		skb->tstamp = ktime_get_real();
		/* Don't enable netstamp, sunrpc doesn't
		   need that much accuracy */
	}
	sock_write_timestamp(svsk->sk_sk, skb->tstamp);
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */

	len = skb->len;
	rqstp->rq_arg.len = len;
	trace_svcsock_udp_recv(&svsk->sk_xprt, len);

	rqstp->rq_prot = IPPROTO_UDP;

	if (!svc_udp_get_dest_address(rqstp, cmh))
		goto out_cmsg_err;
	rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));

	if (skb_is_nonlinear(skb)) {
		/* we have to copy */
		local_bh_disable();
		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb))
			goto out_bh_enable;
		local_bh_enable();
		consume_skb(skb);
	} else {
		/* we can use it in-place */
		rqstp->rq_arg.head[0].iov_base = skb->data;
		rqstp->rq_arg.head[0].iov_len = len;
		if (skb_checksum_complete(skb))
			goto out_free;
		rqstp->rq_xprt_ctxt = skb;
	}

	rqstp->rq_arg.page_base = 0;
	if (len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = len;
		rqstp->rq_arg.page_len = 0;
		rqstp->rq_respages = rqstp->rq_pages+1;
	} else {
		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
		rqstp->rq_respages = rqstp->rq_pages + 1 +
			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
	}
	rqstp->rq_next_page = rqstp->rq_respages+1;

	if (serv->sv_stats)
		serv->sv_stats->netudpcnt++;

	svc_xprt_received(rqstp->rq_xprt);
	return len;

out_recv_err:
	if (err != -EAGAIN) {
		/* possibly an icmp error */
		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	}
	trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
	goto out_clear_busy;
out_cmsg_err:
	net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
			     cmh->cmsg_level, cmh->cmsg_type);
	goto out_free;
out_bh_enable:
	local_bh_enable();
out_free:
	kfree_skb(skb);
out_clear_busy:
	svc_xprt_received(rqstp->rq_xprt);
	return 0;
}

/**
 * svc_udp_sendto - Send out a reply on a UDP socket
 * @rqstp: completed svc_rqst
 *
 * xpt_mutex ensures @rqstp's whole message is written to the socket
 * without interruption.
 *
 * Returns the number of bytes sent, or a negative errno.
 */
static int svc_udp_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svc_sock	*svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct xdr_buf *xdr = &rqstp->rq_res;
	union {
		struct cmsghdr	hdr;
		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
	} buffer;
	struct cmsghdr *cmh = &buffer.hdr;
	struct msghdr msg = {
		.msg_name	= &rqstp->rq_addr,
		.msg_namelen	= rqstp->rq_addrlen,
		.msg_control	= cmh,
		.msg_controllen	= sizeof(buffer),
	};
	unsigned int sent;
	int err;

	svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
	rqstp->rq_xprt_ctxt = NULL;

	svc_set_cmsg_data(rqstp, cmh);

	mutex_lock(&xprt->xpt_mutex);
	if (svc_xprt_is_dead(xprt))
		goto out_notconn;

	err = xdr_alloc_bvec(xdr, GFP_KERNEL);
	if (err < 0)
		goto out_unlock;

	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
	if (err == -ECONNREFUSED) {
		/* ICMP error on earlier request. */
		err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
	}
	xdr_free_bvec(xdr);
	trace_svcsock_udp_send(xprt, err);
out_unlock:
	mutex_unlock(&xprt->xpt_mutex);
	if (err < 0)
		return err;
	return sent;

out_notconn:
	mutex_unlock(&xprt->xpt_mutex);
	return -ENOTCONN;
}
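/*
 * Report whether the send buffer can absorb a maximum-size reply on
 * top of the space already reserved for pending replies.
 */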
static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = xprt->xpt_server;
	unsigned long required;

	/*
	 * Set the SOCK_NOSPACE flag before checking the available
	 * sock space.
	 */
	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
	if (required*2 > sock_wspace(svsk->sk_sk))
		return 0;
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;
}

static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
{
	BUG();
	return NULL;
}

static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
{
}

static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
				       struct net *net,
				       struct sockaddr *sa, int salen,
				       int flags)
{
	return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
}

static const struct svc_xprt_ops svc_udp_ops = {
	.xpo_create = svc_udp_create,
	.xpo_recvfrom = svc_udp_recvfrom,
	.xpo_sendto = svc_udp_sendto,
	.xpo_result_payload = svc_sock_result_payload,
	.xpo_release_ctxt = svc_udp_release_ctxt,
	.xpo_detach = svc_sock_detach,
	.xpo_free = svc_sock_free,
	.xpo_has_wspace = svc_udp_has_wspace,
	.xpo_accept = svc_udp_accept,
	.xpo_secure_port = svc_sock_secure_port,
	.xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
};

static struct svc_xprt_class svc_udp_class = {
	.xcl_name = "udp",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_udp_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
	.xcl_ident = XPRT_TRANSPORT_UDP,
};
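/*
 * Prepare a UDP socket for RPC service: install the data-ready and
 * write-space callbacks, size the socket buffers, and ask the network
 * layer for destination-address info on each datagram.
 */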
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
		      &svsk->sk_xprt, serv);
	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
	svsk->sk_sk->sk_data_ready = svc_data_ready;
	svsk->sk_sk->sk_write_space = svc_write_space;

	/* The initial setting must have enough space to
	 * receive and respond to one request.
	 * svc_udp_recvfrom will re-adjust if necessary
	 */
	svc_sock_setbufsize(svsk, 3);

	/* data might have come in before data_ready set up */
	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);

	/* make sure we get destination address info */
	switch (svsk->sk_sk->sk_family) {
	case AF_INET:
		ip_sock_set_pktinfo(svsk->sk_sock->sk);
		break;
	case AF_INET6:
		ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
		break;
	default:
		BUG();
	}
}

/*
 * A data_ready event on a listening socket means there's a connection
 * pending. Do not use state_change as a substitute for it.
 */
static void svc_tcp_listen_data_ready(struct sock *sk)
{
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;

	/*
	 * This callback may be called twice when a new connection
	 * is established as a child socket inherits everything
	 * from a parent LISTEN socket.
	 *    1) data_ready method of the parent socket will be called
	 *       when one of the child sockets becomes ESTABLISHED.
	 *    2) data_ready method of the child socket may be called
	 *       when it receives data before the socket is accepted.
	 * In case of 2, we should ignore it silently and DO NOT
	 * dereference svsk.
	 */
	if (sk->sk_state != TCP_LISTEN)
		return;

	if (svsk) {
		/* Refer to svc_setup_socket() for details. */
		rmb();
		svsk->sk_odata(sk);
		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
		svc_xprt_enqueue(&svsk->sk_xprt);
	}
}

/*
 * A state change on a connected socket means it's dying or dead.
 */
static void svc_tcp_state_change(struct sock *sk)
{
	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;

	if (svsk) {
		/* Refer to svc_setup_socket() for details. */
		rmb();
		svsk->sk_ostate(sk);
		trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
		if (sk->sk_state != TCP_ESTABLISHED)
			svc_xprt_deferred_close(&svsk->sk_xprt);
	}
}

/*
 * Accept a TCP connection
 */
static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct sockaddr_storage addr;
	struct sockaddr	*sin = (struct sockaddr *) &addr;
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	struct socket	*sock = svsk->sk_sock;
	struct socket	*newsock;
	struct svc_sock	*newsvsk;
	int		err, slen;

	if (!sock)
		return NULL;

	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
	err = kernel_accept(sock, &newsock, O_NONBLOCK);
	if (err < 0) {
		if (err == -ENOMEM)
			printk(KERN_WARNING "%s: no more sockets!\n",
			       serv->sv_name);
		else if (err != -EAGAIN)
			net_warn_ratelimited("%s: accept failed (err %d)!\n",
					     serv->sv_name, -err);
		trace_svcsock_accept_err(xprt, serv->sv_name, err);
		return NULL;
	}
	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);

	err = kernel_getpeername(newsock, sin);
	if (err < 0) {
		trace_svcsock_getpeername_err(xprt, serv->sv_name, err);
		goto failed;		/* aborted connection or whatever */
	}
	slen = err;

	/* Reset the inherited callbacks before calling svc_setup_socket */
	newsock->sk->sk_state_change = svsk->sk_ostate;
	newsock->sk->sk_data_ready = svsk->sk_odata;
	newsock->sk->sk_write_space = svsk->sk_owspace;

	/* make sure that a write doesn't block forever when
	 * low on memory
	 */
	newsock->sk->sk_sndtimeo = HZ*30;

	newsvsk = svc_setup_socket(serv, newsock,
				   (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY));
	if (IS_ERR(newsvsk))
		goto failed;
	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
	err = kernel_getsockname(newsock, sin);
	slen = err;
	if (unlikely(err < 0))
		slen = offsetof(struct sockaddr, sa_data);
	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);

	if (sock_is_loopback(newsock->sk))
		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
	else
		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
	if (serv->sv_stats)
		serv->sv_stats->nettcpconn++;

	return &newsvsk->sk_xprt;

failed:
	sock_release(newsock);
	return NULL;
}
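/*
 * Pages holding a partially received record were parked in sk_pages
 * by svc_tcp_save_pages().  Move them back into rqstp->rq_pages so
 * the receive can continue where it left off.
 */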
static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
				    struct svc_rqst *rqstp)
{
	size_t len = svsk->sk_datalen;
	unsigned int i, npages;

	if (!len)
		return 0;
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		if (rqstp->rq_pages[i] != NULL)
			put_page(rqstp->rq_pages[i]);
		BUG_ON(svsk->sk_pages[i] == NULL);
		rqstp->rq_pages[i] = svsk->sk_pages[i];
		svsk->sk_pages[i] = NULL;
	}
	rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
	return len;
}

static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
	unsigned int i, len, npages;

	if (svsk->sk_datalen == 0)
		return;
	len = svsk->sk_datalen;
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		svsk->sk_pages[i] = rqstp->rq_pages[i];
		rqstp->rq_pages[i] = NULL;
	}
}

static void svc_tcp_clear_pages(struct svc_sock *svsk)
{
	unsigned int i, len, npages;

	if (svsk->sk_datalen == 0)
		goto out;
	len = svsk->sk_datalen;
	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		if (svsk->sk_pages[i] == NULL) {
			WARN_ON_ONCE(1);
			continue;
		}
		put_page(svsk->sk_pages[i]);
		svsk->sk_pages[i] = NULL;
	}
out:
	svsk->sk_tcplen = 0;
	svsk->sk_datalen = 0;
}

/*
 * Receive fragment record header into sk_marker.
 */
static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
				   struct svc_rqst *rqstp)
{
	ssize_t want, len;

	/* If we haven't gotten the record length yet,
	 * get the next four bytes.
	 */
	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
		struct msghdr	msg = { NULL };
		struct kvec	iov;

		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
		iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
		iov.iov_len  = want;
		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
		len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
		if (len < 0)
			return len;
		svsk->sk_tcplen += len;
		if (len < want) {
			/* call again to read the remaining bytes */
			goto err_short;
		}
		trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
		if (svc_sock_reclen(svsk) + svsk->sk_datalen >
		    svsk->sk_xprt.xpt_server->sv_max_mesg)
			goto err_too_large;
	}
	return svc_sock_reclen(svsk);

err_too_large:
	net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
			       __func__, svsk->sk_xprt.xpt_server->sv_name,
			       svc_sock_reclen(svsk));
	svc_xprt_deferred_close(&svsk->sk_xprt);
err_short:
	return -EAGAIN;
}
static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
	struct rpc_xprt	*bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
	struct rpc_rqst *req = NULL;
	struct kvec *src, *dst;
	__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	__be32 xid;
	__be32 calldir;

	xid = *p++;
	calldir = *p;

	if (!bc_xprt)
		return -EAGAIN;
	spin_lock(&bc_xprt->queue_lock);
	req = xprt_lookup_rqst(bc_xprt, xid);
	if (!req)
		goto unlock_notfound;

	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
	/*
	 * XXX!: cheating for now!  Only copying HEAD.
	 * But we know this is good enough for now (in fact, for any
	 * callback reply in the foreseeable future).
	 */
	dst = &req->rq_private_buf.head[0];
	src = &rqstp->rq_arg.head[0];
	if (dst->iov_len < src->iov_len)
		goto unlock_eagain; /* whatever; just giving up. */
	memcpy(dst->iov_base, src->iov_base, src->iov_len);
	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
	rqstp->rq_arg.len = 0;
	spin_unlock(&bc_xprt->queue_lock);
	return 0;
unlock_notfound:
	printk(KERN_NOTICE
		"%s: Got unrecognized reply: "
		"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
		__func__, ntohl(calldir),
		bc_xprt, ntohl(xid));
unlock_eagain:
	spin_unlock(&bc_xprt->queue_lock);
	return -EAGAIN;
}

static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
	/* If we have more data, signal svc_xprt_enqueue() to try again */
	svsk->sk_tcplen = 0;
	svsk->sk_marker = xdr_zero;
}

/**
 * svc_tcp_recvfrom - Receive data from a TCP socket
 * @rqstp: request structure into which to receive an RPC Call
 *
 * Called in a loop when XPT_DATA has been set.
 *
 * Read the 4-byte stream record marker, then use the record length
 * in that marker to set up exactly the resources needed to receive
 * the next RPC message into @rqstp.
 *
 * Returns:
 *	On success, the number of bytes in a received RPC Call, or
 *	%0 if a complete RPC Call message was not ready to return
 *
 * The zero return case handles partial receives and callback Replies.
 * The state of a partial receive is preserved in the svc_sock for
 * the next call to svc_tcp_recvfrom.
 */
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk =
		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
	size_t want, base;
	ssize_t len;
	__be32 *p;
	__be32 calldir;

	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
	len = svc_tcp_read_marker(svsk, rqstp);
	if (len < 0)
		goto error;

	base = svc_tcp_restore_pages(svsk, rqstp);
	want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
	len = svc_tcp_read_msg(rqstp, base + want, base);
	if (len >= 0) {
		trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
		svsk->sk_tcplen += len;
		svsk->sk_datalen += len;
	}
	if (len != want || !svc_sock_final_rec(svsk))
		goto err_incomplete;
	if (svsk->sk_datalen < 8)
		goto err_nuts;

	rqstp->rq_arg.len = svsk->sk_datalen;
	rqstp->rq_arg.page_base = 0;
	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
		rqstp->rq_arg.page_len = 0;
	} else
		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;

	rqstp->rq_xprt_ctxt = NULL;
	rqstp->rq_prot = IPPROTO_TCP;
	if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
		set_bit(RQ_LOCAL, &rqstp->rq_flags);
	else
		clear_bit(RQ_LOCAL, &rqstp->rq_flags);

	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	calldir = p[1];
	if (calldir)
		len = receive_cb_reply(svsk, rqstp);

	/* Reset TCP read info */
	svsk->sk_datalen = 0;
	svc_tcp_fragment_received(svsk);

	if (len < 0)
		goto error;

	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
	if (serv->sv_stats)
		serv->sv_stats->nettcpcnt++;

	svc_xprt_received(rqstp->rq_xprt);
	return rqstp->rq_arg.len;

err_incomplete:
	svc_tcp_save_pages(svsk, rqstp);
	if (len < 0 && len != -EAGAIN)
		goto err_delete;
	if (len == want)
		svc_tcp_fragment_received(svsk);
	else
		trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
					     svc_sock_reclen(svsk),
					     svsk->sk_tcplen - sizeof(rpc_fraghdr));
	goto err_noclose;
error:
	if (len != -EAGAIN)
		goto err_delete;
	trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
	goto err_noclose;
err_nuts:
	svsk->sk_datalen = 0;
err_delete:
	trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
	svc_xprt_deferred_close(&svsk->sk_xprt);
err_noclose:
	svc_xprt_received(rqstp->rq_xprt);
	return 0;	/* record not complete */
}
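/*
 * Send one kvec via the zero-copy sendpage path.  The caller must
 * guarantee the underlying memory is not modified until the send
 * completes (see the comment on svc_tcp_sendmsg()).
 */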
static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
			     int flags)
{
	return kernel_sendpage(sock, virt_to_page(vec->iov_base),
			       offset_in_page(vec->iov_base),
			       vec->iov_len, flags);
}

/*
 * kernel_sendpage() is used exclusively to reduce the number of
 * copy operations in this path. Therefore the caller must ensure
 * that the pages backing @xdr are unchanging.
 *
 * In addition, the logic assumes that .bv_len is never larger
 * than PAGE_SIZE.
 */
static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
			   rpc_fraghdr marker, unsigned int *sentp)
{
	const struct kvec *head = xdr->head;
	const struct kvec *tail = xdr->tail;
	struct kvec rm = {
		.iov_base	= &marker,
		.iov_len	= sizeof(marker),
	};
	struct msghdr msg = {
		.msg_flags	= 0,
	};
	int ret;

	*sentp = 0;
	ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
	if (ret < 0)
		return ret;

	ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
	if (ret < 0)
		return ret;
	*sentp += ret;
	if (ret != rm.iov_len)
		return -EAGAIN;

	ret = svc_tcp_send_kvec(sock, head, 0);
	if (ret < 0)
		return ret;
	*sentp += ret;
	if (ret != head->iov_len)
		goto out;

	if (xdr->page_len) {
		unsigned int offset, len, remaining;
		struct bio_vec *bvec;

		bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
		offset = offset_in_page(xdr->page_base);
		remaining = xdr->page_len;
		while (remaining > 0) {
			len = min(remaining, bvec->bv_len - offset);
			ret = kernel_sendpage(sock, bvec->bv_page,
					      bvec->bv_offset + offset,
					      len, 0);
			if (ret < 0)
				return ret;
			*sentp += ret;
			if (ret != len)
				goto out;
			remaining -= len;
			offset = 0;
			bvec++;
		}
	}

	if (tail->iov_len) {
		ret = svc_tcp_send_kvec(sock, tail, 0);
		if (ret < 0)
			return ret;
		*sentp += ret;
	}

out:
	return 0;
}

/**
 * svc_tcp_sendto - Send out a reply on a TCP socket
 * @rqstp: completed svc_rqst
 *
 * xpt_mutex ensures @rqstp's whole message is written to the socket
 * without interruption.
 *
 * Returns the number of bytes sent, or a negative errno.
 */
static int svc_tcp_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svc_sock	*svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct xdr_buf *xdr = &rqstp->rq_res;
	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
					 (u32)xdr->len);
	unsigned int sent;
	int err;

	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
	rqstp->rq_xprt_ctxt = NULL;

	atomic_inc(&svsk->sk_sendqlen);
	mutex_lock(&xprt->xpt_mutex);
	if (svc_xprt_is_dead(xprt))
		goto out_notconn;
	tcp_sock_set_cork(svsk->sk_sk, true);
	err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
	xdr_free_bvec(xdr);
	trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
	if (err < 0 || sent != (xdr->len + sizeof(marker)))
		goto out_close;
	if (atomic_dec_and_test(&svsk->sk_sendqlen))
		tcp_sock_set_cork(svsk->sk_sk, false);
	mutex_unlock(&xprt->xpt_mutex);
	return sent;

out_notconn:
	atomic_dec(&svsk->sk_sendqlen);
	mutex_unlock(&xprt->xpt_mutex);
	return -ENOTCONN;
out_close:
	pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
		  xprt->xpt_server->sv_name,
		  (err < 0) ? "got error" : "sent",
		  (err < 0) ? err : sent, xdr->len);
	svc_xprt_deferred_close(xprt);
	atomic_dec(&svsk->sk_sendqlen);
	mutex_unlock(&xprt->xpt_mutex);
	return -EAGAIN;
}

static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
				       struct net *net,
				       struct sockaddr *sa, int salen,
				       int flags)
{
	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}

static const struct svc_xprt_ops svc_tcp_ops = {
	.xpo_create = svc_tcp_create,
	.xpo_recvfrom = svc_tcp_recvfrom,
	.xpo_sendto = svc_tcp_sendto,
	.xpo_result_payload = svc_sock_result_payload,
	.xpo_release_ctxt = svc_tcp_release_ctxt,
	.xpo_detach = svc_tcp_sock_detach,
	.xpo_free = svc_sock_free,
	.xpo_has_wspace = svc_tcp_has_wspace,
	.xpo_accept = svc_tcp_accept,
	.xpo_secure_port = svc_sock_secure_port,
	.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
};

static struct svc_xprt_class svc_tcp_class = {
	.xcl_name = "tcp",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_tcp_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
	.xcl_ident = XPRT_TRANSPORT_TCP,
};

void svc_init_xprt_sock(void)
{
	svc_reg_xprt_class(&svc_tcp_class);
	svc_reg_xprt_class(&svc_udp_class);
}

void svc_cleanup_xprt_sock(void)
{
	svc_unreg_xprt_class(&svc_tcp_class);
	svc_unreg_xprt_class(&svc_udp_class);
}
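/*
 * Prepare a TCP socket for RPC service.  A listener gets the
 * connection-pending callback; a connected socket gets the data-ready
 * and write-space callbacks plus fresh stream record-marker state.
 */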
static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
	struct sock	*sk = svsk->sk_sk;

	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
		      &svsk->sk_xprt, serv);
	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
	set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
	if (sk->sk_state == TCP_LISTEN) {
		strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
		sk->sk_data_ready = svc_tcp_listen_data_ready;
		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
	} else {
		sk->sk_state_change = svc_tcp_state_change;
		sk->sk_data_ready = svc_data_ready;
		sk->sk_write_space = svc_write_space;

		svsk->sk_marker = xdr_zero;
		svsk->sk_tcplen = 0;
		svsk->sk_datalen = 0;
		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));

		tcp_sock_set_nodelay(sk);

		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
		switch (sk->sk_state) {
		case TCP_SYN_RECV:
		case TCP_ESTABLISHED:
			break;
		default:
			svc_xprt_deferred_close(&svsk->sk_xprt);
		}
	}
}

void svc_sock_update_bufs(struct svc_serv *serv)
{
	/*
	 * The number of server threads has changed. Update
	 * rcvbuf and sndbuf accordingly on all sockets
	 */
	struct svc_sock *svsk;

	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
	spin_unlock_bh(&serv->sv_lock);
}
EXPORT_SYMBOL_GPL(svc_sock_update_bufs);

/*
 * Initialize socket for RPC use and create svc_sock struct
 */
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
					 struct socket *sock,
					 int flags)
{
	struct svc_sock	*svsk;
	struct sock	*inet;
	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
	int		err = 0;

	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
	if (!svsk)
		return ERR_PTR(-ENOMEM);

	inet = sock->sk;

	/* Register socket with portmapper */
	if (pmap_register)
		err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
				   inet->sk_protocol,
				   ntohs(inet_sk(inet)->inet_sport));

	if (err < 0) {
		kfree(svsk);
		return ERR_PTR(err);
	}

	svsk->sk_sock = sock;
	svsk->sk_sk = inet;
	svsk->sk_ostate = inet->sk_state_change;
	svsk->sk_odata = inet->sk_data_ready;
	svsk->sk_owspace = inet->sk_write_space;
	/*
	 * This barrier is necessary in order to prevent a race condition
	 * with svc_data_ready(), svc_tcp_listen_data_ready() and others
	 * when the callbacks saved above are invoked.
	 */
	wmb();
	inet->sk_user_data = svsk;

	/* Initialize the socket */
	if (sock->type == SOCK_DGRAM)
		svc_udp_init(svsk, serv);
	else
		svc_tcp_init(svsk, serv);

	trace_svcsock_new_socket(sock);
	return svsk;
}

/**
 * svc_addsock - add a listener socket to an RPC service
 * @serv: pointer to RPC service to which to add a new listener
 * @net: caller's network namespace
 * @fd: file descriptor of the new listener
 * @name_return: pointer to buffer to fill in with name of listener
 * @len: size of the buffer
 * @cred: credential
 *
 * Fills in socket name and returns positive length of name if successful.
 * Name is terminated with '\n'.  On error, returns a negative errno
 * value.
 */
int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
		char *name_return, const size_t len, const struct cred *cred)
{
	int err = 0;
	struct socket *so = sockfd_lookup(fd, &err);
	struct svc_sock *svsk = NULL;
	struct sockaddr_storage addr;
	struct sockaddr *sin = (struct sockaddr *)&addr;
	int salen;

	if (!so)
		return err;
	err = -EINVAL;
	if (sock_net(so->sk) != net)
		goto out;
	err = -EAFNOSUPPORT;
	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
		goto out;
	err = -EPROTONOSUPPORT;
	if (so->sk->sk_protocol != IPPROTO_TCP &&
	    so->sk->sk_protocol != IPPROTO_UDP)
		goto out;
	err = -EISCONN;
	if (so->state > SS_UNCONNECTED)
		goto out;
	err = -ENOENT;
	if (!try_module_get(THIS_MODULE))
		goto out;

	svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
	if (IS_ERR(svsk)) {
		module_put(THIS_MODULE);
		err = PTR_ERR(svsk);
		goto out;
	}
	salen = kernel_getsockname(svsk->sk_sock, sin);
	if (salen >= 0)
		svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
	svsk->sk_xprt.xpt_cred = get_cred(cred);
	svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
	return svc_one_sock_name(svsk, name_return, len);
out:
	sockfd_put(so);
	return err;
}
EXPORT_SYMBOL_GPL(svc_addsock);

/*
 * Create socket for RPC service.
 */
static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
					  int protocol,
					  struct net *net,
					  struct sockaddr *sin, int len,
					  int flags)
{
	struct svc_sock	*svsk;
	struct socket	*sock;
	int		error;
	int		type;
	struct sockaddr_storage addr;
	struct sockaddr *newsin = (struct sockaddr *)&addr;
	int		newlen;
	int		family;

	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
		printk(KERN_WARNING "svc: only UDP and TCP "
				"sockets supported\n");
		return ERR_PTR(-EINVAL);
	}

	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
	switch (sin->sa_family) {
	case AF_INET6:
		family = PF_INET6;
		break;
	case AF_INET:
		family = PF_INET;
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	error = __sock_create(net, family, type, protocol, &sock, 1);
	if (error < 0)
		return ERR_PTR(error);

	svc_reclassify_socket(sock);

	/*
	 * If this is a PF_INET6 listener, we want to avoid
	 * getting requests from IPv4 remotes.  Those should
	 * be shunted to a PF_INET listener via rpcbind.
	 */
	if (family == PF_INET6)
		ip6_sock_set_v6only(sock->sk);
	if (type == SOCK_STREAM)
		sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
	error = kernel_bind(sock, sin, len);
	if (error < 0)
		goto bummer;

	error = kernel_getsockname(sock, newsin);
	if (error < 0)
		goto bummer;
	newlen = error;

	if (protocol == IPPROTO_TCP) {
		if ((error = kernel_listen(sock, 64)) < 0)
			goto bummer;
	}

	svsk = svc_setup_socket(serv, sock, flags);
	if (IS_ERR(svsk)) {
		error = PTR_ERR(svsk);
		goto bummer;
	}
	svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
	return (struct svc_xprt *)svsk;
bummer:
	sock_release(sock);
	return ERR_PTR(error);
}

/*
 * Detach the svc_sock from the socket so that no
 * more callbacks occur.
 */
static void svc_sock_detach(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct sock *sk = svsk->sk_sk;

	/* put back the old socket callbacks */
	lock_sock(sk);
	sk->sk_state_change = svsk->sk_ostate;
	sk->sk_data_ready = svsk->sk_odata;
	sk->sk_write_space = svsk->sk_owspace;
	sk->sk_user_data = NULL;
	release_sock(sk);
}

/*
 * Disconnect the socket, and reset the callbacks
 */
static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

	svc_sock_detach(xprt);

	if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
		svc_tcp_clear_pages(svsk);
		kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
	}
}

/*
 * Free the svc_sock's socket resources and the svc_sock itself.
 */
static void svc_sock_free(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

	if (svsk->sk_sock->file)
		sockfd_put(svsk->sk_sock);
	else
		sock_release(svsk->sk_sock);
	kfree(svsk);
}