// SPDX-License-Identifier: GPL-2.0
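/* espintcp: TCP encapsulation of IKE and ESP (RFC 8229). A TCP ULP that
 * frames the byte stream into length-prefixed records, delivering ESP to
 * the xfrm stack and IKE messages to userspace.
 */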
#include <net/tcp.h>
#include <net/strparser.h>
#include <net/xfrm.h>
#include <net/esp.h>
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
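/* A record whose non-ESP marker is zero carries an IKE message (RFC 8229).
 * Charge it to the socket's receive buffer and queue it on ike_queue, where
 * espintcp_recvmsg() will hand it to userspace.
 */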
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
			  struct sock *sk)
{
	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
	    !sk_rmem_schedule(sk, skb, skb->truesize)) {
		XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR);
		kfree_skb(skb);
		return;
	}

	skb_set_owner_r(skb, sk);
	memset(skb->cb, 0, sizeof(skb->cb));
	skb_queue_tail(&ctx->ike_queue, skb);
	ctx->saved_data_ready(sk);
}
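/* ESP records are fed back into the regular xfrm receive path, via the
 * v4 or v6 encap hook depending on the socket's address family.
 */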
static void handle_esp(struct sk_buff *skb, struct sock *sk)
{
	struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb;

	skb_reset_transport_header(skb);

	/* restore IP CB, we need at least IP6CB->nhoff */
	memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header));

	rcu_read_lock();
	skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
	local_bh_disable();
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
	else
#endif
		xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
	local_bh_enable();
	rcu_read_unlock();
}
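/* strparser rcv callback: invoked once a complete length-prefixed record
 * has been assembled from the TCP stream. Dispatches keepalives (a single
 * 0xff payload byte), IKE messages (zero non-ESP marker) and ESP packets.
 */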
static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
{
	struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
						strp);
	struct strp_msg *rxm = strp_msg(skb);
	int len = rxm->full_len - 2;
	u32 nonesp_marker;
	int err;

	/* keepalive packet? */
	if (unlikely(len == 1)) {
		u8 data;

		err = skb_copy_bits(skb, rxm->offset + 2, &data, 1);
		if (err < 0) {
			XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
			kfree_skb(skb);
			return;
		}

		if (data == 0xff) {
			kfree_skb(skb);
			return;
		}
	}

	/* drop other short messages */
	if (unlikely(len <= sizeof(nonesp_marker))) {
		XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
		kfree_skb(skb);
		return;
	}

	err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
			    sizeof(nonesp_marker));
	if (err < 0) {
		XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
		kfree_skb(skb);
		return;
	}

	/* remove header, leave non-ESP marker/SPI */
	if (!pskb_pull(skb, rxm->offset + 2)) {
		XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
		kfree_skb(skb);
		return;
	}

	if (pskb_trim(skb, rxm->full_len - 2) != 0) {
		XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
		kfree_skb(skb);
		return;
	}

	if (nonesp_marker == 0)
		handle_nonesp(ctx, skb, strp->sk);
	else
		handle_esp(skb, strp->sk);
}
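/* strparser parse callback: each record starts with a 2-byte big-endian
 * length field that includes the field itself (RFC 8229 framing). Return
 * the full record length, or 0 if more data is needed.
 */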
static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_msg *rxm = strp_msg(skb);
	__be16 blen;
	u16 len;
	int err;

	if (skb->len < rxm->offset + 2)
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
	if (err < 0)
		return err;

	len = be16_to_cpu(blen);
	if (len < 2)
		return -EINVAL;

	return len;
}
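/* recvmsg on an espintcp socket only returns queued IKE messages, with
 * datagram semantics: one message per call, excess bytes are truncated.
 */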
static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct sk_buff *skb;
	int err = 0;
	int copied;
	int off = 0;

	skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
	if (!skb) {
		if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
			return 0;
		return err;
	}

	copied = len;
	if (copied > skb->len)
		copied = skb->len;
	else if (copied < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}

	if (flags & MSG_TRUNC)
		copied = skb->len;
	kfree_skb(skb);

	return copied;
}
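/* Park an ESP skb for later transmission while the socket is owned by the
 * user; the backlog is bounded by netdev_max_backlog. The queue is flushed
 * from espintcp_release().
 */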
int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);

	if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
		return -ENOBUFS;

	__skb_queue_tail(&ctx->out_queue, skb);

	return 0;
}
EXPORT_SYMBOL_GPL(espintcp_queue_out);

/* espintcp length field is 2B and length includes the length field's size */
#define MAX_ESPINTCP_MSG (((1 << 16) - 1) - 2)
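/* Transmit a partially-sent skb (the kernel's own ESP output path), with
 * the socket lock held, until it has been fully pushed into the stream.
 */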
static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg,
				   int flags)
{
	do {
		int ret;

		ret = skb_send_sock_locked(sk, emsg->skb,
					   emsg->offset, emsg->len);
		if (ret < 0)
			return ret;

		emsg->len -= ret;
		emsg->offset += ret;
	} while (emsg->len > 0);

	kfree_skb(emsg->skb);
	memset(emsg, 0, sizeof(*emsg));

	return 0;
}
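/* Transmit a partially-sent sk_msg (userspace IKE data queued by
 * espintcp_sendmsg()) page by page, recording the offset and scatterlist
 * position so an -EAGAIN can be resumed later.
 */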
static int espintcp_sendskmsg_locked(struct sock *sk,
				     struct espintcp_msg *emsg, int flags)
{
	struct sk_msg *skmsg = &emsg->skmsg;
	struct scatterlist *sg;
	int done = 0;
	int ret;

	flags |= MSG_SENDPAGE_NOTLAST;
	sg = &skmsg->sg.data[skmsg->sg.start];
	do {
		size_t size = sg->length - emsg->offset;
		int offset = sg->offset + emsg->offset;
		struct page *p;

		emsg->offset = 0;

		if (sg_is_last(sg))
			flags &= ~MSG_SENDPAGE_NOTLAST;

		p = sg_page(sg);
retry:
		ret = do_tcp_sendpages(sk, p, offset, size, flags);
		if (ret < 0) {
			emsg->offset = offset - sg->offset;
			skmsg->sg.start += done;
			return ret;
		}

		if (ret != size) {
			offset += ret;
			size -= ret;
			goto retry;
		}

		done++;
		put_page(p);
		sk_mem_uncharge(sk, sg->length);
		sg = sg_next(sg);
	} while (sg);

	memset(emsg, 0, sizeof(*emsg));

	return 0;
}
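/* Flush the pending partial message, if any. tx_running guards against
 * reentry between the sendmsg path and the write_space worker.
 */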
static int espintcp_push_msgs(struct sock *sk, int flags)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct espintcp_msg *emsg = &ctx->partial;
	int err;

	if (!emsg->len)
		return 0;

	if (ctx->tx_running)
		return -EAGAIN;
	ctx->tx_running = 1;

	if (emsg->skb)
		err = espintcp_sendskb_locked(sk, emsg, flags);
	else
		err = espintcp_sendskmsg_locked(sk, emsg, flags);
	if (err == -EAGAIN) {
		ctx->tx_running = 0;
		return flags & MSG_DONTWAIT ? -EAGAIN : 0;
	}
	if (!err)
		memset(emsg, 0, sizeof(*emsg));

	ctx->tx_running = 0;

	return err;
}
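/* Hand a fully built ESP record from the xfrm output path to the TCP
 * socket: it becomes the pending partial message and is pushed out
 * immediately if the previous one has drained.
 */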
int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct espintcp_msg *emsg = &ctx->partial;
	unsigned int len;
	int offset;

	if (sk->sk_state != TCP_ESTABLISHED) {
		kfree_skb(skb);
		return -ECONNRESET;
	}

	offset = skb_transport_offset(skb);
	len = skb->len - offset;

	espintcp_push_msgs(sk, 0);

	if (emsg->len) {
		kfree_skb(skb);
		return -ENOBUFS;
	}

	skb_set_owner_w(skb, sk);

	emsg->offset = offset;
	emsg->len = len;
	emsg->skb = skb;

	espintcp_push_msgs(sk, 0);

	return 0;
}
EXPORT_SYMBOL_GPL(espintcp_push_skb);
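/* sendmsg for userspace IKE traffic: allocate an sk_msg large enough for
 * the 2-byte length prefix plus the payload, copy both in, and push the
 * record. On a partial send the remainder stays in ctx->partial and is
 * completed later by the write_space worker.
 */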
static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct espintcp_msg *emsg = &ctx->partial;
	struct iov_iter pfx_iter;
	struct kvec pfx_iov = {};
	size_t msglen = size + 2;
	char buf[2] = {0};
	int err, end;

	if (msg->msg_flags & ~MSG_DONTWAIT)
		return -EOPNOTSUPP;

	if (size > MAX_ESPINTCP_MSG)
		return -EMSGSIZE;

	if (msg->msg_controllen)
		return -EOPNOTSUPP;

	lock_sock(sk);

	err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
	if (err < 0) {
		if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT))
			err = -ENOBUFS;
		goto unlock;
	}

	sk_msg_init(&emsg->skmsg);
	while (1) {
		/* only -ENOMEM is possible since we don't coalesce */
		err = sk_msg_alloc(sk, &emsg->skmsg, msglen, 0);
		if (!err)
			break;

		err = sk_stream_wait_memory(sk, &timeo);
		if (err)
			goto fail;
	}

	*((__be16 *)buf) = cpu_to_be16(msglen);
	pfx_iov.iov_base = buf;
	pfx_iov.iov_len = sizeof(buf);
	iov_iter_kvec(&pfx_iter, ITER_SOURCE, &pfx_iov, 1, pfx_iov.iov_len);

	err = sk_msg_memcopy_from_iter(sk, &pfx_iter, &emsg->skmsg,
				       pfx_iov.iov_len);
	if (err < 0)
		goto fail;

	err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, &emsg->skmsg, size);
	if (err < 0)
		goto fail;

	end = emsg->skmsg.sg.end;
	emsg->len = size;
	sk_msg_iter_var_prev(end);
	sg_mark_end(sk_msg_elem(&emsg->skmsg, end));

	tcp_rate_check_app_limited(sk);

	err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
	/* this message could be partially sent, keep it */

	release_sock(sk);

	return size;

fail:
	sk_msg_free(sk, &emsg->skmsg);
	memset(emsg, 0, sizeof(*emsg));
unlock:
	release_sock(sk);
	return err;
}
static struct proto espintcp_prot __ro_after_init;
static struct proto_ops espintcp_ops __ro_after_init;
static struct proto espintcp6_prot;
static struct proto_ops espintcp6_ops;
static DEFINE_MUTEX(tcpv6_prot_mutex);

static void espintcp_data_ready(struct sock *sk)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);

	strp_data_ready(&ctx->strp);
}
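/* Worker scheduled from espintcp_write_space(): retry pushing the pending
 * partial message once the TCP socket has send buffer space again.
 */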
static void espintcp_tx_work(struct work_struct *work)
{
	struct espintcp_ctx *ctx = container_of(work,
						struct espintcp_ctx, work);
	struct sock *sk = ctx->strp.sk;

	lock_sock(sk);
	if (!ctx->tx_running)
		espintcp_push_msgs(sk, 0);
	release_sock(sk);
}
static void espintcp_write_space(struct sock *sk)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);

	schedule_work(&ctx->work);
	ctx->saved_write_space(sk);
}

static void espintcp_destruct(struct sock *sk)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);

	ctx->saved_destruct(sk);
	kfree(ctx);
}

bool tcp_is_ulp_esp(struct sock *sk)
{
	return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
}
EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);

static void build_protos(struct proto *espintcp_prot,
			 struct proto_ops *espintcp_ops,
			 const struct proto *orig_prot,
			 const struct proto_ops *orig_ops);
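/* ULP init hook, run when userspace sets the "espintcp" ULP on a TCP
 * socket: attach the strparser, swap in the espintcp proto/proto_ops, and
 * save the original socket callbacks so they can be chained and restored.
 */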
static int espintcp_init_sk(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct strp_callbacks cb = {
		.rcv_msg = espintcp_rcv,
		.parse_msg = espintcp_parse,
	};
	struct espintcp_ctx *ctx;
	int err;

	/* sockmap is not compatible with espintcp */
	if (sk->sk_user_data)
		return -EBUSY;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	err = strp_init(&ctx->strp, sk, &cb);
	if (err)
		goto free;

	__sk_dst_reset(sk);

	strp_check_rcv(&ctx->strp);
	skb_queue_head_init(&ctx->ike_queue);
	skb_queue_head_init(&ctx->out_queue);

	if (sk->sk_family == AF_INET) {
		sk->sk_prot = &espintcp_prot;
		sk->sk_socket->ops = &espintcp_ops;
	} else {
		mutex_lock(&tcpv6_prot_mutex);
		if (!espintcp6_prot.recvmsg)
			build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
		mutex_unlock(&tcpv6_prot_mutex);

		sk->sk_prot = &espintcp6_prot;
		sk->sk_socket->ops = &espintcp6_ops;
	}
	ctx->saved_data_ready = sk->sk_data_ready;
	ctx->saved_write_space = sk->sk_write_space;
	ctx->saved_destruct = sk->sk_destruct;
	sk->sk_data_ready = espintcp_data_ready;
	sk->sk_write_space = espintcp_write_space;
	sk->sk_destruct = espintcp_destruct;
	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_WORK(&ctx->work, espintcp_tx_work);

	/* avoid using task_frag */
	sk->sk_allocation = GFP_ATOMIC;

	return 0;

free:
	kfree(ctx);
	return err;
}
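/* release_cb replacement: when the socket lock is released, flush any ESP
 * skbs that espintcp_queue_out() parked while the socket was owned.
 */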
static void espintcp_release(struct sock *sk)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&ctx->out_queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		espintcp_push_skb(sk, skb);

	tcp_release_cb(sk);
}
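/* close replacement: stop the strparser and TX worker, free anything still
 * queued or partially sent, restore plain TCP ops, then hand off to
 * tcp_close().
 */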
static void espintcp_close(struct sock *sk, long timeout)
{
	struct espintcp_ctx *ctx = espintcp_getctx(sk);
	struct espintcp_msg *emsg = &ctx->partial;

	strp_stop(&ctx->strp);
	sk->sk_prot = &tcp_prot;
	barrier();

	cancel_work_sync(&ctx->work);
	strp_done(&ctx->strp);

	skb_queue_purge(&ctx->out_queue);
	skb_queue_purge(&ctx->ike_queue);

	if (emsg->len) {
		if (emsg->skb)
			kfree_skb(emsg->skb);
		else
			sk_msg_free(sk, &emsg->skmsg);
	}

	tcp_close(sk, timeout);
}
static __poll_t espintcp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	__poll_t mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;
	struct espintcp_ctx *ctx = espintcp_getctx(sk);

	if (!skb_queue_empty(&ctx->ike_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}
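/* Clone the original proto/proto_ops and override only the entry points
 * espintcp needs; everything else keeps plain TCP behaviour.
 */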
static void build_protos(struct proto *espintcp_prot,
			 struct proto_ops *espintcp_ops,
			 const struct proto *orig_prot,
			 const struct proto_ops *orig_ops)
{
	memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
	memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
	espintcp_prot->sendmsg = espintcp_sendmsg;
	espintcp_prot->recvmsg = espintcp_recvmsg;
	espintcp_prot->close = espintcp_close;
	espintcp_prot->release_cb = espintcp_release;
	espintcp_ops->poll = espintcp_poll;
}

static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
	.name = "espintcp",
	.owner = THIS_MODULE,
	.init = espintcp_init_sk,
};

void __init espintcp_init(void)
{
	build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);

	tcp_register_ulp(&espintcp_ulp);
}