peer_event.c

// SPDX-License-Identifier: GPL-2.0-or-later
/* Peer event handling, typically ICMP messages.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/icmp.h>
#include "ar-internal.h"

static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
				   enum rxrpc_call_completion);
/*
 * Find the peer associated with an ICMPv4 packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
						     struct sk_buff *skb,
						     unsigned int udp_offset,
						     unsigned int *info,
						     struct sockaddr_rxrpc *srx)
{
	struct iphdr *ip, *ip0 = ip_hdr(skb);
	struct icmphdr *icmp = icmp_hdr(skb);
	struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);

	_enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);

	switch (icmp->type) {
	case ICMP_DEST_UNREACH:
		*info = ntohs(icmp->un.frag.mtu);
		fallthrough;
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
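		/* An ICMP error message quotes the offending datagram, so the
		 * inner IP header begins right after the 8-byte ICMP header.
		 */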
		ip = (struct iphdr *)((void *)icmp + 8);
		break;
	default:
		return NULL;
	}

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
	 * versa?
	 */
	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;
#endif

	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

	_net("ICMP {%pISp}", &srx->transport);
	return rxrpc_lookup_peer_rcu(local, srx);
}

#ifdef CONFIG_AF_RXRPC_IPV6
/*
 * Find the peer associated with an ICMPv6 packet.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
						      struct sk_buff *skb,
						      unsigned int udp_offset,
						      unsigned int *info,
						      struct sockaddr_rxrpc *srx)
{
	struct icmp6hdr *icmp = icmp6_hdr(skb);
	struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
	struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);

	_enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);

	switch (icmp->icmp6_type) {
	case ICMPV6_DEST_UNREACH:
		*info = ntohl(icmp->icmp6_mtu);
		fallthrough;
	case ICMPV6_PKT_TOOBIG:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
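		/* As in the v4 case, the offending packet's IPv6 header
		 * follows the 8-byte ICMPv6 header.
		 */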
		ip = (struct ipv6hdr *)((void *)icmp + 8);
		break;
	default:
		return NULL;
	}

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
	 * versa?
	 */
	switch (srx->transport.family) {
	case AF_INET:
		_net("Rx ICMP6 on v4 sock");
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
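		/* An IPv4-mapped IPv6 address carries the IPv4 address in its
		 * last 32-bit word.
		 */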
		memcpy(&srx->transport.sin.sin_addr,
		       &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
		break;
	case AF_INET6:
		_net("Rx ICMP6");
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
		       sizeof(struct in6_addr));
		break;
	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

	_net("ICMP {%pISp}", &srx->transport);
	return rxrpc_lookup_peer_rcu(local, srx);
}
#endif /* CONFIG_AF_RXRPC_IPV6 */

/*
 * Handle an error received on the local endpoint as a tunnel.
 */
void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
			 unsigned int udp_offset)
{
	struct sock_extended_err ee;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer;
	unsigned int info = 0;
	int err;
	u8 version = ip_hdr(skb)->version;
	u8 type = icmp_hdr(skb)->type;
	u8 code = icmp_hdr(skb)->code;
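	/* Note: icmphdr and icmp6hdr share the layout of their first two
	 * bytes, so reading type/code via icmp_hdr() works for both versions.
	 */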
	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}

	rxrpc_new_skb(skb, rxrpc_skb_received);

	switch (ip_hdr(skb)->version) {
	case IPVERSION:
		peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
						  &info, &srx);
		break;
#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
						   &info, &srx);
		break;
#endif
	default:
		rcu_read_unlock();
		return;
	}
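	/* The lookup was done under RCU, so the peer may already be on its
	 * way to destruction; only keep it if a ref can still be taken.
	 */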
	if (peer && !rxrpc_get_peer_maybe(peer))
		peer = NULL;
	if (!peer) {
		rcu_read_unlock();
		return;
	}

	memset(&ee, 0, sizeof(ee));

	switch (version) {
	case IPVERSION:
		switch (type) {
		case ICMP_DEST_UNREACH:
			switch (code) {
			case ICMP_FRAG_NEEDED:
				rxrpc_adjust_mtu(peer, info);
				rcu_read_unlock();
				rxrpc_put_peer(peer);
				return;
			default:
				break;
			}

			err = EHOSTUNREACH;
			if (code <= NR_ICMP_UNREACH) {
				/* Might want to do something different with
				 * non-fatal errors
				 */
				//harderr = icmp_err_convert[code].fatal;
				err = icmp_err_convert[code].errno;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			err = EHOSTUNREACH;
			break;
		default:
			err = EPROTO;
			break;
		}

		ee.ee_origin = SO_EE_ORIGIN_ICMP;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		switch (type) {
		case ICMPV6_PKT_TOOBIG:
			rxrpc_adjust_mtu(peer, info);
			rcu_read_unlock();
			rxrpc_put_peer(peer);
			return;
		}

		icmpv6_err_convert(type, code, &err);

		if (err == EACCES)
			err = EHOSTUNREACH;

		ee.ee_origin = SO_EE_ORIGIN_ICMP6;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;
#endif
	}

	trace_rxrpc_rx_icmp(peer, &ee, &srx);

	rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
	rcu_read_unlock();
	rxrpc_put_peer(peer);
}

/*
 * Find the peer associated with a local error.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
						      const struct sk_buff *skb,
						      struct sockaddr_rxrpc *srx)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

	_enter("");

	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = serr->port;
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6 on v4 sock");
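			/* The reported address is v4-mapped; the IPv4 address
			 * sits in the last 4 bytes of the in6_addr.
			 */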
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset + 12,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
			break;
		}
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6");
			srx->transport.sin6.sin6_port = serr->port;
			memcpy(&srx->transport.sin6.sin6_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in6_addr));
			break;
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP on v6 sock");
			srx->transport_len = sizeof(srx->transport.sin);
			srx->transport.family = AF_INET;
			srx->transport.sin.sin_port = serr->port;
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		default:
			memcpy(&srx->transport.sin6.sin6_addr,
			       &ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
			break;
		}
		break;
#endif

	default:
		BUG();
	}

	return rxrpc_lookup_peer_rcu(local, srx);
}

/*
 * Handle an MTU/fragmentation problem.
 */
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
	_net("Rx ICMP Fragmentation Needed (%d)", mtu);

	/* wind down the local interface MTU */
	if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
		peer->if_mtu = mtu;
		_net("I/F MTU %u", mtu);
	}

	if (mtu == 0) {
		/* they didn't give us a size, estimate one */
		mtu = peer->if_mtu;
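		/* Halve large MTUs, but assume at least an Ethernet-sized
		 * path; back small ones off a little instead.
		 */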
		if (mtu > 1500) {
			mtu >>= 1;
			if (mtu < 1500)
				mtu = 1500;
		} else {
			mtu -= 100;
			if (mtu < peer->hdrsize)
				mtu = peer->hdrsize + 4;
		}
	}

	if (mtu < peer->mtu) {
		spin_lock_bh(&peer->lock);
		peer->mtu = mtu;
		peer->maxdata = peer->mtu - peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)",
		     peer->mtu, peer->maxdata);
	}
}

/*
 * Handle an error received on the local endpoint.
 */
void rxrpc_error_report(struct sock *sk)
{
	struct sock_exterr_skb *serr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer = NULL;
	struct sk_buff *skb;

	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}
	_enter("%p{%d}", sk, local->debug_id);

	/* Clear the outstanding error value on the socket so that it doesn't
	 * cause kernel_sendmsg() to return it later.
	 */
	sock_error(sk);

	skb = sock_dequeue_err_skb(sk);
	if (!skb) {
		rcu_read_unlock();
		_leave("UDP socket errqueue empty");
		return;
	}
	rxrpc_new_skb(skb, rxrpc_skb_received);
	serr = SKB_EXT_ERR(skb);
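	/* ICMP-sourced errors come in through rxrpc_encap_err_rcv() instead;
	 * only locally generated errors are handled here.
	 */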
	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
		peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
		if (peer && !rxrpc_get_peer_maybe(peer))
			peer = NULL;
		if (peer) {
			trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
			rxrpc_store_error(peer, serr);
		}
	}

	rcu_read_unlock();
	rxrpc_free_skb(skb, rxrpc_skb_freed);
	rxrpc_put_peer(peer);
	_leave("");
}

/*
 * Map an error report to error codes on the peer record.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
			      struct sock_exterr_skb *serr)
{
	enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
	struct sock_extended_err *ee;
	int err;

	_enter("");

	ee = &serr->ee;

	err = ee->ee_errno;

	switch (ee->ee_origin) {
	case SO_EE_ORIGIN_ICMP:
		switch (ee->ee_type) {
		case ICMP_DEST_UNREACH:
			switch (ee->ee_code) {
			case ICMP_NET_UNREACH:
				_net("Rx Received ICMP Network Unreachable");
				break;
			case ICMP_HOST_UNREACH:
				_net("Rx Received ICMP Host Unreachable");
				break;
			case ICMP_PORT_UNREACH:
				_net("Rx Received ICMP Port Unreachable");
				break;
			case ICMP_NET_UNKNOWN:
				_net("Rx Received ICMP Unknown Network");
				break;
			case ICMP_HOST_UNKNOWN:
				_net("Rx Received ICMP Unknown Host");
				break;
			default:
				_net("Rx Received ICMP DestUnreach code=%u",
				     ee->ee_code);
				break;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			_net("Rx Received ICMP TTL Exceeded");
			break;

		default:
			_proto("Rx Received ICMP error { type=%u code=%u }",
			       ee->ee_type, ee->ee_code);
			break;
		}
		break;

	case SO_EE_ORIGIN_NONE:
	case SO_EE_ORIGIN_LOCAL:
		_proto("Rx Received local error { error=%d }", err);
		compl = RXRPC_CALL_LOCAL_ERROR;
		break;

	case SO_EE_ORIGIN_ICMP6:
		if (err == EACCES)
			err = EHOSTUNREACH;
		fallthrough;
	default:
		_proto("Rx Received error report { orig=%u }", ee->ee_origin);
		break;
	}

	rxrpc_distribute_error(peer, err, compl);
}

/*
 * Distribute an error that occurred on a peer.
 */
static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
				   enum rxrpc_call_completion compl)
{
	struct rxrpc_call *call;
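	/* Both callers hold the RCU read lock across this traversal. */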
	hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
		rxrpc_see_call(call);
		rxrpc_set_call_completion(call, compl, 0, -error);
	}
}

/*
 * Perform keep-alive pings.
 */
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
					  struct list_head *collector,
					  time64_t base,
					  u8 cursor)
{
	struct rxrpc_peer *peer;
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
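	/* peer_keepalive is a power-of-two array of per-second buckets, so
	 * masking with size - 1 wraps the cursor around the wheel.
	 */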
	time64_t keepalive_at;
	int slot;

	spin_lock_bh(&rxnet->peer_hash_lock);

	while (!list_empty(collector)) {
		peer = list_entry(collector->next,
				  struct rxrpc_peer, keepalive_link);

		list_del_init(&peer->keepalive_link);
		if (!rxrpc_get_peer_maybe(peer))
			continue;

		if (__rxrpc_use_local(peer->local)) {
			spin_unlock_bh(&rxnet->peer_hash_lock);

			keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
			slot = keepalive_at - base;
			_debug("%02x peer %u t=%d {%pISp}",
			       cursor, peer->debug_id, slot, &peer->srx.transport);

			if (keepalive_at <= base ||
			    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
				rxrpc_send_keepalive(peer);
				slot = RXRPC_KEEPALIVE_TIME;
			}

			/* A transmission to this peer occurred since last we
			 * examined it so put it into the appropriate future
			 * bucket.
			 */
			slot += cursor;
			slot &= mask;
			spin_lock_bh(&rxnet->peer_hash_lock);
			list_add_tail(&peer->keepalive_link,
				      &rxnet->peer_keepalive[slot & mask]);
			rxrpc_unuse_local(peer->local);
		}
		rxrpc_put_peer_locked(peer);
	}

	spin_unlock_bh(&rxnet->peer_hash_lock);
}

/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
	struct rxrpc_net *rxnet =
		container_of(work, struct rxrpc_net, peer_keepalive_work);
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t base, now, delay;
	u8 cursor, stop;
	LIST_HEAD(collector);

	now = ktime_get_seconds();
	base = rxnet->peer_keepalive_base;
	cursor = rxnet->peer_keepalive_cursor;
	_enter("%lld,%u", base - now, cursor);

	if (!rxnet->live)
		return;

	/* Remove to a temporary list all the peers that are currently lodged
	 * in expired buckets plus all new peers.
	 *
	 * Everything in the bucket at the cursor is processed this
	 * second; the bucket at cursor + 1 goes at now + 1s and so
	 * on...
	 */
	spin_lock_bh(&rxnet->peer_hash_lock);
	list_splice_init(&rxnet->peer_keepalive_new, &collector);

	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
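	/* The (s8) cast below keeps the loop bound correct when the u8 cursor
	 * wraps past the end of the wheel.
	 */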
	while (base <= now && (s8)(cursor - stop) < 0) {
		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
				      &collector);
		base++;
		cursor++;
	}

	base = now;
	spin_unlock_bh(&rxnet->peer_hash_lock);

	rxnet->peer_keepalive_base = base;
	rxnet->peer_keepalive_cursor = cursor;
	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
	ASSERT(list_empty(&collector));

	/* Schedule the timer for the next occupied timeslot. */
	cursor = rxnet->peer_keepalive_cursor;
	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
	for (; (s8)(cursor - stop) < 0; cursor++) {
		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
			break;
		base++;
	}

	now = ktime_get_seconds();
	delay = base - now;
	if (delay < 1)
		delay = 1;
	delay *= HZ;
	if (rxnet->live)
		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);

	_leave("");
}