// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Authors:	Fred N. van Kempen <[email protected]>
 *		Alan Cox <[email protected]>
 *
 * Fixes:
 *		Alan Cox	:	Split from ip.c, see ip_input.c for history.
 *		David S. Miller :	Begin massive cleanup...
 *		Andi Kleen	:	Add sysctls.
 *		xxxx		:	Overlapfrag bug.
 *		Ultima		:	ip_expire() kernel panic.
 *		Bill Hawes	:	Frag accounting and evictor fixes.
 *		John McDonald	:	0 length frag bug.
 *		Alexey Kuznetsov:	SMP races, threading, cleanup.
 *		Patrick McHardy :	LRU queue of frag heads for evictor.
 */
#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/route.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/inet_frag.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/l3mdev.h>

/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
 * as well. Or notify me, at least. --ANK
 */
static const char ip_frag_cache_name[] = "ip4-frags";

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
	struct inet_frag_queue q;

	u8		ecn;		/* RFC3168 support */
	u16		max_df_size;	/* largest frag with DF set seen */
	int		iif;
	unsigned int	rid;
	struct inet_peer *peer;
};

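/* Map the two ECN bits of the TOS byte to a single bit, so that the
 * codepoints seen across all fragments can simply be OR-ed together
 * in qp->ecn: INET_ECN_MASK is 0x3, so each of the four codepoints
 * (Not-ECT, ECT(1), ECT(0), CE) gets its own bit.  At reassembly time
 * ip_frag_ecn_table maps the accumulated bit set back to a TOS value,
 * or to 0xff for combinations RFC 3168 forbids (e.g. Not-ECT mixed
 * with CE), which makes ip_frag_reasm() drop the datagram.
 */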
static u8 ip4_frag_ecn(u8 tos)
{
	return 1 << (tos & INET_ECN_MASK);
}

static struct inet_frags ip4_frags;

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev);

static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
	struct ipq *qp = container_of(q, struct ipq, q);
	struct net *net = q->fqdir->net;

	const struct frag_v4_compare_key *key = a;

	q->key.v4 = *key;
	qp->ecn = 0;
	qp->peer = q->fqdir->max_dist ?
		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
		NULL;
}

static void ip4_frag_free(struct inet_frag_queue *q)
{
	struct ipq *qp;

	qp = container_of(q, struct ipq, q);
	if (qp->peer)
		inet_putpeer(qp->peer);
}

/* Destruction primitives. */

static void ipq_put(struct ipq *ipq)
{
	inet_frag_put(&ipq->q);
}

/* Kill ipq entry. It is not destroyed immediately,
 * because the caller (and possibly others) still holds a reference.
 */
static void ipq_kill(struct ipq *ipq)
{
	inet_frag_kill(&ipq->q);
}

static bool frag_expire_skip_icmp(u32 user)
{
	return user == IP_DEFRAG_AF_PACKET ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
					 __IP_DEFRAG_CONNTRACK_IN_END) ||
	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
					 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
}

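/* Conntrack and AF_PACKET defragment on behalf of packets that may
 * only be passing through this box, so a reassembly timeout there does
 * not mean the final destination gave up.  Per RFC 792 only the
 * destination host should emit the "fragment reassembly time exceeded"
 * ICMP error, so for these users ip_expire() additionally requires the
 * route to be RTN_LOCAL before sending it.
 */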
/*
 * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
 */
static void ip_expire(struct timer_list *t)
{
	struct inet_frag_queue *frag = from_timer(frag, t, timer);
	const struct iphdr *iph;
	struct sk_buff *head = NULL;
	struct net *net;
	struct ipq *qp;
	int err;

	qp = container_of(frag, struct ipq, q);
	net = qp->q.fqdir->net;

	rcu_read_lock();

	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
	if (READ_ONCE(qp->q.fqdir->dead))
		goto out_rcu_unlock;

	spin_lock(&qp->q.lock);

	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto out;

	ipq_kill(qp);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);

	if (!(qp->q.flags & INET_FRAG_FIRST_IN))
		goto out;

	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
	 * pull the head out of the tree in order to be able to
	 * deal with head->dev.
	 */
	head = inet_frag_pull_head(&qp->q);
	if (!head)
		goto out;
	head->dev = dev_get_by_index_rcu(net, qp->iif);
	if (!head->dev)
		goto out;

	/* skb has no dst, perform route lookup again */
	iph = ip_hdr(head);
	err = ip_route_input_noref(head, iph->daddr, iph->saddr,
				   iph->tos, head->dev);
	if (err)
		goto out;

	/* Only an end host needs to send an ICMP
	 * "Fragment Reassembly Timeout" message, per RFC792.
	 */
	if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
	    (skb_rtable(head)->rt_type != RTN_LOCAL))
		goto out;

	spin_unlock(&qp->q.lock);
	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
	goto out_rcu_unlock;

out:
	spin_unlock(&qp->q.lock);
out_rcu_unlock:
	rcu_read_unlock();
	kfree_skb(head);
	ipq_put(qp);
}

/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create a new one if nothing is found.
 */
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
			   u32 user, int vif)
{
	struct frag_v4_compare_key key = {
		.saddr = iph->saddr,
		.daddr = iph->daddr,
		.user = user,
		.vif = vif,
		.id = iph->id,
		.protocol = iph->protocol,
	};
	struct inet_frag_queue *q;

	q = inet_frag_find(net->ipv4.fqdir, &key);
	if (!q)
		return NULL;

	return container_of(q, struct ipq, q);
}

/* Is the fragment too far ahead to be part of ipq? */
static int ip_frag_too_far(struct ipq *qp)
{
	struct inet_peer *peer = qp->peer;
	unsigned int max = qp->q.fqdir->max_dist;
	unsigned int start, end;

	int rc;

	if (!peer || !max)
		return 0;

	start = qp->rid;
	end = atomic_inc_return(&peer->rid);
	qp->rid = end;

	rc = qp->q.fragments_tail && (end - start) > max;

	if (rc)
		__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);

	return rc;
}

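/* Rationale: peer->rid advances once per fragment received from this
 * peer, while qp->rid remembers its value when this queue last grew.
 * With the default ipfrag_max_dist of 64 (set below), seeing more than
 * 64 other fragments from the same source between two fragments of one
 * datagram suggests the 16-bit IP ID may have wrapped and these
 * fragments belong to different datagrams, so reassembly is aborted
 * rather than risking corruption.  Setting ipfrag_max_dist to 0
 * disables the check.
 */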
static int ip_frag_reinit(struct ipq *qp)
{
	unsigned int sum_truesize = 0;

	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
		refcount_inc(&qp->q.refcnt);
		return -ETIMEDOUT;
	}

	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
	sub_frag_mem_limit(qp->q.fqdir, sum_truesize);

	qp->q.flags = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	qp->iif = 0;
	qp->ecn = 0;

	return 0;
}

/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
	struct net *net = qp->q.fqdir->net;
	int ihl, end, flags, offset;
	struct sk_buff *prev_tail;
	struct net_device *dev;
	unsigned int fragsize;
	int err = -ENOENT;
	u8 ecn;

	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto err;

	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
		ipq_kill(qp);
		goto err;
	}

	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
	offset = ntohs(ip_hdr(skb)->frag_off);
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	offset <<= 3;		/* offset is in 8-byte chunks */
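	/* Example: frag_off = 0x2009 means IP_MF is set and the offset
	 * field is 9, so this fragment's payload starts at byte
	 * 9 * 8 = 72 of the original datagram.  With a 1500-byte MTU
	 * each fragment carries 1480 payload bytes, i.e. an offset
	 * step of 185.
	 */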
	ihl = ip_hdrlen(skb);

	/* Determine the position of this fragment. */
	end = offset + skb->len - skb_network_offset(skb) - ihl;
	err = -EINVAL;

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto discard_qp;
		qp->q.flags |= INET_FRAG_LAST_IN;
		qp->q.len = end;
	} else {
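		/* Non-final fragments must carry a multiple of 8 bytes
		 * of payload, because the offset field counts 8-byte
		 * units; trailing bytes are trimmed here and any stale
		 * checksum is invalidated.
		 */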
		if (end & 7) {
			end &= ~7;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.flags & INET_FRAG_LAST_IN)
				goto discard_qp;
			qp->q.len = end;
		}
	}
	if (end == offset)
		goto discard_qp;

	err = -ENOMEM;
	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
		goto discard_qp;

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto discard_qp;

	/* Note : skb->rbnode and skb->dev share the same location. */
	dev = skb->dev;
	/* Makes sure compiler won't do silly aliasing games */
	barrier();

	prev_tail = qp->q.fragments_tail;
	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
	if (err)
		goto insert_error;

	if (dev)
		qp->iif = dev->ifindex;

	qp->q.stamp = skb->tstamp;
	qp->q.mono_delivery_time = skb->mono_delivery_time;
	qp->q.meat += skb->len;
	qp->ecn |= ecn;
	add_frag_mem_limit(qp->q.fqdir, skb->truesize);
	if (offset == 0)
		qp->q.flags |= INET_FRAG_FIRST_IN;

	fragsize = skb->len + ihl;

	if (fragsize > qp->q.max_size)
		qp->q.max_size = fragsize;

	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
	    fragsize > qp->max_df_size)
		qp->max_df_size = fragsize;

	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		skb->_skb_refdst = 0UL;
		err = ip_frag_reasm(qp, skb, prev_tail, dev);
		skb->_skb_refdst = orefdst;
		if (err)
			inet_frag_kill(&qp->q);
		return err;
	}

	skb_dst_drop(skb);
	return -EINPROGRESS;

insert_error:
	if (err == IPFRAG_DUP) {
		kfree_skb(skb);
		return -EINVAL;
	}
	err = -EINVAL;
	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
	inet_frag_kill(&qp->q);
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
err:
	kfree_skb(skb);
	return err;
}

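/* Return-value contract for ip_frag_queue(): -EINPROGRESS means the
 * fragment was absorbed and the datagram is still incomplete; 0 means
 * the final piece arrived and skb now holds the fully reassembled
 * datagram; any other negative errno means the fragment (and possibly
 * the whole queue) was dropped.
 */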
static bool ip_frag_coalesce_ok(const struct ipq *qp)
{
	return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
}

/* Build a new IP datagram from all its fragments. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
			 struct sk_buff *prev_tail, struct net_device *dev)
{
	struct net *net = qp->q.fqdir->net;
	struct iphdr *iph;
	void *reasm_data;
	int len, err;
	u8 ecn;

	ipq_kill(qp);

	ecn = ip_frag_ecn_table[qp->ecn];
	if (unlikely(ecn == 0xff)) {
		err = -EINVAL;
		goto out_fail;
	}

	/* Make the one we just received the head. */
	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
	if (!reasm_data)
		goto out_nomem;

	len = ip_hdrlen(skb) + qp->q.len;
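	/* The IP total-length field is only 16 bits, so no datagram
	 * larger than 65535 bytes can be represented.  A fragment train
	 * whose reassembled size exceeds that is malformed (the classic
	 * "ping of death" pattern) and is rejected with -E2BIG.
	 */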
	err = -E2BIG;
	if (len > 65535)
		goto out_oversize;

	inet_frag_reasm_finish(&qp->q, skb, reasm_data,
			       ip_frag_coalesce_ok(qp));

	skb->dev = dev;
	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);

	iph = ip_hdr(skb);
	iph->tot_len = htons(len);
	iph->tos |= ecn;

	/* When we set IP_DF on a refragmented skb we must also force a
	 * call to ip_fragment to avoid forwarding a DF-skb of size s while
	 * original sender only sent fragments of size f (where f < s).
	 *
	 * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
	 * frag seen to avoid sending tiny DF-fragments in case skb was built
	 * from one very small df-fragment and one large non-df frag.
	 */
	if (qp->max_df_size == qp->q.max_size) {
		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
		iph->frag_off = htons(IP_DF);
	} else {
		iph->frag_off = 0;
	}

	ip_send_check(iph);

	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
	qp->q.rb_fragments = RB_ROOT;
	qp->q.fragments_tail = NULL;
	qp->q.last_run_head = NULL;
	return 0;

out_nomem:
	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
	err = -ENOMEM;
	goto out_fail;
out_oversize:
	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	return err;
}

/* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
	int vif = l3mdev_master_ifindex_rcu(dev);
	struct ipq *qp;

	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
	skb_orphan(skb);

	/* Lookup (or create) queue header */
	qp = ip_find(net, ip_hdr(skb), user, vif);
	if (qp) {
		int ret;

		spin_lock(&qp->q.lock);

		ret = ip_frag_queue(qp, skb);

		spin_unlock(&qp->q.lock);
		ipq_put(qp);
		return ret;
	}

	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -ENOMEM;
}
EXPORT_SYMBOL(ip_defrag);

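/* Typical call site (a sketch, after the pattern used by
 * ip_local_deliver()): a zero return hands the caller a fully
 * reassembled skb; any nonzero return means the fragment was consumed
 * (queued or dropped) and the caller must stop touching it:
 *
 *	if (ip_is_fragment(ip_hdr(skb))) {
 *		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
 *			return 0;	// skb is gone either way
 *	}
 */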
struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
	struct iphdr iph;
	int netoff;
	u32 len;

	if (skb->protocol != htons(ETH_P_IP))
		return skb;

	netoff = skb_network_offset(skb);

	if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
		return skb;

	if (iph.ihl < 5 || iph.version != 4)
		return skb;

	len = ntohs(iph.tot_len);
	if (skb->len < netoff + len || len < (iph.ihl * 4))
		return skb;

	if (ip_is_fragment(&iph)) {
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (skb) {
			if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
				kfree_skb(skb);
				return NULL;
			}
			if (pskb_trim_rcsum(skb, netoff + len)) {
				kfree_skb(skb);
				return NULL;
			}
			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
			if (ip_defrag(net, skb, user))
				return NULL;
			skb_clear_hash(skb);
		}
	}
	return skb;
}
EXPORT_SYMBOL(ip_check_defrag);

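/* ip_check_defrag() is the non-consuming variant for callers that only
 * conditionally own the skb, e.g. packet sockets using
 * PACKET_FANOUT_FLAG_DEFRAG: non-fragments (and non-IPv4 packets) are
 * returned untouched, fragments are fed to ip_defrag(), and NULL is
 * returned while reassembly is still in progress.
 */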
#ifdef CONFIG_SYSCTL
static int dist_min;

static struct ctl_table ip4_frags_ns_ctl_table[] = {
	{
		.procname	= "ipfrag_high_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_low_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ipfrag_time",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "ipfrag_max_dist",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &dist_min,
	},
	{ }
};

/* secret interval has been deprecated */
static int ip4_frags_secret_interval_unused;
static struct ctl_table ip4_frags_ctl_table[] = {
	{
		.procname	= "ipfrag_secret_interval",
		.data		= &ip4_frags_secret_interval_unused,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};

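/* These knobs appear under /proc/sys/net/ipv4/ in each netns.
 * ipfrag_high_thresh/ipfrag_low_thresh are bytes of reassembly memory
 * per netns, ipfrag_time is seconds before an incomplete datagram
 * expires.  Example (illustrative values only):
 *
 *	sysctl -w net.ipv4.ipfrag_high_thresh=8388608
 *	sysctl -w net.ipv4.ipfrag_time=20
 */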
static int __net_init ip4_frags_ns_ctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip4_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;
	}
	table[0].data	= &net->ipv4.fqdir->high_thresh;
	table[0].extra1	= &net->ipv4.fqdir->low_thresh;
	table[1].data	= &net->ipv4.fqdir->low_thresh;
	table[1].extra2	= &net->ipv4.fqdir->high_thresh;
	table[2].data	= &net->ipv4.fqdir->timeout;
	table[3].data	= &net->ipv4.fqdir->max_dist;

	hdr = register_net_sysctl(net, "net/ipv4", table);
	if (!hdr)
		goto err_reg;

	net->ipv4.frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
{
	struct ctl_table *table;

	table = net->ipv4.frags_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.frags_hdr);
	kfree(table);
}

static void __init ip4_frags_ctl_register(void)
{
	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
static int ip4_frags_ns_ctl_register(struct net *net)
{
	return 0;
}

static void ip4_frags_ns_ctl_unregister(struct net *net)
{
}

static void __init ip4_frags_ctl_register(void)
{
}
#endif

static int __net_init ipv4_frags_init_net(struct net *net)
{
	int res;

	res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
	if (res < 0)
		return res;
	/* Fragment cache limits.
	 *
	 * The fragment memory accounting code (tries to) account for
	 * the real memory usage, by measuring both the size of the frag
	 * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
	 * and the SKB's truesize.
	 *
	 * A 64K fragment consumes 129736 bytes (44*2944)+200
	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
	 *
	 * We will commit 4MB at one time. Should we cross that limit
	 * we will prune down to 3MB, making room for approx 8 big 64K
	 * fragments 8x128k.
	 */
	net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
	net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
	/*
	 * Important NOTE! The fragment queue must be destroyed before the MSL
	 * expires. RFC 791 is wrong in proposing to prolong the timer by the
	 * TTL on each fragment arrival.
	 */
	net->ipv4.fqdir->timeout = IP_FRAG_TIME;

	net->ipv4.fqdir->max_dist = 64;

	res = ip4_frags_ns_ctl_register(net);
	if (res < 0)
		fqdir_exit(net->ipv4.fqdir);
	return res;
}

static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
{
	fqdir_pre_exit(net->ipv4.fqdir);
}

static void __net_exit ipv4_frags_exit_net(struct net *net)
{
	ip4_frags_ns_ctl_unregister(net);
	fqdir_exit(net->ipv4.fqdir);
}

static struct pernet_operations ip4_frags_ops = {
	.init		= ipv4_frags_init_net,
	.pre_exit	= ipv4_frags_pre_exit_net,
	.exit		= ipv4_frags_exit_net,
};

static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
{
	return jhash2(data,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct inet_frag_queue *fq = data;

	return jhash2((const u32 *)&fq->key.v4,
		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
}

static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct frag_v4_compare_key *key = arg->key;
	const struct inet_frag_queue *fq = ptr;

	return !!memcmp(&fq->key, key, sizeof(*key));
}

static const struct rhashtable_params ip4_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.key_offset		= offsetof(struct inet_frag_queue, key),
	.key_len		= sizeof(struct frag_v4_compare_key),
	.hashfn			= ip4_key_hashfn,
	.obj_hashfn		= ip4_obj_hashfn,
	.obj_cmpfn		= ip4_obj_cmpfn,
	.automatic_shrinking	= true,
};

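/* ip4_key_hashfn (hashing a bare lookup key) and ip4_obj_hashfn
 * (hashing the key embedded in a queued object) must hash exactly the
 * same bytes with the same seed, or rhashtable lookups and insertions
 * would land in different buckets.  Fragments are matched on the full
 * tuple {saddr, daddr, IP ID, protocol, defrag user, VRF ifindex} of
 * struct frag_v4_compare_key.
 */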
void __init ipfrag_init(void)
{
	ip4_frags.constructor = ip4_frag_init;
	ip4_frags.destructor = ip4_frag_free;
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.frag_expire = ip_expire;
	ip4_frags.frags_cache_name = ip_frag_cache_name;
	ip4_frags.rhash_params = ip4_rhash_params;
	if (inet_frags_init(&ip4_frags))
		panic("IP: failed to allocate ip4_frags cache\n");
	ip4_frags_ctl_register();
	register_pernet_subsys(&ip4_frags_ops);
}