ackvec.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * net/dccp/ackvec.c
  4. *
  5. * An implementation of Ack Vectors for the DCCP protocol
  6. * Copyright (c) 2007 University of Aberdeen, Scotland, UK
  7. * Copyright (c) 2005 Arnaldo Carvalho de Melo <[email protected]>
  8. */
  9. #include "dccp.h"
  10. #include <linux/kernel.h>
  11. #include <linux/slab.h>
  12. #include <linux/export.h>
  13. static struct kmem_cache *dccp_ackvec_slab;
  14. static struct kmem_cache *dccp_ackvec_record_slab;
  15. struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
  16. {
  17. struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
  18. if (av != NULL) {
  19. av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
  20. INIT_LIST_HEAD(&av->av_records);
  21. }
  22. return av;
  23. }
  24. static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
  25. {
  26. struct dccp_ackvec_record *cur, *next;
  27. list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
  28. kmem_cache_free(dccp_ackvec_record_slab, cur);
  29. INIT_LIST_HEAD(&av->av_records);
  30. }
  31. void dccp_ackvec_free(struct dccp_ackvec *av)
  32. {
  33. if (likely(av != NULL)) {
  34. dccp_ackvec_purge_records(av);
  35. kmem_cache_free(dccp_ackvec_slab, av);
  36. }
  37. }
  38. /**
  39. * dccp_ackvec_update_records - Record information about sent Ack Vectors
  40. * @av: Ack Vector records to update
  41. * @seqno: Sequence number of the packet carrying the Ack Vector just sent
  42. * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
  43. */
  44. int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
  45. {
  46. struct dccp_ackvec_record *avr;
  47. avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
  48. if (avr == NULL)
  49. return -ENOBUFS;
  50. avr->avr_ack_seqno = seqno;
  51. avr->avr_ack_ptr = av->av_buf_head;
  52. avr->avr_ack_ackno = av->av_buf_ackno;
  53. avr->avr_ack_nonce = nonce_sum;
  54. avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
  55. /*
  56. * When the buffer overflows, we keep no more than one record. This is
  57. * the simplest way of disambiguating sender-Acks dating from before the
  58. * overflow from sender-Acks which refer to after the overflow; a simple
  59. * solution is preferable here since we are handling an exception.
  60. */
  61. if (av->av_overflow)
  62. dccp_ackvec_purge_records(av);
  63. /*
  64. * Since GSS is incremented for each packet, the list is automatically
  65. * arranged in descending order of @ack_seqno.
  66. */
  67. list_add(&avr->avr_node, &av->av_records);
  68. dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
  69. (unsigned long long)avr->avr_ack_seqno,
  70. (unsigned long long)avr->avr_ack_ackno,
  71. avr->avr_ack_runlen);
  72. return 0;
  73. }
  74. static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
  75. const u64 ackno)
  76. {
  77. struct dccp_ackvec_record *avr;
  78. /*
  79. * Exploit that records are inserted in descending order of sequence
  80. * number, start with the oldest record first. If @ackno is `before'
  81. * the earliest ack_ackno, the packet is too old to be considered.
  82. */
  83. list_for_each_entry_reverse(avr, av_list, avr_node) {
  84. if (avr->avr_ack_seqno == ackno)
  85. return avr;
  86. if (before48(ackno, avr->avr_ack_seqno))
  87. break;
  88. }
  89. return NULL;
  90. }
  91. /*
  92. * Buffer index and length computation using modulo-buffersize arithmetic.
  93. * Note that, as pointers move from right to left, head is `before' tail.
  94. */
  95. static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
  96. {
  97. return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
  98. }
  99. static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
  100. {
  101. return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
  102. }
  103. u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
  104. {
  105. if (unlikely(av->av_overflow))
  106. return DCCPAV_MAX_ACKVEC_LEN;
  107. return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
  108. }
/**
 * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
 * @av:		non-empty buffer to update
 * @distance:   negative or zero distance of @seqno from buf_ackno downward
 * @seqno:	the (old) sequence number whose record is to be updated
 * @state:	state in which packet carrying @seqno was received
 *
 * Starting at buf_head, the cells are visited one by one (with index
 * wrap-around) until either the cell covering @seqno has been handled or the
 * walk arrives at buf_tail. A positive @distance hits BUG_ON(); an empty
 * buffer makes the function return without doing anything.
 */
static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
				   u64 seqno, enum dccp_ackvec_states state)
{
	u16 ptr = av->av_buf_head;

	BUG_ON(distance > 0);
	if (unlikely(dccp_ackvec_is_empty(av)))
		return;

	do {
		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);

		/* True once the run of the current cell reaches @seqno. */
		if (distance + runlen >= 0) {
			/*
			 * Only update the state if packet has not been received
			 * yet. This is OK as per the second table in RFC 4340,
			 * 11.4.1; i.e. here we are using the following table:
			 *                     RECEIVED
			 *                      0   1   3
			 *              S     +---+---+---+
			 *              T   0 | 0 | 0 | 0 |
			 *              O     +---+---+---+
			 *              R   1 | 1 | 1 | 1 |
			 *              E     +---+---+---+
			 *              D   3 | 0 | 1 | 3 |
			 *                    +---+---+---+
			 * The "Not Received" state was set by reserve_seats().
			 *
			 * The comparison below is against the entire cell
			 * value, and on a match the entire cell is overwritten
			 * with @state. A cell that was written as
			 * DCCPAV_NOT_RECEIVED | len with non-zero len (see the
			 * burst-loss path of dccp_ackvec_add_new()) is thus
			 * presumably left unchanged -- this assumes the run
			 * length bits do not overlap the state bits.
			 */
			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
				av->av_buf[ptr] = state;
			else
				dccp_pr_debug("Not changing %llu state to %u\n",
					      (unsigned long long)seqno, state);
			break;
		}

		/* Skip the runlen + 1 sequence numbers counted for this cell. */
		distance += runlen + 1;
		ptr	  = __ackvec_idx_add(ptr, 1);

	} while (ptr != av->av_buf_tail);
}
  152. /* Mark @num entries after buf_head as "Not yet received". */
  153. static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
  154. {
  155. u16 start = __ackvec_idx_add(av->av_buf_head, 1),
  156. len = DCCPAV_MAX_ACKVEC_LEN - start;
  157. /* check for buffer wrap-around */
  158. if (num > len) {
  159. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
  160. start = 0;
  161. num -= len;
  162. }
  163. if (num)
  164. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
  165. }
/**
 * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
 * @av:		 container of buffer to update (can be empty or non-empty)
 * @num_packets: number of packets to register (must be >= 1)
 * @seqno:	 sequence number of the first packet in @num_packets; it is
 *		 stored as the new buf_ackno
 * @state:	 state in which packet carrying @seqno was received
 *
 * The head is moved towards lower indices (with wrap-around) to make room.
 * The new head cell receives @state; any further cells reserved by this call
 * are marked as not received. When the buffer cannot take the new cells, the
 * overflow flag is set and the tail is made equal to the head.
 */
static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
				u64 seqno, enum dccp_ackvec_states state)
{
	u32 num_cells = num_packets;

	if (num_packets > DCCPAV_BURST_THRESH) {
		u32 lost_packets = num_packets - 1;

		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
		/*
		 * We received 1 packet and have a loss of size "num_packets-1"
		 * which we squeeze into num_cells-1 rather than reserving an
		 * entire byte for each lost packet.
		 * The reason is that the vector grows in O(burst_length); when
		 * it grows too large there will be no room left for the payload.
		 * This is a trade-off: if a few packets out of the burst show
		 * up later, their state will not be changed; it is simply too
		 * costly to reshuffle/reallocate/copy the buffer each time.
		 * Should such problems persist, we will need to switch to a
		 * different underlying data structure.
		 *
		 * From here on num_packets is 1 (the received packet), while
		 * num_cells counts that packet plus one cell per loop pass.
		 *
		 * NOTE(review): dccp_ackvec_update_old() steps over
		 * runlen + 1 sequence numbers per cell, whereas only len is
		 * deducted from lost_packets per cell here -- confirm that
		 * this accounting is intended.
		 */
		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
			u8 len = min_t(u32, lost_packets, DCCPAV_MAX_RUNLEN);

			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;

			lost_packets -= len;
		}
	}

	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries");
		av->av_overflow = true;
	}

	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
	/* While in overflow the tail follows the head. */
	if (av->av_overflow)
		av->av_buf_tail = av->av_buf_head;

	av->av_buf[av->av_buf_head] = state;
	av->av_buf_ackno	    = seqno;

	/* The cells between the new head and the previous one are gaps. */
	if (num_packets > 1)
		dccp_ackvec_reserve_seats(av, num_packets - 1);
}
  211. /**
  212. * dccp_ackvec_input - Register incoming packet in the buffer
  213. * @av: Ack Vector to register packet to
  214. * @skb: Packet to register
  215. */
  216. void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
  217. {
  218. u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
  219. enum dccp_ackvec_states state = DCCPAV_RECEIVED;
  220. if (dccp_ackvec_is_empty(av)) {
  221. dccp_ackvec_add_new(av, 1, seqno, state);
  222. av->av_tail_ackno = seqno;
  223. } else {
  224. s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
  225. u8 *current_head = av->av_buf + av->av_buf_head;
  226. if (num_packets == 1 &&
  227. dccp_ackvec_state(current_head) == state &&
  228. dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
  229. *current_head += 1;
  230. av->av_buf_ackno = seqno;
  231. } else if (num_packets > 0) {
  232. dccp_ackvec_add_new(av, num_packets, seqno, state);
  233. } else {
  234. dccp_ackvec_update_old(av, num_packets, seqno, state);
  235. }
  236. }
  237. }
/**
 * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
 * @av:	   Ack Vector record to clean
 * @ackno: last Ack Vector which has been acknowledged; it is matched against
 *	   the ack_seqno stored in the records, and nothing happens if no
 *	   record matches
 *
 * This routine is called when the peer acknowledges the receipt of Ack Vectors
 * up to and including @ackno. While based on section A.3 of RFC 4340, here
 * are additional precautions to prevent corrupted buffer state. In particular,
 * we use tail_ackno to identify outdated records; it always marks the earliest
 * packet of group (2) in 11.4.2.
 */
void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
{
	struct dccp_ackvec_record *avr, *next;
	u8 runlen_now, eff_runlen;
	s64 delta;

	avr = dccp_ackvec_lookup(&av->av_records, ackno);
	if (avr == NULL)
		return;
	/*
	 * Deal with outdated acknowledgments: this arises when e.g. there are
	 * several old records and the acks from the peer come in slowly. In
	 * that case we may still have records that pre-date tail_ackno.
	 * Such a record leaves the buffer untouched; it is only freed below.
	 */
	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
	if (delta < 0)
		goto free_records;
	/*
	 * Deal with overlapping Ack Vectors: don't subtract more than the
	 * number of packets between tail_ackno and ack_ackno.
	 */
	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;

	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
	/*
	 * The run length of Ack Vector cells does not decrease over time. If
	 * the run length is the same as at the time the Ack Vector was sent, we
	 * free the ack_ptr cell. That cell can however not be freed if the run
	 * length has increased: in this case we need to move the tail pointer
	 * backwards (towards higher indices), to its next-oldest neighbour.
	 */
	if (runlen_now > eff_runlen) {

		/* Keep the cell, but take the acknowledged part off its run. */
		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);

		/* This move may not have cleared the overflow flag. */
		if (av->av_overflow)
			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
	} else {
		av->av_buf_tail	= avr->avr_ack_ptr;
		/*
		 * We have made sure that avr points to a valid cell within the
		 * buffer. This cell is either older than head, or equals head
		 * (empty buffer): in both cases we no longer have any overflow.
		 */
		av->av_overflow = 0;
	}

	/*
	 * The peer has acknowledged up to and including ack_ackno. Hence the
	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
	 */
	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);

free_records:
	/*
	 * Unlink and free the matched record together with all records that
	 * follow it in the list. New records are added at the list head, so
	 * the ones following avr were added before it.
	 */
	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
		list_del(&avr->avr_node);
		kmem_cache_free(dccp_ackvec_record_slab, avr);
	}
}
  304. /*
  305. * Routines to keep track of Ack Vectors received in an skb
  306. */
  307. int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
  308. {
  309. struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
  310. if (new == NULL)
  311. return -ENOBUFS;
  312. new->vec = vec;
  313. new->len = len;
  314. new->nonce = nonce;
  315. list_add_tail(&new->node, head);
  316. return 0;
  317. }
  318. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
  319. void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
  320. {
  321. struct dccp_ackvec_parsed *cur, *next;
  322. list_for_each_entry_safe(cur, next, parsed_chunks, node)
  323. kfree(cur);
  324. INIT_LIST_HEAD(parsed_chunks);
  325. }
  326. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
  327. int __init dccp_ackvec_init(void)
  328. {
  329. dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
  330. sizeof(struct dccp_ackvec), 0,
  331. SLAB_HWCACHE_ALIGN, NULL);
  332. if (dccp_ackvec_slab == NULL)
  333. goto out_err;
  334. dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
  335. sizeof(struct dccp_ackvec_record),
  336. 0, SLAB_HWCACHE_ALIGN, NULL);
  337. if (dccp_ackvec_record_slab == NULL)
  338. goto out_destroy_slab;
  339. return 0;
  340. out_destroy_slab:
  341. kmem_cache_destroy(dccp_ackvec_slab);
  342. dccp_ackvec_slab = NULL;
  343. out_err:
  344. DCCP_CRIT("Unable to create Ack Vector slab cache");
  345. return -ENOBUFS;
  346. }
  347. void dccp_ackvec_exit(void)
  348. {
  349. kmem_cache_destroy(dccp_ackvec_slab);
  350. dccp_ackvec_slab = NULL;
  351. kmem_cache_destroy(dccp_ackvec_record_slab);
  352. dccp_ackvec_record_slab = NULL;
  353. }