sch_etf.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  // SPDX-License-Identifier: GPL-2.0

  /* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
   *
   * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
   *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
   */
  7. #include <linux/module.h>
  8. #include <linux/types.h>
  9. #include <linux/kernel.h>
  10. #include <linux/string.h>
  11. #include <linux/errno.h>
  12. #include <linux/errqueue.h>
  13. #include <linux/rbtree.h>
  14. #include <linux/skbuff.h>
  15. #include <linux/posix-timers.h>
  16. #include <net/netlink.h>
  17. #include <net/sch_generic.h>
  18. #include <net/pkt_sched.h>
  19. #include <net/sock.h>
  /* Flag tests for anything that has a ->flags member; in this file they are
   * applied to the struct tc_etf_qopt received from user space.  Each macro
   * evaluates to the masked flag bits (non-zero when the flag is set), not to
   * a strict 0/1 value.
   */
  #define DEADLINE_MODE_IS_ON(qopt) ((qopt)->flags & TC_ETF_DEADLINE_MODE_ON)
  #define OFFLOAD_IS_ON(qopt) ((qopt)->flags & TC_ETF_OFFLOAD_ON)
  #define SKIP_SOCK_CHECK_IS_SET(qopt) ((qopt)->flags & TC_ETF_SKIP_SOCK_CHECK)
  23. struct etf_sched_data {
  24. bool offload;
  25. bool deadline_mode;
  26. bool skip_sock_check;
  27. int clockid;
  28. int queue;
  29. s32 delta; /* in ns */
  30. ktime_t last; /* The txtime of the last skb sent to the netdevice. */
  31. struct rb_root_cached head;
  32. struct qdisc_watchdog watchdog;
  33. ktime_t (*get_time)(void);
  34. };
  35. static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
  36. [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
  37. };
  38. static inline int validate_input_params(struct tc_etf_qopt *qopt,
  39. struct netlink_ext_ack *extack)
  40. {
  41. /* Check if params comply to the following rules:
  42. * * Clockid and delta must be valid.
  43. *
  44. * * Dynamic clockids are not supported.
  45. *
  46. * * Delta must be a positive integer.
  47. *
  48. * Also note that for the HW offload case, we must
  49. * expect that system clocks have been synchronized to PHC.
  50. */
  51. if (qopt->clockid < 0) {
  52. NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
  53. return -ENOTSUPP;
  54. }
  55. if (qopt->clockid != CLOCK_TAI) {
  56. NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
  57. return -EINVAL;
  58. }
  59. if (qopt->delta < 0) {
  60. NL_SET_ERR_MSG(extack, "Delta must be positive");
  61. return -EINVAL;
  62. }
  63. return 0;
  64. }
  65. static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
  66. {
  67. struct etf_sched_data *q = qdisc_priv(sch);
  68. ktime_t txtime = nskb->tstamp;
  69. struct sock *sk = nskb->sk;
  70. ktime_t now;
  71. if (q->skip_sock_check)
  72. goto skip;
  73. if (!sk || !sk_fullsock(sk))
  74. return false;
  75. if (!sock_flag(sk, SOCK_TXTIME))
  76. return false;
  77. /* We don't perform crosstimestamping.
  78. * Drop if packet's clockid differs from qdisc's.
  79. */
  80. if (sk->sk_clockid != q->clockid)
  81. return false;
  82. if (sk->sk_txtime_deadline_mode != q->deadline_mode)
  83. return false;
  84. skip:
  85. now = q->get_time();
  86. if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
  87. return false;
  88. return true;
  89. }
  90. static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
  91. {
  92. struct etf_sched_data *q = qdisc_priv(sch);
  93. struct rb_node *p;
  94. p = rb_first_cached(&q->head);
  95. if (!p)
  96. return NULL;
  97. return rb_to_skb(p);
  98. }
  99. static void reset_watchdog(struct Qdisc *sch)
  100. {
  101. struct etf_sched_data *q = qdisc_priv(sch);
  102. struct sk_buff *skb = etf_peek_timesortedlist(sch);
  103. ktime_t next;
  104. if (!skb) {
  105. qdisc_watchdog_cancel(&q->watchdog);
  106. return;
  107. }
  108. next = ktime_sub_ns(skb->tstamp, q->delta);
  109. qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
  110. }
  111. static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
  112. {
  113. struct sock_exterr_skb *serr;
  114. struct sk_buff *clone;
  115. ktime_t txtime = skb->tstamp;
  116. struct sock *sk = skb->sk;
  117. if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
  118. return;
  119. clone = skb_clone(skb, GFP_ATOMIC);
  120. if (!clone)
  121. return;
  122. serr = SKB_EXT_ERR(clone);
  123. serr->ee.ee_errno = err;
  124. serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
  125. serr->ee.ee_type = 0;
  126. serr->ee.ee_code = code;
  127. serr->ee.ee_pad = 0;
  128. serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
  129. serr->ee.ee_info = txtime; /* low part of tstamp */
  130. if (sock_queue_err_skb(sk, clone))
  131. kfree_skb(clone);
  132. }
  133. static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
  134. struct sk_buff **to_free)
  135. {
  136. struct etf_sched_data *q = qdisc_priv(sch);
  137. struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
  138. ktime_t txtime = nskb->tstamp;
  139. bool leftmost = true;
  140. if (!is_packet_valid(sch, nskb)) {
  141. report_sock_error(nskb, EINVAL,
  142. SO_EE_CODE_TXTIME_INVALID_PARAM);
  143. return qdisc_drop(nskb, sch, to_free);
  144. }
  145. while (*p) {
  146. struct sk_buff *skb;
  147. parent = *p;
  148. skb = rb_to_skb(parent);
  149. if (ktime_compare(txtime, skb->tstamp) >= 0) {
  150. p = &parent->rb_right;
  151. leftmost = false;
  152. } else {
  153. p = &parent->rb_left;
  154. }
  155. }
  156. rb_link_node(&nskb->rbnode, parent, p);
  157. rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
  158. qdisc_qstats_backlog_inc(sch, nskb);
  159. sch->q.qlen++;
  160. /* Now we may need to re-arm the qdisc watchdog for the next packet. */
  161. reset_watchdog(sch);
  162. return NET_XMIT_SUCCESS;
  163. }
  164. static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
  165. ktime_t now)
  166. {
  167. struct etf_sched_data *q = qdisc_priv(sch);
  168. struct sk_buff *to_free = NULL;
  169. struct sk_buff *tmp = NULL;
  170. skb_rbtree_walk_from_safe(skb, tmp) {
  171. if (ktime_after(skb->tstamp, now))
  172. break;
  173. rb_erase_cached(&skb->rbnode, &q->head);
  174. /* The rbnode field in the skb re-uses these fields, now that
  175. * we are done with the rbnode, reset them.
  176. */
  177. skb->next = NULL;
  178. skb->prev = NULL;
  179. skb->dev = qdisc_dev(sch);
  180. report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
  181. qdisc_qstats_backlog_dec(sch, skb);
  182. qdisc_drop(skb, sch, &to_free);
  183. qdisc_qstats_overlimit(sch);
  184. sch->q.qlen--;
  185. }
  186. kfree_skb_list(to_free);
  187. }
  188. static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
  189. {
  190. struct etf_sched_data *q = qdisc_priv(sch);
  191. rb_erase_cached(&skb->rbnode, &q->head);
  192. /* The rbnode field in the skb re-uses these fields, now that
  193. * we are done with the rbnode, reset them.
  194. */
  195. skb->next = NULL;
  196. skb->prev = NULL;
  197. skb->dev = qdisc_dev(sch);
  198. qdisc_qstats_backlog_dec(sch, skb);
  199. qdisc_bstats_update(sch, skb);
  200. q->last = skb->tstamp;
  201. sch->q.qlen--;
  202. }
  203. static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
  204. {
  205. struct etf_sched_data *q = qdisc_priv(sch);
  206. struct sk_buff *skb;
  207. ktime_t now, next;
  208. skb = etf_peek_timesortedlist(sch);
  209. if (!skb)
  210. return NULL;
  211. now = q->get_time();
  212. /* Drop if packet has expired while in queue. */
  213. if (ktime_before(skb->tstamp, now)) {
  214. timesortedlist_drop(sch, skb, now);
  215. skb = NULL;
  216. goto out;
  217. }
  218. /* When in deadline mode, dequeue as soon as possible and change the
  219. * txtime from deadline to (now + delta).
  220. */
  221. if (q->deadline_mode) {
  222. timesortedlist_remove(sch, skb);
  223. skb->tstamp = now;
  224. goto out;
  225. }
  226. next = ktime_sub_ns(skb->tstamp, q->delta);
  227. /* Dequeue only if now is within the [txtime - delta, txtime] range. */
  228. if (ktime_after(now, next))
  229. timesortedlist_remove(sch, skb);
  230. else
  231. skb = NULL;
  232. out:
  233. /* Now we may need to re-arm the qdisc watchdog for the next packet. */
  234. reset_watchdog(sch);
  235. return skb;
  236. }
  237. static void etf_disable_offload(struct net_device *dev,
  238. struct etf_sched_data *q)
  239. {
  240. struct tc_etf_qopt_offload etf = { };
  241. const struct net_device_ops *ops;
  242. int err;
  243. if (!q->offload)
  244. return;
  245. ops = dev->netdev_ops;
  246. if (!ops->ndo_setup_tc)
  247. return;
  248. etf.queue = q->queue;
  249. etf.enable = 0;
  250. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
  251. if (err < 0)
  252. pr_warn("Couldn't disable ETF offload for queue %d\n",
  253. etf.queue);
  254. }
  255. static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
  256. struct netlink_ext_ack *extack)
  257. {
  258. const struct net_device_ops *ops = dev->netdev_ops;
  259. struct tc_etf_qopt_offload etf = { };
  260. int err;
  261. if (!ops->ndo_setup_tc) {
  262. NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
  263. return -EOPNOTSUPP;
  264. }
  265. etf.queue = q->queue;
  266. etf.enable = 1;
  267. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
  268. if (err < 0) {
  269. NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
  270. return err;
  271. }
  272. return 0;
  273. }
  274. static int etf_init(struct Qdisc *sch, struct nlattr *opt,
  275. struct netlink_ext_ack *extack)
  276. {
  277. struct etf_sched_data *q = qdisc_priv(sch);
  278. struct net_device *dev = qdisc_dev(sch);
  279. struct nlattr *tb[TCA_ETF_MAX + 1];
  280. struct tc_etf_qopt *qopt;
  281. int err;
  282. if (!opt) {
  283. NL_SET_ERR_MSG(extack,
  284. "Missing ETF qdisc options which are mandatory");
  285. return -EINVAL;
  286. }
  287. err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
  288. extack);
  289. if (err < 0)
  290. return err;
  291. if (!tb[TCA_ETF_PARMS]) {
  292. NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
  293. return -EINVAL;
  294. }
  295. qopt = nla_data(tb[TCA_ETF_PARMS]);
  296. pr_debug("delta %d clockid %d offload %s deadline %s\n",
  297. qopt->delta, qopt->clockid,
  298. OFFLOAD_IS_ON(qopt) ? "on" : "off",
  299. DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
  300. err = validate_input_params(qopt, extack);
  301. if (err < 0)
  302. return err;
  303. q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
  304. if (OFFLOAD_IS_ON(qopt)) {
  305. err = etf_enable_offload(dev, q, extack);
  306. if (err < 0)
  307. return err;
  308. }
  309. /* Everything went OK, save the parameters used. */
  310. q->delta = qopt->delta;
  311. q->clockid = qopt->clockid;
  312. q->offload = OFFLOAD_IS_ON(qopt);
  313. q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
  314. q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
  315. switch (q->clockid) {
  316. case CLOCK_REALTIME:
  317. q->get_time = ktime_get_real;
  318. break;
  319. case CLOCK_MONOTONIC:
  320. q->get_time = ktime_get;
  321. break;
  322. case CLOCK_BOOTTIME:
  323. q->get_time = ktime_get_boottime;
  324. break;
  325. case CLOCK_TAI:
  326. q->get_time = ktime_get_clocktai;
  327. break;
  328. default:
  329. NL_SET_ERR_MSG(extack, "Clockid is not supported");
  330. return -ENOTSUPP;
  331. }
  332. qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
  333. return 0;
  334. }
  335. static void timesortedlist_clear(struct Qdisc *sch)
  336. {
  337. struct etf_sched_data *q = qdisc_priv(sch);
  338. struct rb_node *p = rb_first_cached(&q->head);
  339. while (p) {
  340. struct sk_buff *skb = rb_to_skb(p);
  341. p = rb_next(p);
  342. rb_erase_cached(&skb->rbnode, &q->head);
  343. rtnl_kfree_skbs(skb, skb);
  344. sch->q.qlen--;
  345. }
  346. }
  347. static void etf_reset(struct Qdisc *sch)
  348. {
  349. struct etf_sched_data *q = qdisc_priv(sch);
  350. /* Only cancel watchdog if it's been initialized. */
  351. if (q->watchdog.qdisc == sch)
  352. qdisc_watchdog_cancel(&q->watchdog);
  353. /* No matter which mode we are on, it's safe to clear both lists. */
  354. timesortedlist_clear(sch);
  355. __qdisc_reset_queue(&sch->q);
  356. q->last = 0;
  357. }
  358. static void etf_destroy(struct Qdisc *sch)
  359. {
  360. struct etf_sched_data *q = qdisc_priv(sch);
  361. struct net_device *dev = qdisc_dev(sch);
  362. /* Only cancel watchdog if it's been initialized. */
  363. if (q->watchdog.qdisc == sch)
  364. qdisc_watchdog_cancel(&q->watchdog);
  365. etf_disable_offload(dev, q);
  366. }
  367. static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
  368. {
  369. struct etf_sched_data *q = qdisc_priv(sch);
  370. struct tc_etf_qopt opt = { };
  371. struct nlattr *nest;
  372. nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
  373. if (!nest)
  374. goto nla_put_failure;
  375. opt.delta = q->delta;
  376. opt.clockid = q->clockid;
  377. if (q->offload)
  378. opt.flags |= TC_ETF_OFFLOAD_ON;
  379. if (q->deadline_mode)
  380. opt.flags |= TC_ETF_DEADLINE_MODE_ON;
  381. if (q->skip_sock_check)
  382. opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
  383. if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
  384. goto nla_put_failure;
  385. return nla_nest_end(skb, nest);
  386. nla_put_failure:
  387. nla_nest_cancel(skb, nest);
  388. return -1;
  389. }
  390. static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
  391. .id = "etf",
  392. .priv_size = sizeof(struct etf_sched_data),
  393. .enqueue = etf_enqueue_timesortedlist,
  394. .dequeue = etf_dequeue_timesortedlist,
  395. .peek = etf_peek_timesortedlist,
  396. .init = etf_init,
  397. .reset = etf_reset,
  398. .destroy = etf_destroy,
  399. .dump = etf_dump,
  400. .owner = THIS_MODULE,
  401. };
  402. static int __init etf_module_init(void)
  403. {
  404. return register_qdisc(&etf_qdisc_ops);
  405. }
  406. static void __exit etf_module_exit(void)
  407. {
  408. unregister_qdisc(&etf_qdisc_ops);
  409. }
  410. module_init(etf_module_init)
  411. module_exit(etf_module_exit)
  412. MODULE_LICENSE("GPL");