sch_cbs.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * net/sched/sch_cbs.c Credit Based Shaper
  4. *
  5. * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
  6. */
  7. /* Credit Based Shaper (CBS)
  8. * =========================
  9. *
  10. * This is a simple rate-limiting shaper aimed at TSN applications on
  11. * systems with known traffic workloads.
  12. *
  13. * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
  14. * Section 8.6.8.2, and explained in more detail in the Annex L of the
  15. * same specification.
  16. *
  17. * There are four tunables to be considered:
  18. *
  19. * 'idleslope': Idleslope is the rate of credits that is
  20. * accumulated (in kilobits per second) when there is at least
  21. * one packet waiting for transmission. Packets are transmitted
  22. * when the current value of credits is equal or greater than
  23. * zero. When there is no packet to be transmitted the amount of
  24. * credits is set to zero. This is the main tunable of the CBS
  25. * algorithm.
  26. *
  27. * 'sendslope':
  28. * Sendslope is the rate of credits that is depleted (it should be a
  29. * negative number of kilobits per second) when a transmission is
  30. * occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
  31. * 8.6.8.2 item g):
  32. *
  33. * sendslope = idleslope - port_transmit_rate
  34. *
  35. * 'hicredit': Hicredit defines the maximum amount of credits (in
  36. * bytes) that can be accumulated. Hicredit depends on the
  37. * characteristics of interfering traffic,
  38. * 'max_interference_size' is the maximum size of any burst of
  39. * traffic that can delay the transmission of a frame that is
  40. * available for transmission for this traffic class, (IEEE
  41. * 802.1Q-2014 Annex L, Equation L-3):
  42. *
  43. * hicredit = max_interference_size * (idleslope / port_transmit_rate)
  44. *
  45. * 'locredit': Locredit is the minimum amount of credits that can
  46. * be reached. It is a function of the traffic flowing through
  47. * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
  48. *
  49. * locredit = max_frame_size * (sendslope / port_transmit_rate)
  50. */
  51. #include <linux/ethtool.h>
  52. #include <linux/module.h>
  53. #include <linux/types.h>
  54. #include <linux/kernel.h>
  55. #include <linux/string.h>
  56. #include <linux/errno.h>
  57. #include <linux/skbuff.h>
  58. #include <net/netevent.h>
  59. #include <net/netlink.h>
  60. #include <net/sch_generic.h>
  61. #include <net/pkt_sched.h>
  62. static LIST_HEAD(cbs_list);
  63. static DEFINE_SPINLOCK(cbs_list_lock);
  64. #define BYTES_PER_KBIT (1000LL / 8)
  65. struct cbs_sched_data {
  66. bool offload;
  67. int queue;
  68. atomic64_t port_rate; /* in bytes/s */
  69. s64 last; /* timestamp in ns */
  70. s64 credits; /* in bytes */
  71. s32 locredit; /* in bytes */
  72. s32 hicredit; /* in bytes */
  73. s64 sendslope; /* in bytes/s */
  74. s64 idleslope; /* in bytes/s */
  75. struct qdisc_watchdog watchdog;
  76. int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
  77. struct sk_buff **to_free);
  78. struct sk_buff *(*dequeue)(struct Qdisc *sch);
  79. struct Qdisc *qdisc;
  80. struct list_head cbs_list;
  81. };
  82. static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  83. struct Qdisc *child,
  84. struct sk_buff **to_free)
  85. {
  86. unsigned int len = qdisc_pkt_len(skb);
  87. int err;
  88. err = child->ops->enqueue(skb, child, to_free);
  89. if (err != NET_XMIT_SUCCESS)
  90. return err;
  91. sch->qstats.backlog += len;
  92. sch->q.qlen++;
  93. return NET_XMIT_SUCCESS;
  94. }
  95. static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
  96. struct sk_buff **to_free)
  97. {
  98. struct cbs_sched_data *q = qdisc_priv(sch);
  99. struct Qdisc *qdisc = q->qdisc;
  100. return cbs_child_enqueue(skb, sch, qdisc, to_free);
  101. }
  102. static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
  103. struct sk_buff **to_free)
  104. {
  105. struct cbs_sched_data *q = qdisc_priv(sch);
  106. struct Qdisc *qdisc = q->qdisc;
  107. if (sch->q.qlen == 0 && q->credits > 0) {
  108. /* We need to stop accumulating credits when there's
  109. * no enqueued packets and q->credits is positive.
  110. */
  111. q->credits = 0;
  112. q->last = ktime_get_ns();
  113. }
  114. return cbs_child_enqueue(skb, sch, qdisc, to_free);
  115. }
  116. static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  117. struct sk_buff **to_free)
  118. {
  119. struct cbs_sched_data *q = qdisc_priv(sch);
  120. return q->enqueue(skb, sch, to_free);
  121. }
  122. /* timediff is in ns, slope is in bytes/s */
  123. static s64 timediff_to_credits(s64 timediff, s64 slope)
  124. {
  125. return div64_s64(timediff * slope, NSEC_PER_SEC);
  126. }
  127. static s64 delay_from_credits(s64 credits, s64 slope)
  128. {
  129. if (unlikely(slope == 0))
  130. return S64_MAX;
  131. return div64_s64(-credits * NSEC_PER_SEC, slope);
  132. }
  133. static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
  134. {
  135. if (unlikely(port_rate == 0))
  136. return S64_MAX;
  137. return div64_s64(len * slope, port_rate);
  138. }
  139. static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child)
  140. {
  141. struct sk_buff *skb;
  142. skb = child->ops->dequeue(child);
  143. if (!skb)
  144. return NULL;
  145. qdisc_qstats_backlog_dec(sch, skb);
  146. qdisc_bstats_update(sch, skb);
  147. sch->q.qlen--;
  148. return skb;
  149. }
  150. static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
  151. {
  152. struct cbs_sched_data *q = qdisc_priv(sch);
  153. struct Qdisc *qdisc = q->qdisc;
  154. s64 now = ktime_get_ns();
  155. struct sk_buff *skb;
  156. s64 credits;
  157. int len;
  158. /* The previous packet is still being sent */
  159. if (now < q->last) {
  160. qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
  161. return NULL;
  162. }
  163. if (q->credits < 0) {
  164. credits = timediff_to_credits(now - q->last, q->idleslope);
  165. credits = q->credits + credits;
  166. q->credits = min_t(s64, credits, q->hicredit);
  167. if (q->credits < 0) {
  168. s64 delay;
  169. delay = delay_from_credits(q->credits, q->idleslope);
  170. qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
  171. q->last = now;
  172. return NULL;
  173. }
  174. }
  175. skb = cbs_child_dequeue(sch, qdisc);
  176. if (!skb)
  177. return NULL;
  178. len = qdisc_pkt_len(skb);
  179. /* As sendslope is a negative number, this will decrease the
  180. * amount of q->credits.
  181. */
  182. credits = credits_from_len(len, q->sendslope,
  183. atomic64_read(&q->port_rate));
  184. credits += q->credits;
  185. q->credits = max_t(s64, credits, q->locredit);
  186. /* Estimate of the transmission of the last byte of the packet in ns */
  187. if (unlikely(atomic64_read(&q->port_rate) == 0))
  188. q->last = now;
  189. else
  190. q->last = now + div64_s64(len * NSEC_PER_SEC,
  191. atomic64_read(&q->port_rate));
  192. return skb;
  193. }
  194. static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
  195. {
  196. struct cbs_sched_data *q = qdisc_priv(sch);
  197. struct Qdisc *qdisc = q->qdisc;
  198. return cbs_child_dequeue(sch, qdisc);
  199. }
  200. static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
  201. {
  202. struct cbs_sched_data *q = qdisc_priv(sch);
  203. return q->dequeue(sch);
  204. }
  205. static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
  206. [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
  207. };
  208. static void cbs_disable_offload(struct net_device *dev,
  209. struct cbs_sched_data *q)
  210. {
  211. struct tc_cbs_qopt_offload cbs = { };
  212. const struct net_device_ops *ops;
  213. int err;
  214. if (!q->offload)
  215. return;
  216. q->enqueue = cbs_enqueue_soft;
  217. q->dequeue = cbs_dequeue_soft;
  218. ops = dev->netdev_ops;
  219. if (!ops->ndo_setup_tc)
  220. return;
  221. cbs.queue = q->queue;
  222. cbs.enable = 0;
  223. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
  224. if (err < 0)
  225. pr_warn("Couldn't disable CBS offload for queue %d\n",
  226. cbs.queue);
  227. }
  228. static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
  229. const struct tc_cbs_qopt *opt,
  230. struct netlink_ext_ack *extack)
  231. {
  232. const struct net_device_ops *ops = dev->netdev_ops;
  233. struct tc_cbs_qopt_offload cbs = { };
  234. int err;
  235. if (!ops->ndo_setup_tc) {
  236. NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
  237. return -EOPNOTSUPP;
  238. }
  239. cbs.queue = q->queue;
  240. cbs.enable = 1;
  241. cbs.hicredit = opt->hicredit;
  242. cbs.locredit = opt->locredit;
  243. cbs.idleslope = opt->idleslope;
  244. cbs.sendslope = opt->sendslope;
  245. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
  246. if (err < 0) {
  247. NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
  248. return err;
  249. }
  250. q->enqueue = cbs_enqueue_offload;
  251. q->dequeue = cbs_dequeue_offload;
  252. return 0;
  253. }
  254. static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
  255. {
  256. struct ethtool_link_ksettings ecmd;
  257. int speed = SPEED_10;
  258. int port_rate;
  259. int err;
  260. err = __ethtool_get_link_ksettings(dev, &ecmd);
  261. if (err < 0)
  262. goto skip;
  263. if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
  264. speed = ecmd.base.speed;
  265. skip:
  266. port_rate = speed * 1000 * BYTES_PER_KBIT;
  267. atomic64_set(&q->port_rate, port_rate);
  268. netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
  269. dev->name, (long long)atomic64_read(&q->port_rate),
  270. ecmd.base.speed);
  271. }
  272. static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
  273. void *ptr)
  274. {
  275. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  276. struct cbs_sched_data *q;
  277. struct net_device *qdev;
  278. bool found = false;
  279. ASSERT_RTNL();
  280. if (event != NETDEV_UP && event != NETDEV_CHANGE)
  281. return NOTIFY_DONE;
  282. spin_lock(&cbs_list_lock);
  283. list_for_each_entry(q, &cbs_list, cbs_list) {
  284. qdev = qdisc_dev(q->qdisc);
  285. if (qdev == dev) {
  286. found = true;
  287. break;
  288. }
  289. }
  290. spin_unlock(&cbs_list_lock);
  291. if (found)
  292. cbs_set_port_rate(dev, q);
  293. return NOTIFY_DONE;
  294. }
  295. static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
  296. struct netlink_ext_ack *extack)
  297. {
  298. struct cbs_sched_data *q = qdisc_priv(sch);
  299. struct net_device *dev = qdisc_dev(sch);
  300. struct nlattr *tb[TCA_CBS_MAX + 1];
  301. struct tc_cbs_qopt *qopt;
  302. int err;
  303. err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy,
  304. extack);
  305. if (err < 0)
  306. return err;
  307. if (!tb[TCA_CBS_PARMS]) {
  308. NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory");
  309. return -EINVAL;
  310. }
  311. qopt = nla_data(tb[TCA_CBS_PARMS]);
  312. if (!qopt->offload) {
  313. cbs_set_port_rate(dev, q);
  314. cbs_disable_offload(dev, q);
  315. } else {
  316. err = cbs_enable_offload(dev, q, qopt, extack);
  317. if (err < 0)
  318. return err;
  319. }
  320. /* Everything went OK, save the parameters used. */
  321. q->hicredit = qopt->hicredit;
  322. q->locredit = qopt->locredit;
  323. q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
  324. q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
  325. q->offload = qopt->offload;
  326. return 0;
  327. }
  328. static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
  329. struct netlink_ext_ack *extack)
  330. {
  331. struct cbs_sched_data *q = qdisc_priv(sch);
  332. struct net_device *dev = qdisc_dev(sch);
  333. if (!opt) {
  334. NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
  335. return -EINVAL;
  336. }
  337. q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
  338. sch->handle, extack);
  339. if (!q->qdisc)
  340. return -ENOMEM;
  341. spin_lock(&cbs_list_lock);
  342. list_add(&q->cbs_list, &cbs_list);
  343. spin_unlock(&cbs_list_lock);
  344. qdisc_hash_add(q->qdisc, false);
  345. q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
  346. q->enqueue = cbs_enqueue_soft;
  347. q->dequeue = cbs_dequeue_soft;
  348. qdisc_watchdog_init(&q->watchdog, sch);
  349. return cbs_change(sch, opt, extack);
  350. }
  351. static void cbs_destroy(struct Qdisc *sch)
  352. {
  353. struct cbs_sched_data *q = qdisc_priv(sch);
  354. struct net_device *dev = qdisc_dev(sch);
  355. /* Nothing to do if we couldn't create the underlying qdisc */
  356. if (!q->qdisc)
  357. return;
  358. qdisc_watchdog_cancel(&q->watchdog);
  359. cbs_disable_offload(dev, q);
  360. spin_lock(&cbs_list_lock);
  361. list_del(&q->cbs_list);
  362. spin_unlock(&cbs_list_lock);
  363. qdisc_put(q->qdisc);
  364. }
  365. static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
  366. {
  367. struct cbs_sched_data *q = qdisc_priv(sch);
  368. struct tc_cbs_qopt opt = { };
  369. struct nlattr *nest;
  370. nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
  371. if (!nest)
  372. goto nla_put_failure;
  373. opt.hicredit = q->hicredit;
  374. opt.locredit = q->locredit;
  375. opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
  376. opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
  377. opt.offload = q->offload;
  378. if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
  379. goto nla_put_failure;
  380. return nla_nest_end(skb, nest);
  381. nla_put_failure:
  382. nla_nest_cancel(skb, nest);
  383. return -1;
  384. }
  385. static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
  386. struct sk_buff *skb, struct tcmsg *tcm)
  387. {
  388. struct cbs_sched_data *q = qdisc_priv(sch);
  389. if (cl != 1 || !q->qdisc) /* only one class */
  390. return -ENOENT;
  391. tcm->tcm_handle |= TC_H_MIN(1);
  392. tcm->tcm_info = q->qdisc->handle;
  393. return 0;
  394. }
  395. static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
  396. struct Qdisc **old, struct netlink_ext_ack *extack)
  397. {
  398. struct cbs_sched_data *q = qdisc_priv(sch);
  399. if (!new) {
  400. new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
  401. sch->handle, NULL);
  402. if (!new)
  403. new = &noop_qdisc;
  404. }
  405. *old = qdisc_replace(sch, new, &q->qdisc);
  406. return 0;
  407. }
  408. static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
  409. {
  410. struct cbs_sched_data *q = qdisc_priv(sch);
  411. return q->qdisc;
  412. }
  413. static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
  414. {
  415. return 1;
  416. }
  417. static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
  418. {
  419. if (!walker->stop) {
  420. tc_qdisc_stats_dump(sch, 1, walker);
  421. }
  422. }
  423. static const struct Qdisc_class_ops cbs_class_ops = {
  424. .graft = cbs_graft,
  425. .leaf = cbs_leaf,
  426. .find = cbs_find,
  427. .walk = cbs_walk,
  428. .dump = cbs_dump_class,
  429. };
  430. static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
  431. .id = "cbs",
  432. .cl_ops = &cbs_class_ops,
  433. .priv_size = sizeof(struct cbs_sched_data),
  434. .enqueue = cbs_enqueue,
  435. .dequeue = cbs_dequeue,
  436. .peek = qdisc_peek_dequeued,
  437. .init = cbs_init,
  438. .reset = qdisc_reset_queue,
  439. .destroy = cbs_destroy,
  440. .change = cbs_change,
  441. .dump = cbs_dump,
  442. .owner = THIS_MODULE,
  443. };
  444. static struct notifier_block cbs_device_notifier = {
  445. .notifier_call = cbs_dev_notifier,
  446. };
  447. static int __init cbs_module_init(void)
  448. {
  449. int err;
  450. err = register_netdevice_notifier(&cbs_device_notifier);
  451. if (err)
  452. return err;
  453. err = register_qdisc(&cbs_qdisc_ops);
  454. if (err)
  455. unregister_netdevice_notifier(&cbs_device_notifier);
  456. return err;
  457. }
  458. static void __exit cbs_module_exit(void)
  459. {
  460. unregister_qdisc(&cbs_qdisc_ops);
  461. unregister_netdevice_notifier(&cbs_device_notifier);
  462. }
  463. module_init(cbs_module_init)
  464. module_exit(cbs_module_exit)
  465. MODULE_LICENSE("GPL");