sch_taprio.c 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* net/sched/sch_taprio.c Time Aware Priority Scheduler
  3. *
  4. * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
  5. *
  6. */
  7. #include <linux/ethtool.h>
  8. #include <linux/types.h>
  9. #include <linux/slab.h>
  10. #include <linux/kernel.h>
  11. #include <linux/string.h>
  12. #include <linux/list.h>
  13. #include <linux/errno.h>
  14. #include <linux/skbuff.h>
  15. #include <linux/math64.h>
  16. #include <linux/module.h>
  17. #include <linux/spinlock.h>
  18. #include <linux/rcupdate.h>
  19. #include <linux/time.h>
  20. #include <net/netlink.h>
  21. #include <net/pkt_sched.h>
  22. #include <net/pkt_cls.h>
  23. #include <net/sch_generic.h>
  24. #include <net/sock.h>
  25. #include <net/tcp.h>
  /* File-wide list head; struct taprio_sched carries a matching taprio_list
   * node (the code that links instances onto it is outside this view).
   */
  static LIST_HEAD(taprio_list);

  /* Gate mask used when no schedule entry is active: every gate is open.
   * Parenthesised so the negative literal cannot fuse with an operator that
   * happens to precede the macro at the expansion site.
   */
  #define TAPRIO_ALL_GATES_OPEN (-1)

  /* Non-zero when the corresponding TCA_TAPRIO_ATTR_FLAG_* bit is set. */
  #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
  #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)

  #define TAPRIO_FLAGS_INVALID U32_MAX
  31. struct sched_entry {
  32. struct list_head list;
  33. /* The instant that this entry "closes" and the next one
  34. * should open, the qdisc will make some effort so that no
  35. * packet leaves after this time.
  36. */
  37. ktime_t close_time;
  38. ktime_t next_txtime;
  39. atomic_t budget;
  40. int index;
  41. u32 gate_mask;
  42. u32 interval;
  43. u8 command;
  44. };
  45. struct sched_gate_list {
  46. struct rcu_head rcu;
  47. struct list_head entries;
  48. size_t num_entries;
  49. ktime_t cycle_close_time;
  50. s64 cycle_time;
  51. s64 cycle_time_extension;
  52. s64 base_time;
  53. };
  54. struct taprio_sched {
  55. struct Qdisc **qdiscs;
  56. struct Qdisc *root;
  57. u32 flags;
  58. enum tk_offsets tk_offset;
  59. int clockid;
  60. bool offloaded;
  61. atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
  62. * speeds it's sub-nanoseconds per byte
  63. */
  64. /* Protects the update side of the RCU protected current_entry */
  65. spinlock_t current_entry_lock;
  66. struct sched_entry __rcu *current_entry;
  67. struct sched_gate_list __rcu *oper_sched;
  68. struct sched_gate_list __rcu *admin_sched;
  69. struct hrtimer advance_timer;
  70. struct list_head taprio_list;
  71. u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
  72. u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
  73. u32 txtime_delay;
  74. };
  75. struct __tc_taprio_qopt_offload {
  76. refcount_t users;
  77. struct tc_taprio_qopt_offload offload;
  78. };
  79. static ktime_t sched_base_time(const struct sched_gate_list *sched)
  80. {
  81. if (!sched)
  82. return KTIME_MAX;
  83. return ns_to_ktime(sched->base_time);
  84. }
  85. static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
  86. {
  87. /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
  88. enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
  89. switch (tk_offset) {
  90. case TK_OFFS_MAX:
  91. return mono;
  92. default:
  93. return ktime_mono_to_any(mono, tk_offset);
  94. }
  95. }
  96. static ktime_t taprio_get_time(const struct taprio_sched *q)
  97. {
  98. return taprio_mono_to_any(q, ktime_get());
  99. }
  100. static void taprio_free_sched_cb(struct rcu_head *head)
  101. {
  102. struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
  103. struct sched_entry *entry, *n;
  104. list_for_each_entry_safe(entry, n, &sched->entries, list) {
  105. list_del(&entry->list);
  106. kfree(entry);
  107. }
  108. kfree(sched);
  109. }
  110. static void switch_schedules(struct taprio_sched *q,
  111. struct sched_gate_list **admin,
  112. struct sched_gate_list **oper)
  113. {
  114. rcu_assign_pointer(q->oper_sched, *admin);
  115. rcu_assign_pointer(q->admin_sched, NULL);
  116. if (*oper)
  117. call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
  118. *oper = *admin;
  119. *admin = NULL;
  120. }
  121. /* Get how much time has been already elapsed in the current cycle. */
  122. static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
  123. {
  124. ktime_t time_since_sched_start;
  125. s32 time_elapsed;
  126. time_since_sched_start = ktime_sub(time, sched->base_time);
  127. div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
  128. return time_elapsed;
  129. }
  130. static ktime_t get_interval_end_time(struct sched_gate_list *sched,
  131. struct sched_gate_list *admin,
  132. struct sched_entry *entry,
  133. ktime_t intv_start)
  134. {
  135. s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
  136. ktime_t intv_end, cycle_ext_end, cycle_end;
  137. cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
  138. intv_end = ktime_add_ns(intv_start, entry->interval);
  139. cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
  140. if (ktime_before(intv_end, cycle_end))
  141. return intv_end;
  142. else if (admin && admin != sched &&
  143. ktime_after(admin->base_time, cycle_end) &&
  144. ktime_before(admin->base_time, cycle_ext_end))
  145. return admin->base_time;
  146. else
  147. return cycle_end;
  148. }
  149. static int length_to_duration(struct taprio_sched *q, int len)
  150. {
  151. return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
  152. }
  153. /* Returns the entry corresponding to next available interval. If
  154. * validate_interval is set, it only validates whether the timestamp occurs
  155. * when the gate corresponding to the skb's traffic class is open.
  156. */
  157. static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
  158. struct Qdisc *sch,
  159. struct sched_gate_list *sched,
  160. struct sched_gate_list *admin,
  161. ktime_t time,
  162. ktime_t *interval_start,
  163. ktime_t *interval_end,
  164. bool validate_interval)
  165. {
  166. ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
  167. ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
  168. struct sched_entry *entry = NULL, *entry_found = NULL;
  169. struct taprio_sched *q = qdisc_priv(sch);
  170. struct net_device *dev = qdisc_dev(sch);
  171. bool entry_available = false;
  172. s32 cycle_elapsed;
  173. int tc, n;
  174. tc = netdev_get_prio_tc_map(dev, skb->priority);
  175. packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
  176. *interval_start = 0;
  177. *interval_end = 0;
  178. if (!sched)
  179. return NULL;
  180. cycle = sched->cycle_time;
  181. cycle_elapsed = get_cycle_time_elapsed(sched, time);
  182. curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
  183. cycle_end = ktime_add_ns(curr_intv_end, cycle);
  184. list_for_each_entry(entry, &sched->entries, list) {
  185. curr_intv_start = curr_intv_end;
  186. curr_intv_end = get_interval_end_time(sched, admin, entry,
  187. curr_intv_start);
  188. if (ktime_after(curr_intv_start, cycle_end))
  189. break;
  190. if (!(entry->gate_mask & BIT(tc)) ||
  191. packet_transmit_time > entry->interval)
  192. continue;
  193. txtime = entry->next_txtime;
  194. if (ktime_before(txtime, time) || validate_interval) {
  195. transmit_end_time = ktime_add_ns(time, packet_transmit_time);
  196. if ((ktime_before(curr_intv_start, time) &&
  197. ktime_before(transmit_end_time, curr_intv_end)) ||
  198. (ktime_after(curr_intv_start, time) && !validate_interval)) {
  199. entry_found = entry;
  200. *interval_start = curr_intv_start;
  201. *interval_end = curr_intv_end;
  202. break;
  203. } else if (!entry_available && !validate_interval) {
  204. /* Here, we are just trying to find out the
  205. * first available interval in the next cycle.
  206. */
  207. entry_available = true;
  208. entry_found = entry;
  209. *interval_start = ktime_add_ns(curr_intv_start, cycle);
  210. *interval_end = ktime_add_ns(curr_intv_end, cycle);
  211. }
  212. } else if (ktime_before(txtime, earliest_txtime) &&
  213. !entry_available) {
  214. earliest_txtime = txtime;
  215. entry_found = entry;
  216. n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
  217. *interval_start = ktime_add(curr_intv_start, n * cycle);
  218. *interval_end = ktime_add(curr_intv_end, n * cycle);
  219. }
  220. }
  221. return entry_found;
  222. }
  223. static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
  224. {
  225. struct taprio_sched *q = qdisc_priv(sch);
  226. struct sched_gate_list *sched, *admin;
  227. ktime_t interval_start, interval_end;
  228. struct sched_entry *entry;
  229. rcu_read_lock();
  230. sched = rcu_dereference(q->oper_sched);
  231. admin = rcu_dereference(q->admin_sched);
  232. entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
  233. &interval_start, &interval_end, true);
  234. rcu_read_unlock();
  235. return entry;
  236. }
  237. static bool taprio_flags_valid(u32 flags)
  238. {
  239. /* Make sure no other flag bits are set. */
  240. if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
  241. TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
  242. return false;
  243. /* txtime-assist and full offload are mutually exclusive */
  244. if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
  245. (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
  246. return false;
  247. return true;
  248. }
  249. /* This returns the tstamp value set by TCP in terms of the set clock. */
  250. static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
  251. {
  252. unsigned int offset = skb_network_offset(skb);
  253. const struct ipv6hdr *ipv6h;
  254. const struct iphdr *iph;
  255. struct ipv6hdr _ipv6h;
  256. ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
  257. if (!ipv6h)
  258. return 0;
  259. if (ipv6h->version == 4) {
  260. iph = (struct iphdr *)ipv6h;
  261. offset += iph->ihl * 4;
  262. /* special-case 6in4 tunnelling, as that is a common way to get
  263. * v6 connectivity in the home
  264. */
  265. if (iph->protocol == IPPROTO_IPV6) {
  266. ipv6h = skb_header_pointer(skb, offset,
  267. sizeof(_ipv6h), &_ipv6h);
  268. if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
  269. return 0;
  270. } else if (iph->protocol != IPPROTO_TCP) {
  271. return 0;
  272. }
  273. } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
  274. return 0;
  275. }
  276. return taprio_mono_to_any(q, skb->skb_mstamp_ns);
  277. }
  278. /* There are a few scenarios where we will have to modify the txtime from
  279. * what is read from next_txtime in sched_entry. They are:
  280. * 1. If txtime is in the past,
  281. * a. The gate for the traffic class is currently open and packet can be
  282. * transmitted before it closes, schedule the packet right away.
  283. * b. If the gate corresponding to the traffic class is going to open later
  284. * in the cycle, set the txtime of packet to the interval start.
  285. * 2. If txtime is in the future, there are packets corresponding to the
  286. * current traffic class waiting to be transmitted. So, the following
  287. * possibilities exist:
  288. * a. We can transmit the packet before the window containing the txtime
  289. * closes.
  290. * b. The window might close before the transmission can be completed
  291. * successfully. So, schedule the packet in the next open window.
  292. */
  293. static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
  294. {
  295. ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
  296. struct taprio_sched *q = qdisc_priv(sch);
  297. struct sched_gate_list *sched, *admin;
  298. ktime_t minimum_time, now, txtime;
  299. int len, packet_transmit_time;
  300. struct sched_entry *entry;
  301. bool sched_changed;
  302. now = taprio_get_time(q);
  303. minimum_time = ktime_add_ns(now, q->txtime_delay);
  304. tcp_tstamp = get_tcp_tstamp(q, skb);
  305. minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
  306. rcu_read_lock();
  307. admin = rcu_dereference(q->admin_sched);
  308. sched = rcu_dereference(q->oper_sched);
  309. if (admin && ktime_after(minimum_time, admin->base_time))
  310. switch_schedules(q, &admin, &sched);
  311. /* Until the schedule starts, all the queues are open */
  312. if (!sched || ktime_before(minimum_time, sched->base_time)) {
  313. txtime = minimum_time;
  314. goto done;
  315. }
  316. len = qdisc_pkt_len(skb);
  317. packet_transmit_time = length_to_duration(q, len);
  318. do {
  319. sched_changed = false;
  320. entry = find_entry_to_transmit(skb, sch, sched, admin,
  321. minimum_time,
  322. &interval_start, &interval_end,
  323. false);
  324. if (!entry) {
  325. txtime = 0;
  326. goto done;
  327. }
  328. txtime = entry->next_txtime;
  329. txtime = max_t(ktime_t, txtime, minimum_time);
  330. txtime = max_t(ktime_t, txtime, interval_start);
  331. if (admin && admin != sched &&
  332. ktime_after(txtime, admin->base_time)) {
  333. sched = admin;
  334. sched_changed = true;
  335. continue;
  336. }
  337. transmit_end_time = ktime_add(txtime, packet_transmit_time);
  338. minimum_time = transmit_end_time;
  339. /* Update the txtime of current entry to the next time it's
  340. * interval starts.
  341. */
  342. if (ktime_after(transmit_end_time, interval_end))
  343. entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
  344. } while (sched_changed || ktime_after(transmit_end_time, interval_end));
  345. entry->next_txtime = transmit_end_time;
  346. done:
  347. rcu_read_unlock();
  348. return txtime;
  349. }
  350. static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
  351. struct Qdisc *child, struct sk_buff **to_free)
  352. {
  353. struct taprio_sched *q = qdisc_priv(sch);
  354. struct net_device *dev = qdisc_dev(sch);
  355. int prio = skb->priority;
  356. u8 tc;
  357. /* sk_flags are only safe to use on full sockets. */
  358. if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
  359. if (!is_valid_interval(skb, sch))
  360. return qdisc_drop(skb, sch, to_free);
  361. } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  362. skb->tstamp = get_packet_txtime(skb, sch);
  363. if (!skb->tstamp)
  364. return qdisc_drop(skb, sch, to_free);
  365. }
  366. /* Devices with full offload are expected to honor this in hardware */
  367. tc = netdev_get_prio_tc_map(dev, prio);
  368. if (skb->len > q->max_frm_len[tc])
  369. return qdisc_drop(skb, sch, to_free);
  370. qdisc_qstats_backlog_inc(sch, skb);
  371. sch->q.qlen++;
  372. return qdisc_enqueue(skb, child, to_free);
  373. }
  374. /* Will not be called in the full offload case, since the TX queues are
  375. * attached to the Qdisc created using qdisc_create_dflt()
  376. */
  377. static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  378. struct sk_buff **to_free)
  379. {
  380. struct taprio_sched *q = qdisc_priv(sch);
  381. struct Qdisc *child;
  382. int queue;
  383. queue = skb_get_queue_mapping(skb);
  384. child = q->qdiscs[queue];
  385. if (unlikely(!child))
  386. return qdisc_drop(skb, sch, to_free);
  387. /* Large packets might not be transmitted when the transmission duration
  388. * exceeds any configured interval. Therefore, segment the skb into
  389. * smaller chunks. Drivers with full offload are expected to handle
  390. * this in hardware.
  391. */
  392. if (skb_is_gso(skb)) {
  393. unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
  394. netdev_features_t features = netif_skb_features(skb);
  395. struct sk_buff *segs, *nskb;
  396. int ret;
  397. segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
  398. if (IS_ERR_OR_NULL(segs))
  399. return qdisc_drop(skb, sch, to_free);
  400. skb_list_walk_safe(segs, segs, nskb) {
  401. skb_mark_not_on_list(segs);
  402. qdisc_skb_cb(segs)->pkt_len = segs->len;
  403. slen += segs->len;
  404. ret = taprio_enqueue_one(segs, sch, child, to_free);
  405. if (ret != NET_XMIT_SUCCESS) {
  406. if (net_xmit_drop_count(ret))
  407. qdisc_qstats_drop(sch);
  408. } else {
  409. numsegs++;
  410. }
  411. }
  412. if (numsegs > 1)
  413. qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
  414. consume_skb(skb);
  415. return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
  416. }
  417. return taprio_enqueue_one(skb, sch, child, to_free);
  418. }
  419. /* Will not be called in the full offload case, since the TX queues are
  420. * attached to the Qdisc created using qdisc_create_dflt()
  421. */
  422. static struct sk_buff *taprio_peek(struct Qdisc *sch)
  423. {
  424. struct taprio_sched *q = qdisc_priv(sch);
  425. struct net_device *dev = qdisc_dev(sch);
  426. struct sched_entry *entry;
  427. struct sk_buff *skb;
  428. u32 gate_mask;
  429. int i;
  430. rcu_read_lock();
  431. entry = rcu_dereference(q->current_entry);
  432. gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
  433. rcu_read_unlock();
  434. if (!gate_mask)
  435. return NULL;
  436. for (i = 0; i < dev->num_tx_queues; i++) {
  437. struct Qdisc *child = q->qdiscs[i];
  438. int prio;
  439. u8 tc;
  440. if (unlikely(!child))
  441. continue;
  442. skb = child->ops->peek(child);
  443. if (!skb)
  444. continue;
  445. if (TXTIME_ASSIST_IS_ENABLED(q->flags))
  446. return skb;
  447. prio = skb->priority;
  448. tc = netdev_get_prio_tc_map(dev, prio);
  449. if (!(gate_mask & BIT(tc)))
  450. continue;
  451. return skb;
  452. }
  453. return NULL;
  454. }
  455. static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
  456. {
  457. atomic_set(&entry->budget,
  458. div64_u64((u64)entry->interval * PSEC_PER_NSEC,
  459. atomic64_read(&q->picos_per_byte)));
  460. }
  461. /* Will not be called in the full offload case, since the TX queues are
  462. * attached to the Qdisc created using qdisc_create_dflt()
  463. */
  464. static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
  465. {
  466. struct taprio_sched *q = qdisc_priv(sch);
  467. struct net_device *dev = qdisc_dev(sch);
  468. struct sk_buff *skb = NULL;
  469. struct sched_entry *entry;
  470. u32 gate_mask;
  471. int i;
  472. rcu_read_lock();
  473. entry = rcu_dereference(q->current_entry);
  474. /* if there's no entry, it means that the schedule didn't
  475. * start yet, so force all gates to be open, this is in
  476. * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
  477. * "AdminGateStates"
  478. */
  479. gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
  480. if (!gate_mask)
  481. goto done;
  482. for (i = 0; i < dev->num_tx_queues; i++) {
  483. struct Qdisc *child = q->qdiscs[i];
  484. ktime_t guard;
  485. int prio;
  486. int len;
  487. u8 tc;
  488. if (unlikely(!child))
  489. continue;
  490. if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  491. skb = child->ops->dequeue(child);
  492. if (!skb)
  493. continue;
  494. goto skb_found;
  495. }
  496. skb = child->ops->peek(child);
  497. if (!skb)
  498. continue;
  499. prio = skb->priority;
  500. tc = netdev_get_prio_tc_map(dev, prio);
  501. if (!(gate_mask & BIT(tc))) {
  502. skb = NULL;
  503. continue;
  504. }
  505. len = qdisc_pkt_len(skb);
  506. guard = ktime_add_ns(taprio_get_time(q),
  507. length_to_duration(q, len));
  508. /* In the case that there's no gate entry, there's no
  509. * guard band ...
  510. */
  511. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  512. ktime_after(guard, entry->close_time)) {
  513. skb = NULL;
  514. continue;
  515. }
  516. /* ... and no budget. */
  517. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  518. atomic_sub_return(len, &entry->budget) < 0) {
  519. skb = NULL;
  520. continue;
  521. }
  522. skb = child->ops->dequeue(child);
  523. if (unlikely(!skb))
  524. goto done;
  525. skb_found:
  526. qdisc_bstats_update(sch, skb);
  527. qdisc_qstats_backlog_dec(sch, skb);
  528. sch->q.qlen--;
  529. goto done;
  530. }
  531. done:
  532. rcu_read_unlock();
  533. return skb;
  534. }
  535. static bool should_restart_cycle(const struct sched_gate_list *oper,
  536. const struct sched_entry *entry)
  537. {
  538. if (list_is_last(&entry->list, &oper->entries))
  539. return true;
  540. if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
  541. return true;
  542. return false;
  543. }
  544. static bool should_change_schedules(const struct sched_gate_list *admin,
  545. const struct sched_gate_list *oper,
  546. ktime_t close_time)
  547. {
  548. ktime_t next_base_time, extension_time;
  549. if (!admin)
  550. return false;
  551. next_base_time = sched_base_time(admin);
  552. /* This is the simple case, the close_time would fall after
  553. * the next schedule base_time.
  554. */
  555. if (ktime_compare(next_base_time, close_time) <= 0)
  556. return true;
  557. /* This is the cycle_time_extension case, if the close_time
  558. * plus the amount that can be extended would fall after the
  559. * next schedule base_time, we can extend the current schedule
  560. * for that amount.
  561. */
  562. extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
  563. /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
  564. * how precisely the extension should be made. So after
  565. * conformance testing, this logic may change.
  566. */
  567. if (ktime_compare(next_base_time, extension_time) <= 0)
  568. return true;
  569. return false;
  570. }
  571. static enum hrtimer_restart advance_sched(struct hrtimer *timer)
  572. {
  573. struct taprio_sched *q = container_of(timer, struct taprio_sched,
  574. advance_timer);
  575. struct sched_gate_list *oper, *admin;
  576. struct sched_entry *entry, *next;
  577. struct Qdisc *sch = q->root;
  578. ktime_t close_time;
  579. spin_lock(&q->current_entry_lock);
  580. entry = rcu_dereference_protected(q->current_entry,
  581. lockdep_is_held(&q->current_entry_lock));
  582. oper = rcu_dereference_protected(q->oper_sched,
  583. lockdep_is_held(&q->current_entry_lock));
  584. admin = rcu_dereference_protected(q->admin_sched,
  585. lockdep_is_held(&q->current_entry_lock));
  586. if (!oper)
  587. switch_schedules(q, &admin, &oper);
  588. /* This can happen in two cases: 1. this is the very first run
  589. * of this function (i.e. we weren't running any schedule
  590. * previously); 2. The previous schedule just ended. The first
  591. * entry of all schedules are pre-calculated during the
  592. * schedule initialization.
  593. */
  594. if (unlikely(!entry || entry->close_time == oper->base_time)) {
  595. next = list_first_entry(&oper->entries, struct sched_entry,
  596. list);
  597. close_time = next->close_time;
  598. goto first_run;
  599. }
  600. if (should_restart_cycle(oper, entry)) {
  601. next = list_first_entry(&oper->entries, struct sched_entry,
  602. list);
  603. oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
  604. oper->cycle_time);
  605. } else {
  606. next = list_next_entry(entry, list);
  607. }
  608. close_time = ktime_add_ns(entry->close_time, next->interval);
  609. close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
  610. if (should_change_schedules(admin, oper, close_time)) {
  611. /* Set things so the next time this runs, the new
  612. * schedule runs.
  613. */
  614. close_time = sched_base_time(admin);
  615. switch_schedules(q, &admin, &oper);
  616. }
  617. next->close_time = close_time;
  618. taprio_set_budget(q, next);
  619. first_run:
  620. rcu_assign_pointer(q->current_entry, next);
  621. spin_unlock(&q->current_entry_lock);
  622. hrtimer_set_expires(&q->advance_timer, close_time);
  623. rcu_read_lock();
  624. __netif_schedule(sch);
  625. rcu_read_unlock();
  626. return HRTIMER_RESTART;
  627. }
  628. static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
  629. [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
  630. [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
  631. [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
  632. [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
  633. };
  634. static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
  635. [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
  636. [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
  637. };
  638. static struct netlink_range_validation_signed taprio_cycle_time_range = {
  639. .min = 0,
  640. .max = INT_MAX,
  641. };
  642. static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
  643. [TCA_TAPRIO_ATTR_PRIOMAP] = {
  644. .len = sizeof(struct tc_mqprio_qopt)
  645. },
  646. [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
  647. [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
  648. [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
  649. [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
  650. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
  651. NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
  652. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
  653. [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
  654. [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
  655. [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
  656. };
  657. static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
  658. struct sched_entry *entry,
  659. struct netlink_ext_ack *extack)
  660. {
  661. int min_duration = length_to_duration(q, ETH_ZLEN);
  662. u32 interval = 0;
  663. if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
  664. entry->command = nla_get_u8(
  665. tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
  666. if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
  667. entry->gate_mask = nla_get_u32(
  668. tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
  669. if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
  670. interval = nla_get_u32(
  671. tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
  672. /* The interval should allow at least the minimum ethernet
  673. * frame to go out.
  674. */
  675. if (interval < min_duration) {
  676. NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
  677. return -EINVAL;
  678. }
  679. entry->interval = interval;
  680. return 0;
  681. }
  682. static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
  683. struct sched_entry *entry, int index,
  684. struct netlink_ext_ack *extack)
  685. {
  686. struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
  687. int err;
  688. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
  689. entry_policy, NULL);
  690. if (err < 0) {
  691. NL_SET_ERR_MSG(extack, "Could not parse nested entry");
  692. return -EINVAL;
  693. }
  694. entry->index = index;
  695. return fill_sched_entry(q, tb, entry, extack);
  696. }
  697. static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
  698. struct sched_gate_list *sched,
  699. struct netlink_ext_ack *extack)
  700. {
  701. struct nlattr *n;
  702. int err, rem;
  703. int i = 0;
  704. if (!list)
  705. return -EINVAL;
  706. nla_for_each_nested(n, list, rem) {
  707. struct sched_entry *entry;
  708. if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
  709. NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
  710. continue;
  711. }
  712. entry = kzalloc(sizeof(*entry), GFP_KERNEL);
  713. if (!entry) {
  714. NL_SET_ERR_MSG(extack, "Not enough memory for entry");
  715. return -ENOMEM;
  716. }
  717. err = parse_sched_entry(q, n, entry, i, extack);
  718. if (err < 0) {
  719. kfree(entry);
  720. return err;
  721. }
  722. list_add_tail(&entry->list, &sched->entries);
  723. i++;
  724. }
  725. sched->num_entries = i;
  726. return i;
  727. }
  728. static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
  729. struct sched_gate_list *new,
  730. struct netlink_ext_ack *extack)
  731. {
  732. int err = 0;
  733. if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
  734. NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
  735. return -ENOTSUPP;
  736. }
  737. if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
  738. new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
  739. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
  740. new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
  741. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
  742. new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
  743. if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
  744. err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
  745. new, extack);
  746. if (err < 0)
  747. return err;
  748. if (!new->cycle_time) {
  749. struct sched_entry *entry;
  750. ktime_t cycle = 0;
  751. list_for_each_entry(entry, &new->entries, list)
  752. cycle = ktime_add_ns(cycle, entry->interval);
  753. if (!cycle) {
  754. NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
  755. return -EINVAL;
  756. }
  757. if (cycle < 0 || cycle > INT_MAX) {
  758. NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
  759. return -EINVAL;
  760. }
  761. new->cycle_time = cycle;
  762. }
  763. return 0;
  764. }
  765. static int taprio_parse_mqprio_opt(struct net_device *dev,
  766. struct tc_mqprio_qopt *qopt,
  767. struct netlink_ext_ack *extack,
  768. u32 taprio_flags)
  769. {
  770. int i, j;
  771. if (!qopt && !dev->num_tc) {
  772. NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
  773. return -EINVAL;
  774. }
  775. /* If num_tc is already set, it means that the user already
  776. * configured the mqprio part
  777. */
  778. if (dev->num_tc)
  779. return 0;
  780. /* Verify num_tc is not out of max range */
  781. if (qopt->num_tc > TC_MAX_QUEUE) {
  782. NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
  783. return -EINVAL;
  784. }
  785. /* taprio imposes that traffic classes map 1:n to tx queues */
  786. if (qopt->num_tc > dev->num_tx_queues) {
  787. NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
  788. return -EINVAL;
  789. }
  790. /* Verify priority mapping uses valid tcs */
  791. for (i = 0; i <= TC_BITMASK; i++) {
  792. if (qopt->prio_tc_map[i] >= qopt->num_tc) {
  793. NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
  794. return -EINVAL;
  795. }
  796. }
  797. for (i = 0; i < qopt->num_tc; i++) {
  798. unsigned int last = qopt->offset[i] + qopt->count[i];
  799. /* Verify the queue count is in tx range being equal to the
  800. * real_num_tx_queues indicates the last queue is in use.
  801. */
  802. if (qopt->offset[i] >= dev->num_tx_queues ||
  803. !qopt->count[i] ||
  804. last > dev->real_num_tx_queues) {
  805. NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
  806. return -EINVAL;
  807. }
  808. if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
  809. continue;
  810. /* Verify that the offset and counts do not overlap */
  811. for (j = i + 1; j < qopt->num_tc; j++) {
  812. if (last > qopt->offset[j]) {
  813. NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
  814. return -EINVAL;
  815. }
  816. }
  817. }
  818. return 0;
  819. }
  820. static int taprio_get_start_time(struct Qdisc *sch,
  821. struct sched_gate_list *sched,
  822. ktime_t *start)
  823. {
  824. struct taprio_sched *q = qdisc_priv(sch);
  825. ktime_t now, base, cycle;
  826. s64 n;
  827. base = sched_base_time(sched);
  828. now = taprio_get_time(q);
  829. if (ktime_after(base, now)) {
  830. *start = base;
  831. return 0;
  832. }
  833. cycle = sched->cycle_time;
  834. /* The qdisc is expected to have at least one sched_entry. Moreover,
  835. * any entry must have 'interval' > 0. Thus if the cycle time is zero,
  836. * something went really wrong. In that case, we should warn about this
  837. * inconsistent state and return error.
  838. */
  839. if (WARN_ON(!cycle))
  840. return -EFAULT;
  841. /* Schedule the start time for the beginning of the next
  842. * cycle.
  843. */
  844. n = div64_s64(ktime_sub_ns(now, base), cycle);
  845. *start = ktime_add_ns(base, (n + 1) * cycle);
  846. return 0;
  847. }
  848. static void setup_first_close_time(struct taprio_sched *q,
  849. struct sched_gate_list *sched, ktime_t base)
  850. {
  851. struct sched_entry *first;
  852. ktime_t cycle;
  853. first = list_first_entry(&sched->entries,
  854. struct sched_entry, list);
  855. cycle = sched->cycle_time;
  856. /* FIXME: find a better place to do this */
  857. sched->cycle_close_time = ktime_add_ns(base, cycle);
  858. first->close_time = ktime_add_ns(base, first->interval);
  859. taprio_set_budget(q, first);
  860. rcu_assign_pointer(q->current_entry, NULL);
  861. }
  862. static void taprio_start_sched(struct Qdisc *sch,
  863. ktime_t start, struct sched_gate_list *new)
  864. {
  865. struct taprio_sched *q = qdisc_priv(sch);
  866. ktime_t expires;
  867. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  868. return;
  869. expires = hrtimer_get_expires(&q->advance_timer);
  870. if (expires == 0)
  871. expires = KTIME_MAX;
  872. /* If the new schedule starts before the next expiration, we
  873. * reprogram it to the earliest one, so we change the admin
  874. * schedule to the operational one at the right time.
  875. */
  876. start = min_t(ktime_t, start, expires);
  877. hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
  878. }
  879. static void taprio_set_picos_per_byte(struct net_device *dev,
  880. struct taprio_sched *q)
  881. {
  882. struct ethtool_link_ksettings ecmd;
  883. int speed = SPEED_10;
  884. int picos_per_byte;
  885. int err;
  886. err = __ethtool_get_link_ksettings(dev, &ecmd);
  887. if (err < 0)
  888. goto skip;
  889. if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
  890. speed = ecmd.base.speed;
  891. skip:
  892. picos_per_byte = (USEC_PER_SEC * 8) / speed;
  893. atomic64_set(&q->picos_per_byte, picos_per_byte);
  894. netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
  895. dev->name, (long long)atomic64_read(&q->picos_per_byte),
  896. ecmd.base.speed);
  897. }
  898. static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
  899. void *ptr)
  900. {
  901. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  902. struct taprio_sched *q;
  903. ASSERT_RTNL();
  904. if (event != NETDEV_UP && event != NETDEV_CHANGE)
  905. return NOTIFY_DONE;
  906. list_for_each_entry(q, &taprio_list, taprio_list) {
  907. if (dev != qdisc_dev(q->root))
  908. continue;
  909. taprio_set_picos_per_byte(dev, q);
  910. break;
  911. }
  912. return NOTIFY_DONE;
  913. }
  914. static void setup_txtime(struct taprio_sched *q,
  915. struct sched_gate_list *sched, ktime_t base)
  916. {
  917. struct sched_entry *entry;
  918. u64 interval = 0;
  919. list_for_each_entry(entry, &sched->entries, list) {
  920. entry->next_txtime = ktime_add_ns(base, interval);
  921. interval += entry->interval;
  922. }
  923. }
  924. static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
  925. {
  926. struct __tc_taprio_qopt_offload *__offload;
  927. __offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
  928. GFP_KERNEL);
  929. if (!__offload)
  930. return NULL;
  931. refcount_set(&__offload->users, 1);
  932. return &__offload->offload;
  933. }
  934. struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
  935. *offload)
  936. {
  937. struct __tc_taprio_qopt_offload *__offload;
  938. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  939. offload);
  940. refcount_inc(&__offload->users);
  941. return offload;
  942. }
  943. EXPORT_SYMBOL_GPL(taprio_offload_get);
  944. void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
  945. {
  946. struct __tc_taprio_qopt_offload *__offload;
  947. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  948. offload);
  949. if (!refcount_dec_and_test(&__offload->users))
  950. return;
  951. kfree(__offload);
  952. }
  953. EXPORT_SYMBOL_GPL(taprio_offload_free);
  954. /* The function will only serve to keep the pointers to the "oper" and "admin"
  955. * schedules valid in relation to their base times, so when calling dump() the
  956. * users looks at the right schedules.
  957. * When using full offload, the admin configuration is promoted to oper at the
  958. * base_time in the PHC time domain. But because the system time is not
  959. * necessarily in sync with that, we can't just trigger a hrtimer to call
  960. * switch_schedules at the right hardware time.
  961. * At the moment we call this by hand right away from taprio, but in the future
  962. * it will be useful to create a mechanism for drivers to notify taprio of the
  963. * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
  964. * This is left as TODO.
  965. */
  966. static void taprio_offload_config_changed(struct taprio_sched *q)
  967. {
  968. struct sched_gate_list *oper, *admin;
  969. oper = rtnl_dereference(q->oper_sched);
  970. admin = rtnl_dereference(q->admin_sched);
  971. switch_schedules(q, &admin, &oper);
  972. }
  973. static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
  974. {
  975. u32 i, queue_mask = 0;
  976. for (i = 0; i < dev->num_tc; i++) {
  977. u32 offset, count;
  978. if (!(tc_mask & BIT(i)))
  979. continue;
  980. offset = dev->tc_to_txq[i].offset;
  981. count = dev->tc_to_txq[i].count;
  982. queue_mask |= GENMASK(offset + count - 1, offset);
  983. }
  984. return queue_mask;
  985. }
  986. static void taprio_sched_to_offload(struct net_device *dev,
  987. struct sched_gate_list *sched,
  988. struct tc_taprio_qopt_offload *offload)
  989. {
  990. struct sched_entry *entry;
  991. int i = 0;
  992. offload->base_time = sched->base_time;
  993. offload->cycle_time = sched->cycle_time;
  994. offload->cycle_time_extension = sched->cycle_time_extension;
  995. list_for_each_entry(entry, &sched->entries, list) {
  996. struct tc_taprio_sched_entry *e = &offload->entries[i];
  997. e->command = entry->command;
  998. e->interval = entry->interval;
  999. e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
  1000. i++;
  1001. }
  1002. offload->num_entries = i;
  1003. }
  1004. static int taprio_enable_offload(struct net_device *dev,
  1005. struct taprio_sched *q,
  1006. struct sched_gate_list *sched,
  1007. struct netlink_ext_ack *extack)
  1008. {
  1009. const struct net_device_ops *ops = dev->netdev_ops;
  1010. struct tc_taprio_qopt_offload *offload;
  1011. struct tc_taprio_caps caps;
  1012. int tc, err = 0;
  1013. if (!ops->ndo_setup_tc) {
  1014. NL_SET_ERR_MSG(extack,
  1015. "Device does not support taprio offload");
  1016. return -EOPNOTSUPP;
  1017. }
  1018. qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
  1019. &caps, sizeof(caps));
  1020. if (!caps.supports_queue_max_sdu) {
  1021. for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
  1022. if (q->max_sdu[tc]) {
  1023. NL_SET_ERR_MSG_MOD(extack,
  1024. "Device does not handle queueMaxSDU");
  1025. return -EOPNOTSUPP;
  1026. }
  1027. }
  1028. }
  1029. offload = taprio_offload_alloc(sched->num_entries);
  1030. if (!offload) {
  1031. NL_SET_ERR_MSG(extack,
  1032. "Not enough memory for enabling offload mode");
  1033. return -ENOMEM;
  1034. }
  1035. offload->enable = 1;
  1036. taprio_sched_to_offload(dev, sched, offload);
  1037. for (tc = 0; tc < TC_MAX_QUEUE; tc++)
  1038. offload->max_sdu[tc] = q->max_sdu[tc];
  1039. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1040. if (err < 0) {
  1041. NL_SET_ERR_MSG(extack,
  1042. "Device failed to setup taprio offload");
  1043. goto done;
  1044. }
  1045. q->offloaded = true;
  1046. done:
  1047. taprio_offload_free(offload);
  1048. return err;
  1049. }
  1050. static int taprio_disable_offload(struct net_device *dev,
  1051. struct taprio_sched *q,
  1052. struct netlink_ext_ack *extack)
  1053. {
  1054. const struct net_device_ops *ops = dev->netdev_ops;
  1055. struct tc_taprio_qopt_offload *offload;
  1056. int err;
  1057. if (!q->offloaded)
  1058. return 0;
  1059. offload = taprio_offload_alloc(0);
  1060. if (!offload) {
  1061. NL_SET_ERR_MSG(extack,
  1062. "Not enough memory to disable offload mode");
  1063. return -ENOMEM;
  1064. }
  1065. offload->enable = 0;
  1066. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1067. if (err < 0) {
  1068. NL_SET_ERR_MSG(extack,
  1069. "Device failed to disable offload");
  1070. goto out;
  1071. }
  1072. q->offloaded = false;
  1073. out:
  1074. taprio_offload_free(offload);
  1075. return err;
  1076. }
  1077. /* If full offload is enabled, the only possible clockid is the net device's
  1078. * PHC. For that reason, specifying a clockid through netlink is incorrect.
  1079. * For txtime-assist, it is implicitly assumed that the device's PHC is kept
  1080. * in sync with the specified clockid via a user space daemon such as phc2sys.
  1081. * For both software taprio and txtime-assist, the clockid is used for the
  1082. * hrtimer that advances the schedule and hence mandatory.
  1083. */
  1084. static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
  1085. struct netlink_ext_ack *extack)
  1086. {
  1087. struct taprio_sched *q = qdisc_priv(sch);
  1088. struct net_device *dev = qdisc_dev(sch);
  1089. int err = -EINVAL;
  1090. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1091. const struct ethtool_ops *ops = dev->ethtool_ops;
  1092. struct ethtool_ts_info info = {
  1093. .cmd = ETHTOOL_GET_TS_INFO,
  1094. .phc_index = -1,
  1095. };
  1096. if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
  1097. NL_SET_ERR_MSG(extack,
  1098. "The 'clockid' cannot be specified for full offload");
  1099. goto out;
  1100. }
  1101. if (ops && ops->get_ts_info)
  1102. err = ops->get_ts_info(dev, &info);
  1103. if (err || info.phc_index < 0) {
  1104. NL_SET_ERR_MSG(extack,
  1105. "Device does not have a PTP clock");
  1106. err = -ENOTSUPP;
  1107. goto out;
  1108. }
  1109. } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
  1110. int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
  1111. enum tk_offsets tk_offset;
  1112. /* We only support static clockids and we don't allow
  1113. * for it to be modified after the first init.
  1114. */
  1115. if (clockid < 0 ||
  1116. (q->clockid != -1 && q->clockid != clockid)) {
  1117. NL_SET_ERR_MSG(extack,
  1118. "Changing the 'clockid' of a running schedule is not supported");
  1119. err = -ENOTSUPP;
  1120. goto out;
  1121. }
  1122. switch (clockid) {
  1123. case CLOCK_REALTIME:
  1124. tk_offset = TK_OFFS_REAL;
  1125. break;
  1126. case CLOCK_MONOTONIC:
  1127. tk_offset = TK_OFFS_MAX;
  1128. break;
  1129. case CLOCK_BOOTTIME:
  1130. tk_offset = TK_OFFS_BOOT;
  1131. break;
  1132. case CLOCK_TAI:
  1133. tk_offset = TK_OFFS_TAI;
  1134. break;
  1135. default:
  1136. NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
  1137. err = -EINVAL;
  1138. goto out;
  1139. }
  1140. /* This pairs with READ_ONCE() in taprio_mono_to_any */
  1141. WRITE_ONCE(q->tk_offset, tk_offset);
  1142. q->clockid = clockid;
  1143. } else {
  1144. NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
  1145. goto out;
  1146. }
  1147. /* Everything went ok, return success. */
  1148. err = 0;
  1149. out:
  1150. return err;
  1151. }
  1152. static int taprio_parse_tc_entry(struct Qdisc *sch,
  1153. struct nlattr *opt,
  1154. u32 max_sdu[TC_QOPT_MAX_QUEUE],
  1155. unsigned long *seen_tcs,
  1156. struct netlink_ext_ack *extack)
  1157. {
  1158. struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
  1159. struct net_device *dev = qdisc_dev(sch);
  1160. u32 val = 0;
  1161. int err, tc;
  1162. err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
  1163. taprio_tc_policy, extack);
  1164. if (err < 0)
  1165. return err;
  1166. if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
  1167. NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
  1168. return -EINVAL;
  1169. }
  1170. tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
  1171. if (tc >= TC_QOPT_MAX_QUEUE) {
  1172. NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
  1173. return -ERANGE;
  1174. }
  1175. if (*seen_tcs & BIT(tc)) {
  1176. NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
  1177. return -EINVAL;
  1178. }
  1179. *seen_tcs |= BIT(tc);
  1180. if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
  1181. val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
  1182. if (val > dev->max_mtu) {
  1183. NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
  1184. return -ERANGE;
  1185. }
  1186. max_sdu[tc] = val;
  1187. return 0;
  1188. }
  1189. static int taprio_parse_tc_entries(struct Qdisc *sch,
  1190. struct nlattr *opt,
  1191. struct netlink_ext_ack *extack)
  1192. {
  1193. struct taprio_sched *q = qdisc_priv(sch);
  1194. struct net_device *dev = qdisc_dev(sch);
  1195. u32 max_sdu[TC_QOPT_MAX_QUEUE];
  1196. unsigned long seen_tcs = 0;
  1197. struct nlattr *n;
  1198. int tc, rem;
  1199. int err = 0;
  1200. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
  1201. max_sdu[tc] = q->max_sdu[tc];
  1202. nla_for_each_nested(n, opt, rem) {
  1203. if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
  1204. continue;
  1205. err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack);
  1206. if (err)
  1207. goto out;
  1208. }
  1209. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
  1210. q->max_sdu[tc] = max_sdu[tc];
  1211. if (max_sdu[tc])
  1212. q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
  1213. else
  1214. q->max_frm_len[tc] = U32_MAX; /* never oversized */
  1215. }
  1216. out:
  1217. return err;
  1218. }
  1219. static int taprio_mqprio_cmp(const struct net_device *dev,
  1220. const struct tc_mqprio_qopt *mqprio)
  1221. {
  1222. int i;
  1223. if (!mqprio || mqprio->num_tc != dev->num_tc)
  1224. return -1;
  1225. for (i = 0; i < mqprio->num_tc; i++)
  1226. if (dev->tc_to_txq[i].count != mqprio->count[i] ||
  1227. dev->tc_to_txq[i].offset != mqprio->offset[i])
  1228. return -1;
  1229. for (i = 0; i <= TC_BITMASK; i++)
  1230. if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
  1231. return -1;
  1232. return 0;
  1233. }
  1234. /* The semantics of the 'flags' argument in relation to 'change()'
  1235. * requests, are interpreted following two rules (which are applied in
  1236. * this order): (1) an omitted 'flags' argument is interpreted as
  1237. * zero; (2) the 'flags' of a "running" taprio instance cannot be
  1238. * changed.
  1239. */
  1240. static int taprio_new_flags(const struct nlattr *attr, u32 old,
  1241. struct netlink_ext_ack *extack)
  1242. {
  1243. u32 new = 0;
  1244. if (attr)
  1245. new = nla_get_u32(attr);
  1246. if (old != TAPRIO_FLAGS_INVALID && old != new) {
  1247. NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
  1248. return -EOPNOTSUPP;
  1249. }
  1250. if (!taprio_flags_valid(new)) {
  1251. NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
  1252. return -EINVAL;
  1253. }
  1254. return new;
  1255. }
  1256. static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
  1257. struct netlink_ext_ack *extack)
  1258. {
  1259. struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
  1260. struct sched_gate_list *oper, *admin, *new_admin;
  1261. struct taprio_sched *q = qdisc_priv(sch);
  1262. struct net_device *dev = qdisc_dev(sch);
  1263. struct tc_mqprio_qopt *mqprio = NULL;
  1264. unsigned long flags;
  1265. ktime_t start;
  1266. int i, err;
  1267. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
  1268. taprio_policy, extack);
  1269. if (err < 0)
  1270. return err;
  1271. if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
  1272. mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
  1273. err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
  1274. q->flags, extack);
  1275. if (err < 0)
  1276. return err;
  1277. q->flags = err;
  1278. err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
  1279. if (err < 0)
  1280. return err;
  1281. err = taprio_parse_tc_entries(sch, opt, extack);
  1282. if (err)
  1283. return err;
  1284. new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
  1285. if (!new_admin) {
  1286. NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
  1287. return -ENOMEM;
  1288. }
  1289. INIT_LIST_HEAD(&new_admin->entries);
  1290. oper = rtnl_dereference(q->oper_sched);
  1291. admin = rtnl_dereference(q->admin_sched);
  1292. /* no changes - no new mqprio settings */
  1293. if (!taprio_mqprio_cmp(dev, mqprio))
  1294. mqprio = NULL;
  1295. if (mqprio && (oper || admin)) {
  1296. NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
  1297. err = -ENOTSUPP;
  1298. goto free_sched;
  1299. }
  1300. err = parse_taprio_schedule(q, tb, new_admin, extack);
  1301. if (err < 0)
  1302. goto free_sched;
  1303. if (new_admin->num_entries == 0) {
  1304. NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
  1305. err = -EINVAL;
  1306. goto free_sched;
  1307. }
  1308. err = taprio_parse_clockid(sch, tb, extack);
  1309. if (err < 0)
  1310. goto free_sched;
  1311. taprio_set_picos_per_byte(dev, q);
  1312. if (mqprio) {
  1313. err = netdev_set_num_tc(dev, mqprio->num_tc);
  1314. if (err)
  1315. goto free_sched;
  1316. for (i = 0; i < mqprio->num_tc; i++)
  1317. netdev_set_tc_queue(dev, i,
  1318. mqprio->count[i],
  1319. mqprio->offset[i]);
  1320. /* Always use supplied priority mappings */
  1321. for (i = 0; i <= TC_BITMASK; i++)
  1322. netdev_set_prio_tc_map(dev, i,
  1323. mqprio->prio_tc_map[i]);
  1324. }
  1325. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1326. err = taprio_enable_offload(dev, q, new_admin, extack);
  1327. else
  1328. err = taprio_disable_offload(dev, q, extack);
  1329. if (err)
  1330. goto free_sched;
  1331. /* Protects against enqueue()/dequeue() */
  1332. spin_lock_bh(qdisc_lock(sch));
  1333. if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
  1334. if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  1335. NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
  1336. err = -EINVAL;
  1337. goto unlock;
  1338. }
  1339. q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
  1340. }
  1341. if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
  1342. !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
  1343. !hrtimer_active(&q->advance_timer)) {
  1344. hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
  1345. q->advance_timer.function = advance_sched;
  1346. }
  1347. err = taprio_get_start_time(sch, new_admin, &start);
  1348. if (err < 0) {
  1349. NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
  1350. goto unlock;
  1351. }
  1352. setup_txtime(q, new_admin, start);
  1353. if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  1354. if (!oper) {
  1355. rcu_assign_pointer(q->oper_sched, new_admin);
  1356. err = 0;
  1357. new_admin = NULL;
  1358. goto unlock;
  1359. }
  1360. rcu_assign_pointer(q->admin_sched, new_admin);
  1361. if (admin)
  1362. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1363. } else {
  1364. setup_first_close_time(q, new_admin, start);
  1365. /* Protects against advance_sched() */
  1366. spin_lock_irqsave(&q->current_entry_lock, flags);
  1367. taprio_start_sched(sch, start, new_admin);
  1368. rcu_assign_pointer(q->admin_sched, new_admin);
  1369. if (admin)
  1370. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1371. spin_unlock_irqrestore(&q->current_entry_lock, flags);
  1372. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1373. taprio_offload_config_changed(q);
  1374. }
  1375. new_admin = NULL;
  1376. err = 0;
  1377. unlock:
  1378. spin_unlock_bh(qdisc_lock(sch));
  1379. free_sched:
  1380. if (new_admin)
  1381. call_rcu(&new_admin->rcu, taprio_free_sched_cb);
  1382. return err;
  1383. }
  1384. static void taprio_reset(struct Qdisc *sch)
  1385. {
  1386. struct taprio_sched *q = qdisc_priv(sch);
  1387. struct net_device *dev = qdisc_dev(sch);
  1388. int i;
  1389. hrtimer_cancel(&q->advance_timer);
  1390. if (q->qdiscs) {
  1391. for (i = 0; i < dev->num_tx_queues; i++)
  1392. if (q->qdiscs[i])
  1393. qdisc_reset(q->qdiscs[i]);
  1394. }
  1395. }
  1396. static void taprio_destroy(struct Qdisc *sch)
  1397. {
  1398. struct taprio_sched *q = qdisc_priv(sch);
  1399. struct net_device *dev = qdisc_dev(sch);
  1400. struct sched_gate_list *oper, *admin;
  1401. unsigned int i;
  1402. list_del(&q->taprio_list);
  1403. /* Note that taprio_reset() might not be called if an error
  1404. * happens in qdisc_create(), after taprio_init() has been called.
  1405. */
  1406. hrtimer_cancel(&q->advance_timer);
  1407. qdisc_synchronize(sch);
  1408. taprio_disable_offload(dev, q, NULL);
  1409. if (q->qdiscs) {
  1410. for (i = 0; i < dev->num_tx_queues; i++)
  1411. qdisc_put(q->qdiscs[i]);
  1412. kfree(q->qdiscs);
  1413. }
  1414. q->qdiscs = NULL;
  1415. netdev_reset_tc(dev);
  1416. oper = rtnl_dereference(q->oper_sched);
  1417. admin = rtnl_dereference(q->admin_sched);
  1418. if (oper)
  1419. call_rcu(&oper->rcu, taprio_free_sched_cb);
  1420. if (admin)
  1421. call_rcu(&admin->rcu, taprio_free_sched_cb);
  1422. }
  1423. static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
  1424. struct netlink_ext_ack *extack)
  1425. {
  1426. struct taprio_sched *q = qdisc_priv(sch);
  1427. struct net_device *dev = qdisc_dev(sch);
  1428. int i;
  1429. spin_lock_init(&q->current_entry_lock);
  1430. hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
  1431. q->advance_timer.function = advance_sched;
  1432. q->root = sch;
  1433. /* We only support static clockids. Use an invalid value as default
  1434. * and get the valid one on taprio_change().
  1435. */
  1436. q->clockid = -1;
  1437. q->flags = TAPRIO_FLAGS_INVALID;
  1438. list_add(&q->taprio_list, &taprio_list);
  1439. if (sch->parent != TC_H_ROOT) {
  1440. NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
  1441. return -EOPNOTSUPP;
  1442. }
  1443. if (!netif_is_multiqueue(dev)) {
  1444. NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
  1445. return -EOPNOTSUPP;
  1446. }
  1447. /* pre-allocate qdisc, attachment can't fail */
  1448. q->qdiscs = kcalloc(dev->num_tx_queues,
  1449. sizeof(q->qdiscs[0]),
  1450. GFP_KERNEL);
  1451. if (!q->qdiscs)
  1452. return -ENOMEM;
  1453. if (!opt)
  1454. return -EINVAL;
  1455. for (i = 0; i < dev->num_tx_queues; i++) {
  1456. struct netdev_queue *dev_queue;
  1457. struct Qdisc *qdisc;
  1458. dev_queue = netdev_get_tx_queue(dev, i);
  1459. qdisc = qdisc_create_dflt(dev_queue,
  1460. &pfifo_qdisc_ops,
  1461. TC_H_MAKE(TC_H_MAJ(sch->handle),
  1462. TC_H_MIN(i + 1)),
  1463. extack);
  1464. if (!qdisc)
  1465. return -ENOMEM;
  1466. if (i < dev->real_num_tx_queues)
  1467. qdisc_hash_add(qdisc, false);
  1468. q->qdiscs[i] = qdisc;
  1469. }
  1470. return taprio_change(sch, opt, extack);
  1471. }
  1472. static void taprio_attach(struct Qdisc *sch)
  1473. {
  1474. struct taprio_sched *q = qdisc_priv(sch);
  1475. struct net_device *dev = qdisc_dev(sch);
  1476. unsigned int ntx;
  1477. /* Attach underlying qdisc */
  1478. for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
  1479. struct Qdisc *qdisc = q->qdiscs[ntx];
  1480. struct Qdisc *old;
  1481. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1482. qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  1483. old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
  1484. } else {
  1485. old = dev_graft_qdisc(qdisc->dev_queue, sch);
  1486. qdisc_refcount_inc(sch);
  1487. }
  1488. if (old)
  1489. qdisc_put(old);
  1490. }
  1491. /* access to the child qdiscs is not needed in offload mode */
  1492. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1493. kfree(q->qdiscs);
  1494. q->qdiscs = NULL;
  1495. }
  1496. }
  1497. static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
  1498. unsigned long cl)
  1499. {
  1500. struct net_device *dev = qdisc_dev(sch);
  1501. unsigned long ntx = cl - 1;
  1502. if (ntx >= dev->num_tx_queues)
  1503. return NULL;
  1504. return netdev_get_tx_queue(dev, ntx);
  1505. }
  1506. static int taprio_graft(struct Qdisc *sch, unsigned long cl,
  1507. struct Qdisc *new, struct Qdisc **old,
  1508. struct netlink_ext_ack *extack)
  1509. {
  1510. struct taprio_sched *q = qdisc_priv(sch);
  1511. struct net_device *dev = qdisc_dev(sch);
  1512. struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  1513. if (!dev_queue)
  1514. return -EINVAL;
  1515. if (dev->flags & IFF_UP)
  1516. dev_deactivate(dev);
  1517. if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
  1518. *old = dev_graft_qdisc(dev_queue, new);
  1519. } else {
  1520. *old = q->qdiscs[cl - 1];
  1521. q->qdiscs[cl - 1] = new;
  1522. }
  1523. if (new)
  1524. new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  1525. if (dev->flags & IFF_UP)
  1526. dev_activate(dev);
  1527. return 0;
  1528. }
  1529. static int dump_entry(struct sk_buff *msg,
  1530. const struct sched_entry *entry)
  1531. {
  1532. struct nlattr *item;
  1533. item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
  1534. if (!item)
  1535. return -ENOSPC;
  1536. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
  1537. goto nla_put_failure;
  1538. if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
  1539. goto nla_put_failure;
  1540. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
  1541. entry->gate_mask))
  1542. goto nla_put_failure;
  1543. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
  1544. entry->interval))
  1545. goto nla_put_failure;
  1546. return nla_nest_end(msg, item);
  1547. nla_put_failure:
  1548. nla_nest_cancel(msg, item);
  1549. return -1;
  1550. }
  1551. static int dump_schedule(struct sk_buff *msg,
  1552. const struct sched_gate_list *root)
  1553. {
  1554. struct nlattr *entry_list;
  1555. struct sched_entry *entry;
  1556. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
  1557. root->base_time, TCA_TAPRIO_PAD))
  1558. return -1;
  1559. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
  1560. root->cycle_time, TCA_TAPRIO_PAD))
  1561. return -1;
  1562. if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
  1563. root->cycle_time_extension, TCA_TAPRIO_PAD))
  1564. return -1;
  1565. entry_list = nla_nest_start_noflag(msg,
  1566. TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
  1567. if (!entry_list)
  1568. goto error_nest;
  1569. list_for_each_entry(entry, &root->entries, list) {
  1570. if (dump_entry(msg, entry) < 0)
  1571. goto error_nest;
  1572. }
  1573. nla_nest_end(msg, entry_list);
  1574. return 0;
  1575. error_nest:
  1576. nla_nest_cancel(msg, entry_list);
  1577. return -1;
  1578. }
  1579. static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
  1580. {
  1581. struct nlattr *n;
  1582. int tc;
  1583. for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
  1584. n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
  1585. if (!n)
  1586. return -EMSGSIZE;
  1587. if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
  1588. goto nla_put_failure;
  1589. if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
  1590. q->max_sdu[tc]))
  1591. goto nla_put_failure;
  1592. nla_nest_end(skb, n);
  1593. }
  1594. return 0;
  1595. nla_put_failure:
  1596. nla_nest_cancel(skb, n);
  1597. return -EMSGSIZE;
  1598. }
  1599. static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
  1600. {
  1601. struct taprio_sched *q = qdisc_priv(sch);
  1602. struct net_device *dev = qdisc_dev(sch);
  1603. struct sched_gate_list *oper, *admin;
  1604. struct tc_mqprio_qopt opt = { 0 };
  1605. struct nlattr *nest, *sched_nest;
  1606. unsigned int i;
  1607. oper = rtnl_dereference(q->oper_sched);
  1608. admin = rtnl_dereference(q->admin_sched);
  1609. opt.num_tc = netdev_get_num_tc(dev);
  1610. memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
  1611. for (i = 0; i < netdev_get_num_tc(dev); i++) {
  1612. opt.count[i] = dev->tc_to_txq[i].count;
  1613. opt.offset[i] = dev->tc_to_txq[i].offset;
  1614. }
  1615. nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
  1616. if (!nest)
  1617. goto start_error;
  1618. if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
  1619. goto options_error;
  1620. if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
  1621. nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
  1622. goto options_error;
  1623. if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
  1624. goto options_error;
  1625. if (q->txtime_delay &&
  1626. nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
  1627. goto options_error;
  1628. if (taprio_dump_tc_entries(q, skb))
  1629. goto options_error;
  1630. if (oper && dump_schedule(skb, oper))
  1631. goto options_error;
  1632. if (!admin)
  1633. goto done;
  1634. sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
  1635. if (!sched_nest)
  1636. goto options_error;
  1637. if (dump_schedule(skb, admin))
  1638. goto admin_error;
  1639. nla_nest_end(skb, sched_nest);
  1640. done:
  1641. return nla_nest_end(skb, nest);
  1642. admin_error:
  1643. nla_nest_cancel(skb, sched_nest);
  1644. options_error:
  1645. nla_nest_cancel(skb, nest);
  1646. start_error:
  1647. return -ENOSPC;
  1648. }
  1649. static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
  1650. {
  1651. struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  1652. if (!dev_queue)
  1653. return NULL;
  1654. return rtnl_dereference(dev_queue->qdisc_sleeping);
  1655. }
  1656. static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
  1657. {
  1658. unsigned int ntx = TC_H_MIN(classid);
  1659. if (!taprio_queue_get(sch, ntx))
  1660. return 0;
  1661. return ntx;
  1662. }
  1663. static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
  1664. struct sk_buff *skb, struct tcmsg *tcm)
  1665. {
  1666. struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  1667. tcm->tcm_parent = TC_H_ROOT;
  1668. tcm->tcm_handle |= TC_H_MIN(cl);
  1669. tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
  1670. return 0;
  1671. }
  1672. static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  1673. struct gnet_dump *d)
  1674. __releases(d->lock)
  1675. __acquires(d->lock)
  1676. {
  1677. struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
  1678. sch = rtnl_dereference(dev_queue->qdisc_sleeping);
  1679. if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
  1680. qdisc_qstats_copy(d, sch) < 0)
  1681. return -1;
  1682. return 0;
  1683. }
  1684. static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  1685. {
  1686. struct net_device *dev = qdisc_dev(sch);
  1687. unsigned long ntx;
  1688. if (arg->stop)
  1689. return;
  1690. arg->count = arg->skip;
  1691. for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
  1692. if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
  1693. break;
  1694. }
  1695. }
  1696. static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
  1697. struct tcmsg *tcm)
  1698. {
  1699. return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
  1700. }
  1701. static const struct Qdisc_class_ops taprio_class_ops = {
  1702. .graft = taprio_graft,
  1703. .leaf = taprio_leaf,
  1704. .find = taprio_find,
  1705. .walk = taprio_walk,
  1706. .dump = taprio_dump_class,
  1707. .dump_stats = taprio_dump_class_stats,
  1708. .select_queue = taprio_select_queue,
  1709. };
  1710. static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
  1711. .cl_ops = &taprio_class_ops,
  1712. .id = "taprio",
  1713. .priv_size = sizeof(struct taprio_sched),
  1714. .init = taprio_init,
  1715. .change = taprio_change,
  1716. .destroy = taprio_destroy,
  1717. .reset = taprio_reset,
  1718. .attach = taprio_attach,
  1719. .peek = taprio_peek,
  1720. .dequeue = taprio_dequeue,
  1721. .enqueue = taprio_enqueue,
  1722. .dump = taprio_dump,
  1723. .owner = THIS_MODULE,
  1724. };
  1725. static struct notifier_block taprio_device_notifier = {
  1726. .notifier_call = taprio_dev_notifier,
  1727. };
  1728. static int __init taprio_module_init(void)
  1729. {
  1730. int err = register_netdevice_notifier(&taprio_device_notifier);
  1731. if (err)
  1732. return err;
  1733. return register_qdisc(&taprio_qdisc_ops);
  1734. }
  1735. static void __exit taprio_module_exit(void)
  1736. {
  1737. unregister_qdisc(&taprio_qdisc_ops);
  1738. unregister_netdevice_notifier(&taprio_device_notifier);
  1739. }
  1740. module_init(taprio_module_init);
  1741. module_exit(taprio_module_exit);
  1742. MODULE_LICENSE("GPL");