xen-netfront.c

  1. /*
  2. * Virtual network driver for conversing with remote driver backends.
  3. *
  4. * Copyright (c) 2002-2005, K A Fraser
  5. * Copyright (c) 2005, XenSource Ltd
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License version 2
  9. * as published by the Free Software Foundation; or, when distributed
  10. * separately from the Linux kernel or incorporated into other
  11. * software packages, subject to the following license:
  12. *
  13. * Permission is hereby granted, free of charge, to any person obtaining a copy
  14. * of this source file (the "Software"), to deal in the Software without
  15. * restriction, including without limitation the rights to use, copy, modify,
  16. * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17. * and to permit persons to whom the Software is furnished to do so, subject to
  18. * the following conditions:
  19. *
  20. * The above copyright notice and this permission notice shall be included in
  21. * all copies or substantial portions of the Software.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29. * IN THE SOFTWARE.
  30. */
  31. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  32. #include <linux/module.h>
  33. #include <linux/kernel.h>
  34. #include <linux/netdevice.h>
  35. #include <linux/etherdevice.h>
  36. #include <linux/skbuff.h>
  37. #include <linux/ethtool.h>
  38. #include <linux/if_ether.h>
  39. #include <net/tcp.h>
  40. #include <linux/udp.h>
  41. #include <linux/moduleparam.h>
  42. #include <linux/mm.h>
  43. #include <linux/slab.h>
  44. #include <net/ip.h>
  45. #include <linux/bpf.h>
  46. #include <net/page_pool.h>
  47. #include <linux/bpf_trace.h>
  48. #include <xen/xen.h>
  49. #include <xen/xenbus.h>
  50. #include <xen/events.h>
  51. #include <xen/page.h>
  52. #include <xen/platform_pci.h>
  53. #include <xen/grant_table.h>
  54. #include <xen/interface/io/netif.h>
  55. #include <xen/interface/memory.h>
  56. #include <xen/interface/grant_table.h>
  57. /* Module parameters */
  58. #define MAX_QUEUES_DEFAULT 8
  59. static unsigned int xennet_max_queues;
  60. module_param_named(max_queues, xennet_max_queues, uint, 0644);
  61. MODULE_PARM_DESC(max_queues,
  62. "Maximum number of queues per virtual interface");
  63. static bool __read_mostly xennet_trusted = true;
  64. module_param_named(trusted, xennet_trusted, bool, 0644);
  65. MODULE_PARM_DESC(trusted, "Is the backend trusted");
  66. #define XENNET_TIMEOUT (5 * HZ)
  67. static const struct ethtool_ops xennet_ethtool_ops;
  68. struct netfront_cb {
  69. int pull_to;
  70. };
  71. #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
  72. #define RX_COPY_THRESHOLD 256
  73. #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
  74. #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
  75. /* Minimum number of Rx slots (includes slot for GSO metadata). */
  76. #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
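/*
 * Note (illustrative, assuming the common 4 KiB XEN_PAGE_SIZE): the
 * __CONST_RING_SIZE() macro divides the page space left after the shared
 * ring header by the request/response union size and rounds down to a
 * power of two, which yields 256 slots for both NET_TX_RING_SIZE and
 * NET_RX_RING_SIZE. NET_RX_SLOTS_MIN adds one slot on top of
 * XEN_NETIF_NR_SLOTS_MIN so a worst-case packet still has room for its
 * GSO extra-info segment.
 */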
  77. /* Queue name is interface name with "-qNNN" appended */
  78. #define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  79. /* IRQ name is queue name with "-tx" or "-rx" appended */
  80. #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  81. static DECLARE_WAIT_QUEUE_HEAD(module_wq);
  82. struct netfront_stats {
  83. u64 packets;
  84. u64 bytes;
  85. struct u64_stats_sync syncp;
  86. };
  87. struct netfront_info;
  88. struct netfront_queue {
  89. unsigned int id; /* Queue ID, 0-based */
  90. char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
  91. struct netfront_info *info;
  92. struct bpf_prog __rcu *xdp_prog;
  93. struct napi_struct napi;
  94. /* Split event channels support, tx_* == rx_* when using
  95. * single event channel.
  96. */
  97. unsigned int tx_evtchn, rx_evtchn;
  98. unsigned int tx_irq, rx_irq;
  99. /* Only used when split event channels support is enabled */
  100. char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
  101. char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
  102. spinlock_t tx_lock;
  103. struct xen_netif_tx_front_ring tx;
  104. int tx_ring_ref;
  105. /*
  106. * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
  107. * are linked from tx_skb_freelist through tx_link.
  108. */
  109. struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
  110. unsigned short tx_link[NET_TX_RING_SIZE];
  111. #define TX_LINK_NONE 0xffff
  112. #define TX_PENDING 0xfffe
  113. grant_ref_t gref_tx_head;
  114. grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
  115. struct page *grant_tx_page[NET_TX_RING_SIZE];
  116. unsigned tx_skb_freelist;
  117. unsigned int tx_pend_queue;
  118. spinlock_t rx_lock ____cacheline_aligned_in_smp;
  119. struct xen_netif_rx_front_ring rx;
  120. int rx_ring_ref;
  121. struct timer_list rx_refill_timer;
  122. struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
  123. grant_ref_t gref_rx_head;
  124. grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
  125. unsigned int rx_rsp_unconsumed;
  126. spinlock_t rx_cons_lock;
  127. struct page_pool *page_pool;
  128. struct xdp_rxq_info xdp_rxq;
  129. };
  130. struct netfront_info {
  131. struct list_head list;
  132. struct net_device *netdev;
  133. struct xenbus_device *xbdev;
  134. /* Multi-queue support */
  135. struct netfront_queue *queues;
  136. /* Statistics */
  137. struct netfront_stats __percpu *rx_stats;
  138. struct netfront_stats __percpu *tx_stats;
  139. /* XDP state */
  140. bool netback_has_xdp_headroom;
  141. bool netfront_xdp_enabled;
  142. /* Is device behaving sane? */
  143. bool broken;
  144. /* Should skbs be bounced into a zeroed buffer? */
  145. bool bounce;
  146. atomic_t rx_gso_checksum_fixup;
  147. };
  148. struct netfront_rx_info {
  149. struct xen_netif_rx_response rx;
  150. struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
  151. };
  152. /*
153. * Access macros for acquiring/freeing slots in tx_skbs[].
  154. */
  155. static void add_id_to_list(unsigned *head, unsigned short *list,
  156. unsigned short id)
  157. {
  158. list[id] = *head;
  159. *head = id;
  160. }
  161. static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
  162. {
  163. unsigned int id = *head;
  164. if (id != TX_LINK_NONE) {
  165. *head = list[id];
  166. list[id] = TX_LINK_NONE;
  167. }
  168. return id;
  169. }
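/*
 * Sketch of the freelist above (values are examples only): with
 * tx_skb_freelist == 2 and tx_link == { [2] = 5, [5] = TX_LINK_NONE },
 * get_id_from_list() hands out id 2 and leaves tx_skb_freelist == 5;
 * add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, 2) later pushes
 * the id back so the slot can be reused by a subsequent transmit.
 */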
  170. static int xennet_rxidx(RING_IDX idx)
  171. {
  172. return idx & (NET_RX_RING_SIZE - 1);
  173. }
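/*
 * NET_RX_RING_SIZE is a power of two (a property of __CONST_RING_SIZE),
 * so masking with (NET_RX_RING_SIZE - 1) above is equivalent to a modulo
 * without the cost of a division.
 */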
  174. static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
  175. RING_IDX ri)
  176. {
  177. int i = xennet_rxidx(ri);
  178. struct sk_buff *skb = queue->rx_skbs[i];
  179. queue->rx_skbs[i] = NULL;
  180. return skb;
  181. }
  182. static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
  183. RING_IDX ri)
  184. {
  185. int i = xennet_rxidx(ri);
  186. grant_ref_t ref = queue->grant_rx_ref[i];
  187. queue->grant_rx_ref[i] = INVALID_GRANT_REF;
  188. return ref;
  189. }
  190. #ifdef CONFIG_SYSFS
  191. static const struct attribute_group xennet_dev_group;
  192. #endif
  193. static bool xennet_can_sg(struct net_device *dev)
  194. {
  195. return dev->features & NETIF_F_SG;
  196. }
  197. static void rx_refill_timeout(struct timer_list *t)
  198. {
  199. struct netfront_queue *queue = from_timer(queue, t, rx_refill_timer);
  200. napi_schedule(&queue->napi);
  201. }
  202. static int netfront_tx_slot_available(struct netfront_queue *queue)
  203. {
  204. return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
  205. (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
  206. }
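/*
 * The queue is treated as full once fewer than XEN_NETIF_NR_SLOTS_MIN + 1
 * free slots remain, which keeps enough headroom for a worst-case
 * fragmented packet plus its extra-info slot before xennet_start_xmit()
 * stops the queue.
 */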
  207. static void xennet_maybe_wake_tx(struct netfront_queue *queue)
  208. {
  209. struct net_device *dev = queue->info->netdev;
  210. struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
  211. if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
  212. netfront_tx_slot_available(queue) &&
  213. likely(netif_running(dev)))
  214. netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
  215. }
  216. static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
  217. {
  218. struct sk_buff *skb;
  219. struct page *page;
  220. skb = __netdev_alloc_skb(queue->info->netdev,
  221. RX_COPY_THRESHOLD + NET_IP_ALIGN,
  222. GFP_ATOMIC | __GFP_NOWARN);
  223. if (unlikely(!skb))
  224. return NULL;
  225. page = page_pool_alloc_pages(queue->page_pool,
  226. GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
  227. if (unlikely(!page)) {
  228. kfree_skb(skb);
  229. return NULL;
  230. }
  231. skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
  232. /* Align ip header to a 16 bytes boundary */
  233. skb_reserve(skb, NET_IP_ALIGN);
  234. skb->dev = queue->info->netdev;
  235. return skb;
  236. }
  237. static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
  238. {
  239. RING_IDX req_prod = queue->rx.req_prod_pvt;
  240. int notify;
  241. int err = 0;
  242. if (unlikely(!netif_carrier_ok(queue->info->netdev)))
  243. return;
  244. for (req_prod = queue->rx.req_prod_pvt;
  245. req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
  246. req_prod++) {
  247. struct sk_buff *skb;
  248. unsigned short id;
  249. grant_ref_t ref;
  250. struct page *page;
  251. struct xen_netif_rx_request *req;
  252. skb = xennet_alloc_one_rx_buffer(queue);
  253. if (!skb) {
  254. err = -ENOMEM;
  255. break;
  256. }
  257. id = xennet_rxidx(req_prod);
  258. BUG_ON(queue->rx_skbs[id]);
  259. queue->rx_skbs[id] = skb;
  260. ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
  261. WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
  262. queue->grant_rx_ref[id] = ref;
  263. page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
  264. req = RING_GET_REQUEST(&queue->rx, req_prod);
  265. gnttab_page_grant_foreign_access_ref_one(ref,
  266. queue->info->xbdev->otherend_id,
  267. page,
  268. 0);
  269. req->id = id;
  270. req->gref = ref;
  271. }
  272. queue->rx.req_prod_pvt = req_prod;
  273. /* Try again later if there are not enough requests or skb allocation
  274. * failed.
  275. * Enough requests is quantified as the sum of newly created slots and
  276. * the unconsumed slots at the backend.
  277. */
  278. if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
  279. unlikely(err)) {
  280. mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
  281. return;
  282. }
  283. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
  284. if (notify)
  285. notify_remote_via_irq(queue->rx_irq);
  286. }
  287. static int xennet_open(struct net_device *dev)
  288. {
  289. struct netfront_info *np = netdev_priv(dev);
  290. unsigned int num_queues = dev->real_num_tx_queues;
  291. unsigned int i = 0;
  292. struct netfront_queue *queue = NULL;
  293. if (!np->queues || np->broken)
  294. return -ENODEV;
  295. for (i = 0; i < num_queues; ++i) {
  296. queue = &np->queues[i];
  297. napi_enable(&queue->napi);
  298. spin_lock_bh(&queue->rx_lock);
  299. if (netif_carrier_ok(dev)) {
  300. xennet_alloc_rx_buffers(queue);
  301. queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
  302. if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
  303. napi_schedule(&queue->napi);
  304. }
  305. spin_unlock_bh(&queue->rx_lock);
  306. }
  307. netif_tx_start_all_queues(dev);
  308. return 0;
  309. }
  310. static bool xennet_tx_buf_gc(struct netfront_queue *queue)
  311. {
  312. RING_IDX cons, prod;
  313. unsigned short id;
  314. struct sk_buff *skb;
  315. bool more_to_do;
  316. bool work_done = false;
  317. const struct device *dev = &queue->info->netdev->dev;
  318. BUG_ON(!netif_carrier_ok(queue->info->netdev));
  319. do {
  320. prod = queue->tx.sring->rsp_prod;
  321. if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
  322. dev_alert(dev, "Illegal number of responses %u\n",
  323. prod - queue->tx.rsp_cons);
  324. goto err;
  325. }
  326. rmb(); /* Ensure we see responses up to 'rp'. */
  327. for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
  328. struct xen_netif_tx_response txrsp;
  329. work_done = true;
  330. RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
  331. if (txrsp.status == XEN_NETIF_RSP_NULL)
  332. continue;
  333. id = txrsp.id;
  334. if (id >= RING_SIZE(&queue->tx)) {
  335. dev_alert(dev,
  336. "Response has incorrect id (%u)\n",
  337. id);
  338. goto err;
  339. }
  340. if (queue->tx_link[id] != TX_PENDING) {
  341. dev_alert(dev,
  342. "Response for inactive request\n");
  343. goto err;
  344. }
  345. queue->tx_link[id] = TX_LINK_NONE;
  346. skb = queue->tx_skbs[id];
  347. queue->tx_skbs[id] = NULL;
  348. if (unlikely(!gnttab_end_foreign_access_ref(
  349. queue->grant_tx_ref[id]))) {
  350. dev_alert(dev,
  351. "Grant still in use by backend domain\n");
  352. goto err;
  353. }
  354. gnttab_release_grant_reference(
  355. &queue->gref_tx_head, queue->grant_tx_ref[id]);
  356. queue->grant_tx_ref[id] = INVALID_GRANT_REF;
  357. queue->grant_tx_page[id] = NULL;
  358. add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
  359. dev_kfree_skb_irq(skb);
  360. }
  361. queue->tx.rsp_cons = prod;
  362. RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
  363. } while (more_to_do);
  364. xennet_maybe_wake_tx(queue);
  365. return work_done;
  366. err:
  367. queue->info->broken = true;
  368. dev_alert(dev, "Disabled for further use\n");
  369. return work_done;
  370. }
  371. struct xennet_gnttab_make_txreq {
  372. struct netfront_queue *queue;
  373. struct sk_buff *skb;
  374. struct page *page;
  375. struct xen_netif_tx_request *tx; /* Last request on ring page */
  376. struct xen_netif_tx_request tx_local; /* Last request local copy*/
  377. unsigned int size;
  378. };
  379. static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
  380. unsigned int len, void *data)
  381. {
  382. struct xennet_gnttab_make_txreq *info = data;
  383. unsigned int id;
  384. struct xen_netif_tx_request *tx;
  385. grant_ref_t ref;
  386. /* convenient aliases */
  387. struct page *page = info->page;
  388. struct netfront_queue *queue = info->queue;
  389. struct sk_buff *skb = info->skb;
  390. id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
  391. tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
  392. ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
  393. WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
  394. gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
  395. gfn, GNTMAP_readonly);
  396. queue->tx_skbs[id] = skb;
  397. queue->grant_tx_page[id] = page;
  398. queue->grant_tx_ref[id] = ref;
  399. info->tx_local.id = id;
  400. info->tx_local.gref = ref;
  401. info->tx_local.offset = offset;
  402. info->tx_local.size = len;
  403. info->tx_local.flags = 0;
  404. *tx = info->tx_local;
  405. /*
  406. * Put the request in the pending queue, it will be set to be pending
  407. * when the producer index is about to be raised.
  408. */
  409. add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
  410. info->tx = tx;
  411. info->size += info->tx_local.size;
  412. }
  413. static struct xen_netif_tx_request *xennet_make_first_txreq(
  414. struct xennet_gnttab_make_txreq *info,
  415. unsigned int offset, unsigned int len)
  416. {
  417. info->size = 0;
  418. gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
  419. return info->tx;
  420. }
  421. static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
  422. unsigned int len, void *data)
  423. {
  424. struct xennet_gnttab_make_txreq *info = data;
  425. info->tx->flags |= XEN_NETTXF_more_data;
  426. skb_get(info->skb);
  427. xennet_tx_setup_grant(gfn, offset, len, data);
  428. }
  429. static void xennet_make_txreqs(
  430. struct xennet_gnttab_make_txreq *info,
  431. struct page *page,
  432. unsigned int offset, unsigned int len)
  433. {
  434. /* Skip unused frames from start of page */
  435. page += offset >> PAGE_SHIFT;
  436. offset &= ~PAGE_MASK;
  437. while (len) {
  438. info->page = page;
  439. info->size = 0;
  440. gnttab_foreach_grant_in_range(page, offset, len,
  441. xennet_make_one_txreq,
  442. info);
  443. page++;
  444. offset = 0;
  445. len -= info->size;
  446. }
  447. }
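/*
 * Illustration (4 KiB Xen pages assumed): a 9000-byte fragment starting at
 * offset 0 is split by gnttab_foreach_grant_in_range() into three grants of
 * 4096, 4096 and 808 bytes, so three tx requests are produced; each request
 * after the first sets XEN_NETTXF_more_data on its predecessor and takes an
 * extra reference on the skb via skb_get().
 */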
  448. /*
  449. * Count how many ring slots are required to send this skb. Each frag
  450. * might be a compound page.
  451. */
  452. static int xennet_count_skb_slots(struct sk_buff *skb)
  453. {
  454. int i, frags = skb_shinfo(skb)->nr_frags;
  455. int slots;
  456. slots = gnttab_count_grant(offset_in_page(skb->data),
  457. skb_headlen(skb));
  458. for (i = 0; i < frags; i++) {
  459. skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  460. unsigned long size = skb_frag_size(frag);
  461. unsigned long offset = skb_frag_off(frag);
  462. /* Skip unused frames from start of page */
  463. offset &= ~PAGE_MASK;
  464. slots += gnttab_count_grant(offset, size);
  465. }
  466. return slots;
  467. }
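/*
 * Worked example (4 KiB Xen pages assumed): a linear area of 100 bytes
 * starting at byte 4090 of its page crosses a grant boundary and therefore
 * costs two slots; each frag is charged the same way from its own offset,
 * so the count matches what xennet_make_txreqs() will actually emit.
 */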
  468. static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
  469. struct net_device *sb_dev)
  470. {
  471. unsigned int num_queues = dev->real_num_tx_queues;
  472. u32 hash;
  473. u16 queue_idx;
  474. /* First, check if there is only one queue */
  475. if (num_queues == 1) {
  476. queue_idx = 0;
  477. } else {
  478. hash = skb_get_hash(skb);
  479. queue_idx = hash % num_queues;
  480. }
  481. return queue_idx;
  482. }
  483. static void xennet_mark_tx_pending(struct netfront_queue *queue)
  484. {
  485. unsigned int i;
  486. while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
  487. TX_LINK_NONE)
  488. queue->tx_link[i] = TX_PENDING;
  489. }
  490. static int xennet_xdp_xmit_one(struct net_device *dev,
  491. struct netfront_queue *queue,
  492. struct xdp_frame *xdpf)
  493. {
  494. struct netfront_info *np = netdev_priv(dev);
  495. struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
  496. struct xennet_gnttab_make_txreq info = {
  497. .queue = queue,
  498. .skb = NULL,
  499. .page = virt_to_page(xdpf->data),
  500. };
  501. int notify;
  502. xennet_make_first_txreq(&info,
  503. offset_in_page(xdpf->data),
  504. xdpf->len);
  505. xennet_mark_tx_pending(queue);
  506. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
  507. if (notify)
  508. notify_remote_via_irq(queue->tx_irq);
  509. u64_stats_update_begin(&tx_stats->syncp);
  510. tx_stats->bytes += xdpf->len;
  511. tx_stats->packets++;
  512. u64_stats_update_end(&tx_stats->syncp);
  513. xennet_tx_buf_gc(queue);
  514. return 0;
  515. }
  516. static int xennet_xdp_xmit(struct net_device *dev, int n,
  517. struct xdp_frame **frames, u32 flags)
  518. {
  519. unsigned int num_queues = dev->real_num_tx_queues;
  520. struct netfront_info *np = netdev_priv(dev);
  521. struct netfront_queue *queue = NULL;
  522. unsigned long irq_flags;
  523. int nxmit = 0;
  524. int i;
  525. if (unlikely(np->broken))
  526. return -ENODEV;
  527. if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
  528. return -EINVAL;
  529. queue = &np->queues[smp_processor_id() % num_queues];
  530. spin_lock_irqsave(&queue->tx_lock, irq_flags);
  531. for (i = 0; i < n; i++) {
  532. struct xdp_frame *xdpf = frames[i];
  533. if (!xdpf)
  534. continue;
  535. if (xennet_xdp_xmit_one(dev, queue, xdpf))
  536. break;
  537. nxmit++;
  538. }
  539. spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
  540. return nxmit;
  541. }
  542. static struct sk_buff *bounce_skb(const struct sk_buff *skb)
  543. {
  544. unsigned int headerlen = skb_headroom(skb);
  545. /* Align size to allocate full pages and avoid contiguous data leaks */
  546. unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
  547. XEN_PAGE_SIZE);
  548. struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
  549. if (!n)
  550. return NULL;
  551. if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
  552. WARN_ONCE(1, "misaligned skb allocated\n");
  553. kfree_skb(n);
  554. return NULL;
  555. }
  556. /* Set the data pointer */
  557. skb_reserve(n, headerlen);
  558. /* Set the tail pointer and length */
  559. skb_put(n, skb->len);
  560. BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
  561. skb_copy_header(n, skb);
  562. return n;
  563. }
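/*
 * bounce_skb() copies the packet into freshly allocated, zeroed memory whose
 * size is rounded up to a multiple of XEN_PAGE_SIZE, so the pages later
 * granted to an untrusted backend contain nothing but the packet itself and
 * zero padding; the alignment check guards against granting a page that
 * shares bytes with an unrelated, misaligned allocation.
 */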
  564. #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
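/*
 * With 4 KiB Xen pages this evaluates to 65536 / 4096 + 1 = 17: a maximally
 * sized (64 KiB) GSO packet can span 16 pages plus one extra page when the
 * data is not page aligned, which is why xennet_start_xmit() linearizes any
 * skb needing more than MAX_XEN_SKB_FRAGS + 1 slots.
 */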
  565. static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
  566. {
  567. struct netfront_info *np = netdev_priv(dev);
  568. struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
  569. struct xen_netif_tx_request *first_tx;
  570. unsigned int i;
  571. int notify;
  572. int slots;
  573. struct page *page;
  574. unsigned int offset;
  575. unsigned int len;
  576. unsigned long flags;
  577. struct netfront_queue *queue = NULL;
  578. struct xennet_gnttab_make_txreq info = { };
  579. unsigned int num_queues = dev->real_num_tx_queues;
  580. u16 queue_index;
  581. struct sk_buff *nskb;
  582. /* Drop the packet if no queues are set up */
  583. if (num_queues < 1)
  584. goto drop;
  585. if (unlikely(np->broken))
  586. goto drop;
  587. /* Determine which queue to transmit this SKB on */
  588. queue_index = skb_get_queue_mapping(skb);
  589. queue = &np->queues[queue_index];
  590. /* If skb->len is too big for wire format, drop skb and alert
  591. * user about misconfiguration.
  592. */
  593. if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
  594. net_alert_ratelimited(
  595. "xennet: skb->len = %u, too big for wire format\n",
  596. skb->len);
  597. goto drop;
  598. }
  599. slots = xennet_count_skb_slots(skb);
  600. if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
  601. net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
  602. slots, skb->len);
  603. if (skb_linearize(skb))
  604. goto drop;
  605. }
  606. page = virt_to_page(skb->data);
  607. offset = offset_in_page(skb->data);
  608. /* The first req should be at least ETH_HLEN size or the packet will be
  609. * dropped by netback.
  610. *
  611. * If the backend is not trusted bounce all data to zeroed pages to
  612. * avoid exposing contiguous data on the granted page not belonging to
  613. * the skb.
  614. */
  615. if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
  616. nskb = bounce_skb(skb);
  617. if (!nskb)
  618. goto drop;
  619. dev_consume_skb_any(skb);
  620. skb = nskb;
  621. page = virt_to_page(skb->data);
  622. offset = offset_in_page(skb->data);
  623. }
  624. len = skb_headlen(skb);
  625. spin_lock_irqsave(&queue->tx_lock, flags);
  626. if (unlikely(!netif_carrier_ok(dev) ||
  627. (slots > 1 && !xennet_can_sg(dev)) ||
  628. netif_needs_gso(skb, netif_skb_features(skb)))) {
  629. spin_unlock_irqrestore(&queue->tx_lock, flags);
  630. goto drop;
  631. }
  632. /* First request for the linear area. */
  633. info.queue = queue;
  634. info.skb = skb;
  635. info.page = page;
  636. first_tx = xennet_make_first_txreq(&info, offset, len);
  637. offset += info.tx_local.size;
  638. if (offset == PAGE_SIZE) {
  639. page++;
  640. offset = 0;
  641. }
  642. len -= info.tx_local.size;
  643. if (skb->ip_summed == CHECKSUM_PARTIAL)
  644. /* local packet? */
  645. first_tx->flags |= XEN_NETTXF_csum_blank |
  646. XEN_NETTXF_data_validated;
  647. else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
  648. /* remote but checksummed. */
  649. first_tx->flags |= XEN_NETTXF_data_validated;
  650. /* Optional extra info after the first request. */
  651. if (skb_shinfo(skb)->gso_size) {
  652. struct xen_netif_extra_info *gso;
  653. gso = (struct xen_netif_extra_info *)
  654. RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
  655. first_tx->flags |= XEN_NETTXF_extra_info;
  656. gso->u.gso.size = skb_shinfo(skb)->gso_size;
  657. gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
  658. XEN_NETIF_GSO_TYPE_TCPV6 :
  659. XEN_NETIF_GSO_TYPE_TCPV4;
  660. gso->u.gso.pad = 0;
  661. gso->u.gso.features = 0;
  662. gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
  663. gso->flags = 0;
  664. }
  665. /* Requests for the rest of the linear area. */
  666. xennet_make_txreqs(&info, page, offset, len);
  667. /* Requests for all the frags. */
  668. for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  669. skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  670. xennet_make_txreqs(&info, skb_frag_page(frag),
  671. skb_frag_off(frag),
  672. skb_frag_size(frag));
  673. }
  674. /* First request has the packet length. */
  675. first_tx->size = skb->len;
  676. /* timestamp packet in software */
  677. skb_tx_timestamp(skb);
  678. xennet_mark_tx_pending(queue);
  679. RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
  680. if (notify)
  681. notify_remote_via_irq(queue->tx_irq);
  682. u64_stats_update_begin(&tx_stats->syncp);
  683. tx_stats->bytes += skb->len;
  684. tx_stats->packets++;
  685. u64_stats_update_end(&tx_stats->syncp);
  686. /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
  687. xennet_tx_buf_gc(queue);
  688. if (!netfront_tx_slot_available(queue))
  689. netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
  690. spin_unlock_irqrestore(&queue->tx_lock, flags);
  691. return NETDEV_TX_OK;
  692. drop:
  693. dev->stats.tx_dropped++;
  694. dev_kfree_skb_any(skb);
  695. return NETDEV_TX_OK;
  696. }
  697. static int xennet_close(struct net_device *dev)
  698. {
  699. struct netfront_info *np = netdev_priv(dev);
  700. unsigned int num_queues = dev->real_num_tx_queues;
  701. unsigned int i;
  702. struct netfront_queue *queue;
  703. netif_tx_stop_all_queues(np->netdev);
  704. for (i = 0; i < num_queues; ++i) {
  705. queue = &np->queues[i];
  706. napi_disable(&queue->napi);
  707. }
  708. return 0;
  709. }
  710. static void xennet_destroy_queues(struct netfront_info *info)
  711. {
  712. unsigned int i;
  713. for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
  714. struct netfront_queue *queue = &info->queues[i];
  715. if (netif_running(info->netdev))
  716. napi_disable(&queue->napi);
  717. netif_napi_del(&queue->napi);
  718. }
  719. kfree(info->queues);
  720. info->queues = NULL;
  721. }
  722. static void xennet_uninit(struct net_device *dev)
  723. {
  724. struct netfront_info *np = netdev_priv(dev);
  725. xennet_destroy_queues(np);
  726. }
  727. static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
  728. {
  729. unsigned long flags;
  730. spin_lock_irqsave(&queue->rx_cons_lock, flags);
  731. queue->rx.rsp_cons = val;
  732. queue->rx_rsp_unconsumed = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
  733. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  734. }
  735. static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
  736. grant_ref_t ref)
  737. {
  738. int new = xennet_rxidx(queue->rx.req_prod_pvt);
  739. BUG_ON(queue->rx_skbs[new]);
  740. queue->rx_skbs[new] = skb;
  741. queue->grant_rx_ref[new] = ref;
  742. RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
  743. RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
  744. queue->rx.req_prod_pvt++;
  745. }
  746. static int xennet_get_extras(struct netfront_queue *queue,
  747. struct xen_netif_extra_info *extras,
  748. RING_IDX rp)
  749. {
  750. struct xen_netif_extra_info extra;
  751. struct device *dev = &queue->info->netdev->dev;
  752. RING_IDX cons = queue->rx.rsp_cons;
  753. int err = 0;
  754. do {
  755. struct sk_buff *skb;
  756. grant_ref_t ref;
  757. if (unlikely(cons + 1 == rp)) {
  758. if (net_ratelimit())
  759. dev_warn(dev, "Missing extra info\n");
  760. err = -EBADR;
  761. break;
  762. }
  763. RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);
  764. if (unlikely(!extra.type ||
  765. extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
  766. if (net_ratelimit())
  767. dev_warn(dev, "Invalid extra type: %d\n",
  768. extra.type);
  769. err = -EINVAL;
  770. } else {
  771. extras[extra.type - 1] = extra;
  772. }
  773. skb = xennet_get_rx_skb(queue, cons);
  774. ref = xennet_get_rx_ref(queue, cons);
  775. xennet_move_rx_slot(queue, skb, ref);
  776. } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
  777. xennet_set_rx_rsp_cons(queue, cons);
  778. return err;
  779. }
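/*
 * XEN_NETIF_EXTRA_TYPE_NONE is 0, so a valid extra type t is stored at
 * extras[t - 1]; the extras[] array therefore only needs
 * XEN_NETIF_EXTRA_TYPE_MAX - 1 entries (see struct netfront_rx_info).
 */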
  780. static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
  781. struct xen_netif_rx_response *rx, struct bpf_prog *prog,
  782. struct xdp_buff *xdp, bool *need_xdp_flush)
  783. {
  784. struct xdp_frame *xdpf;
  785. u32 len = rx->status;
  786. u32 act;
  787. int err;
  788. xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
  789. &queue->xdp_rxq);
  790. xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
  791. len, false);
  792. act = bpf_prog_run_xdp(prog, xdp);
  793. switch (act) {
  794. case XDP_TX:
  795. get_page(pdata);
  796. xdpf = xdp_convert_buff_to_frame(xdp);
  797. err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
  798. if (unlikely(!err))
  799. xdp_return_frame_rx_napi(xdpf);
  800. else if (unlikely(err < 0))
  801. trace_xdp_exception(queue->info->netdev, prog, act);
  802. break;
  803. case XDP_REDIRECT:
  804. get_page(pdata);
  805. err = xdp_do_redirect(queue->info->netdev, xdp, prog);
  806. *need_xdp_flush = true;
  807. if (unlikely(err))
  808. trace_xdp_exception(queue->info->netdev, prog, act);
  809. break;
  810. case XDP_PASS:
  811. case XDP_DROP:
  812. break;
  813. case XDP_ABORTED:
  814. trace_xdp_exception(queue->info->netdev, prog, act);
  815. break;
  816. default:
  817. bpf_warn_invalid_xdp_action(queue->info->netdev, prog, act);
  818. }
  819. return act;
  820. }
  821. static int xennet_get_responses(struct netfront_queue *queue,
  822. struct netfront_rx_info *rinfo, RING_IDX rp,
  823. struct sk_buff_head *list,
  824. bool *need_xdp_flush)
  825. {
  826. struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
  827. int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
  828. RING_IDX cons = queue->rx.rsp_cons;
  829. struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
  830. struct xen_netif_extra_info *extras = rinfo->extras;
  831. grant_ref_t ref = xennet_get_rx_ref(queue, cons);
  832. struct device *dev = &queue->info->netdev->dev;
  833. struct bpf_prog *xdp_prog;
  834. struct xdp_buff xdp;
  835. int slots = 1;
  836. int err = 0;
  837. u32 verdict;
  838. if (rx->flags & XEN_NETRXF_extra_info) {
  839. err = xennet_get_extras(queue, extras, rp);
  840. if (!err) {
  841. if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
  842. struct xen_netif_extra_info *xdp;
  843. xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
  844. rx->offset = xdp->u.xdp.headroom;
  845. }
  846. }
  847. cons = queue->rx.rsp_cons;
  848. }
  849. for (;;) {
  850. /*
  851. * This definitely indicates a bug, either in this driver or in
  852. * the backend driver. In future this should flag the bad
  853. * situation to the system controller to reboot the backend.
  854. */
  855. if (ref == INVALID_GRANT_REF) {
  856. if (net_ratelimit())
  857. dev_warn(dev, "Bad rx response id %d.\n",
  858. rx->id);
  859. err = -EINVAL;
  860. goto next;
  861. }
  862. if (unlikely(rx->status < 0 ||
  863. rx->offset + rx->status > XEN_PAGE_SIZE)) {
  864. if (net_ratelimit())
  865. dev_warn(dev, "rx->offset: %u, size: %d\n",
  866. rx->offset, rx->status);
  867. xennet_move_rx_slot(queue, skb, ref);
  868. err = -EINVAL;
  869. goto next;
  870. }
  871. if (!gnttab_end_foreign_access_ref(ref)) {
  872. dev_alert(dev,
  873. "Grant still in use by backend domain\n");
  874. queue->info->broken = true;
  875. dev_alert(dev, "Disabled for further use\n");
  876. return -EINVAL;
  877. }
  878. gnttab_release_grant_reference(&queue->gref_rx_head, ref);
  879. rcu_read_lock();
  880. xdp_prog = rcu_dereference(queue->xdp_prog);
  881. if (xdp_prog) {
  882. if (!(rx->flags & XEN_NETRXF_more_data)) {
  883. /* currently only a single page contains data */
  884. verdict = xennet_run_xdp(queue,
  885. skb_frag_page(&skb_shinfo(skb)->frags[0]),
  886. rx, xdp_prog, &xdp, need_xdp_flush);
  887. if (verdict != XDP_PASS)
  888. err = -EINVAL;
  889. } else {
  890. /* drop the frame */
  891. err = -EINVAL;
  892. }
  893. }
  894. rcu_read_unlock();
  895. __skb_queue_tail(list, skb);
  896. next:
  897. if (!(rx->flags & XEN_NETRXF_more_data))
  898. break;
  899. if (cons + slots == rp) {
  900. if (net_ratelimit())
  901. dev_warn(dev, "Need more slots\n");
  902. err = -ENOENT;
  903. break;
  904. }
  905. RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
  906. rx = &rx_local;
  907. skb = xennet_get_rx_skb(queue, cons + slots);
  908. ref = xennet_get_rx_ref(queue, cons + slots);
  909. slots++;
  910. }
  911. if (unlikely(slots > max)) {
  912. if (net_ratelimit())
  913. dev_warn(dev, "Too many slots\n");
  914. err = -E2BIG;
  915. }
  916. if (unlikely(err))
  917. xennet_set_rx_rsp_cons(queue, cons + slots);
  918. return err;
  919. }
  920. static int xennet_set_skb_gso(struct sk_buff *skb,
  921. struct xen_netif_extra_info *gso)
  922. {
  923. if (!gso->u.gso.size) {
  924. if (net_ratelimit())
  925. pr_warn("GSO size must not be zero\n");
  926. return -EINVAL;
  927. }
  928. if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
  929. gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
  930. if (net_ratelimit())
  931. pr_warn("Bad GSO type %d\n", gso->u.gso.type);
  932. return -EINVAL;
  933. }
  934. skb_shinfo(skb)->gso_size = gso->u.gso.size;
  935. skb_shinfo(skb)->gso_type =
  936. (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
  937. SKB_GSO_TCPV4 :
  938. SKB_GSO_TCPV6;
  939. /* Header must be checked, and gso_segs computed. */
  940. skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
  941. skb_shinfo(skb)->gso_segs = 0;
  942. return 0;
  943. }
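/*
 * SKB_GSO_DODGY marks the segmentation parameters as coming from an
 * untrusted source, so the stack re-validates the headers and computes
 * gso_segs itself (gso_segs is deliberately left at 0 here).
 */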
  944. static int xennet_fill_frags(struct netfront_queue *queue,
  945. struct sk_buff *skb,
  946. struct sk_buff_head *list)
  947. {
  948. RING_IDX cons = queue->rx.rsp_cons;
  949. struct sk_buff *nskb;
  950. while ((nskb = __skb_dequeue(list))) {
  951. struct xen_netif_rx_response rx;
  952. skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
  953. RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);
  954. if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
  955. unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
  956. BUG_ON(pull_to < skb_headlen(skb));
  957. __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
  958. }
  959. if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
  960. xennet_set_rx_rsp_cons(queue,
  961. ++cons + skb_queue_len(list));
  962. kfree_skb(nskb);
  963. return -ENOENT;
  964. }
  965. skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
  966. skb_frag_page(nfrag),
  967. rx.offset, rx.status, PAGE_SIZE);
  968. skb_shinfo(nskb)->nr_frags = 0;
  969. kfree_skb(nskb);
  970. }
  971. xennet_set_rx_rsp_cons(queue, cons);
  972. return 0;
  973. }
  974. static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
  975. {
  976. bool recalculate_partial_csum = false;
  977. /*
  978. * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
  979. * peers can fail to set NETRXF_csum_blank when sending a GSO
  980. * frame. In this case force the SKB to CHECKSUM_PARTIAL and
  981. * recalculate the partial checksum.
  982. */
  983. if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
  984. struct netfront_info *np = netdev_priv(dev);
  985. atomic_inc(&np->rx_gso_checksum_fixup);
  986. skb->ip_summed = CHECKSUM_PARTIAL;
  987. recalculate_partial_csum = true;
  988. }
  989. /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
  990. if (skb->ip_summed != CHECKSUM_PARTIAL)
  991. return 0;
  992. return skb_checksum_setup(skb, recalculate_partial_csum);
  993. }
  994. static int handle_incoming_queue(struct netfront_queue *queue,
  995. struct sk_buff_head *rxq)
  996. {
  997. struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
  998. int packets_dropped = 0;
  999. struct sk_buff *skb;
  1000. while ((skb = __skb_dequeue(rxq)) != NULL) {
  1001. int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
  1002. if (pull_to > skb_headlen(skb))
  1003. __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
  1004. /* Ethernet work: Delayed to here as it peeks the header. */
  1005. skb->protocol = eth_type_trans(skb, queue->info->netdev);
  1006. skb_reset_network_header(skb);
  1007. if (checksum_setup(queue->info->netdev, skb)) {
  1008. kfree_skb(skb);
  1009. packets_dropped++;
  1010. queue->info->netdev->stats.rx_errors++;
  1011. continue;
  1012. }
  1013. u64_stats_update_begin(&rx_stats->syncp);
  1014. rx_stats->packets++;
  1015. rx_stats->bytes += skb->len;
  1016. u64_stats_update_end(&rx_stats->syncp);
  1017. /* Pass it up. */
  1018. napi_gro_receive(&queue->napi, skb);
  1019. }
  1020. return packets_dropped;
  1021. }
  1022. static int xennet_poll(struct napi_struct *napi, int budget)
  1023. {
  1024. struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
  1025. struct net_device *dev = queue->info->netdev;
  1026. struct sk_buff *skb;
  1027. struct netfront_rx_info rinfo;
  1028. struct xen_netif_rx_response *rx = &rinfo.rx;
  1029. struct xen_netif_extra_info *extras = rinfo.extras;
  1030. RING_IDX i, rp;
  1031. int work_done;
  1032. struct sk_buff_head rxq;
  1033. struct sk_buff_head errq;
  1034. struct sk_buff_head tmpq;
  1035. int err;
  1036. bool need_xdp_flush = false;
  1037. spin_lock(&queue->rx_lock);
  1038. skb_queue_head_init(&rxq);
  1039. skb_queue_head_init(&errq);
  1040. skb_queue_head_init(&tmpq);
  1041. rp = queue->rx.sring->rsp_prod;
  1042. if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
  1043. dev_alert(&dev->dev, "Illegal number of responses %u\n",
  1044. rp - queue->rx.rsp_cons);
  1045. queue->info->broken = true;
  1046. spin_unlock(&queue->rx_lock);
  1047. return 0;
  1048. }
  1049. rmb(); /* Ensure we see queued responses up to 'rp'. */
  1050. i = queue->rx.rsp_cons;
  1051. work_done = 0;
  1052. while ((i != rp) && (work_done < budget)) {
  1053. RING_COPY_RESPONSE(&queue->rx, i, rx);
  1054. memset(extras, 0, sizeof(rinfo.extras));
  1055. err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
  1056. &need_xdp_flush);
  1057. if (unlikely(err)) {
  1058. if (queue->info->broken) {
  1059. spin_unlock(&queue->rx_lock);
  1060. return 0;
  1061. }
  1062. err:
  1063. while ((skb = __skb_dequeue(&tmpq)))
  1064. __skb_queue_tail(&errq, skb);
  1065. dev->stats.rx_errors++;
  1066. i = queue->rx.rsp_cons;
  1067. continue;
  1068. }
  1069. skb = __skb_dequeue(&tmpq);
  1070. if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
  1071. struct xen_netif_extra_info *gso;
  1072. gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
  1073. if (unlikely(xennet_set_skb_gso(skb, gso))) {
  1074. __skb_queue_head(&tmpq, skb);
  1075. xennet_set_rx_rsp_cons(queue,
  1076. queue->rx.rsp_cons +
  1077. skb_queue_len(&tmpq));
  1078. goto err;
  1079. }
  1080. }
  1081. NETFRONT_SKB_CB(skb)->pull_to = rx->status;
  1082. if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
  1083. NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
  1084. skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
  1085. skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
  1086. skb->data_len = rx->status;
  1087. skb->len += rx->status;
  1088. if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
  1089. goto err;
  1090. if (rx->flags & XEN_NETRXF_csum_blank)
  1091. skb->ip_summed = CHECKSUM_PARTIAL;
  1092. else if (rx->flags & XEN_NETRXF_data_validated)
  1093. skb->ip_summed = CHECKSUM_UNNECESSARY;
  1094. __skb_queue_tail(&rxq, skb);
  1095. i = queue->rx.rsp_cons + 1;
  1096. xennet_set_rx_rsp_cons(queue, i);
  1097. work_done++;
  1098. }
  1099. if (need_xdp_flush)
  1100. xdp_do_flush();
  1101. __skb_queue_purge(&errq);
  1102. work_done -= handle_incoming_queue(queue, &rxq);
  1103. xennet_alloc_rx_buffers(queue);
  1104. if (work_done < budget) {
  1105. int more_to_do = 0;
  1106. napi_complete_done(napi, work_done);
  1107. RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
  1108. if (more_to_do)
  1109. napi_schedule(napi);
  1110. }
  1111. spin_unlock(&queue->rx_lock);
  1112. return work_done;
  1113. }
  1114. static int xennet_change_mtu(struct net_device *dev, int mtu)
  1115. {
  1116. int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
  1117. if (mtu > max)
  1118. return -EINVAL;
  1119. dev->mtu = mtu;
  1120. return 0;
  1121. }
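/*
 * With scatter-gather negotiated the MTU may go up to the wire-format limit
 * XEN_NETIF_MAX_TX_SIZE; without it the device is limited to the standard
 * Ethernet payload size ETH_DATA_LEN (1500 bytes).
 */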
  1122. static void xennet_get_stats64(struct net_device *dev,
  1123. struct rtnl_link_stats64 *tot)
  1124. {
  1125. struct netfront_info *np = netdev_priv(dev);
  1126. int cpu;
  1127. for_each_possible_cpu(cpu) {
  1128. struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
  1129. struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
  1130. u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
  1131. unsigned int start;
  1132. do {
  1133. start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
  1134. tx_packets = tx_stats->packets;
  1135. tx_bytes = tx_stats->bytes;
  1136. } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
  1137. do {
  1138. start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
  1139. rx_packets = rx_stats->packets;
  1140. rx_bytes = rx_stats->bytes;
  1141. } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
  1142. tot->rx_packets += rx_packets;
  1143. tot->tx_packets += tx_packets;
  1144. tot->rx_bytes += rx_bytes;
  1145. tot->tx_bytes += tx_bytes;
  1146. }
  1147. tot->rx_errors = dev->stats.rx_errors;
  1148. tot->tx_dropped = dev->stats.tx_dropped;
  1149. }
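/*
 * The u64_stats_fetch_begin_irq()/retry loops give a consistent snapshot of
 * the per-CPU 64-bit counters even on 32-bit kernels, where such counters
 * cannot be read atomically in a single load.
 */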
  1150. static void xennet_release_tx_bufs(struct netfront_queue *queue)
  1151. {
  1152. struct sk_buff *skb;
  1153. int i;
  1154. for (i = 0; i < NET_TX_RING_SIZE; i++) {
  1155. /* Skip over entries which are actually freelist references */
  1156. if (!queue->tx_skbs[i])
  1157. continue;
  1158. skb = queue->tx_skbs[i];
  1159. queue->tx_skbs[i] = NULL;
  1160. get_page(queue->grant_tx_page[i]);
  1161. gnttab_end_foreign_access(queue->grant_tx_ref[i],
  1162. queue->grant_tx_page[i]);
  1163. queue->grant_tx_page[i] = NULL;
  1164. queue->grant_tx_ref[i] = INVALID_GRANT_REF;
  1165. add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
  1166. dev_kfree_skb_irq(skb);
  1167. }
  1168. }
  1169. static void xennet_release_rx_bufs(struct netfront_queue *queue)
  1170. {
  1171. int id, ref;
  1172. spin_lock_bh(&queue->rx_lock);
  1173. for (id = 0; id < NET_RX_RING_SIZE; id++) {
  1174. struct sk_buff *skb;
  1175. struct page *page;
  1176. skb = queue->rx_skbs[id];
  1177. if (!skb)
  1178. continue;
  1179. ref = queue->grant_rx_ref[id];
  1180. if (ref == INVALID_GRANT_REF)
  1181. continue;
  1182. page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
  1183. /* gnttab_end_foreign_access() needs a page ref until
  1184. * foreign access is ended (which may be deferred).
  1185. */
  1186. get_page(page);
  1187. gnttab_end_foreign_access(ref, page);
  1188. queue->grant_rx_ref[id] = INVALID_GRANT_REF;
  1189. kfree_skb(skb);
  1190. }
  1191. spin_unlock_bh(&queue->rx_lock);
  1192. }
  1193. static netdev_features_t xennet_fix_features(struct net_device *dev,
  1194. netdev_features_t features)
  1195. {
  1196. struct netfront_info *np = netdev_priv(dev);
  1197. if (features & NETIF_F_SG &&
  1198. !xenbus_read_unsigned(np->xbdev->otherend, "feature-sg", 0))
  1199. features &= ~NETIF_F_SG;
  1200. if (features & NETIF_F_IPV6_CSUM &&
  1201. !xenbus_read_unsigned(np->xbdev->otherend,
  1202. "feature-ipv6-csum-offload", 0))
  1203. features &= ~NETIF_F_IPV6_CSUM;
  1204. if (features & NETIF_F_TSO &&
  1205. !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv4", 0))
  1206. features &= ~NETIF_F_TSO;
  1207. if (features & NETIF_F_TSO6 &&
  1208. !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv6", 0))
  1209. features &= ~NETIF_F_TSO6;
  1210. return features;
  1211. }
  1212. static int xennet_set_features(struct net_device *dev,
  1213. netdev_features_t features)
  1214. {
  1215. if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
  1216. netdev_info(dev, "Reducing MTU because no SG offload");
  1217. dev->mtu = ETH_DATA_LEN;
  1218. }
  1219. return 0;
  1220. }
  1221. static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
  1222. {
  1223. unsigned long flags;
  1224. if (unlikely(queue->info->broken))
  1225. return false;
  1226. spin_lock_irqsave(&queue->tx_lock, flags);
  1227. if (xennet_tx_buf_gc(queue))
  1228. *eoi = 0;
  1229. spin_unlock_irqrestore(&queue->tx_lock, flags);
  1230. return true;
  1231. }
  1232. static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
  1233. {
  1234. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1235. if (likely(xennet_handle_tx(dev_id, &eoiflag)))
  1236. xen_irq_lateeoi(irq, eoiflag);
  1237. return IRQ_HANDLED;
  1238. }
  1239. static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
  1240. {
  1241. unsigned int work_queued;
  1242. unsigned long flags;
  1243. if (unlikely(queue->info->broken))
  1244. return false;
  1245. spin_lock_irqsave(&queue->rx_cons_lock, flags);
  1246. work_queued = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
  1247. if (work_queued > queue->rx_rsp_unconsumed) {
  1248. queue->rx_rsp_unconsumed = work_queued;
  1249. *eoi = 0;
  1250. } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
  1251. const struct device *dev = &queue->info->netdev->dev;
  1252. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  1253. dev_alert(dev, "RX producer index going backwards\n");
  1254. dev_alert(dev, "Disabled for further use\n");
  1255. queue->info->broken = true;
  1256. return false;
  1257. }
  1258. spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
  1259. if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
  1260. napi_schedule(&queue->napi);
  1261. return true;
  1262. }
  1263. static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
  1264. {
  1265. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1266. if (likely(xennet_handle_rx(dev_id, &eoiflag)))
  1267. xen_irq_lateeoi(irq, eoiflag);
  1268. return IRQ_HANDLED;
  1269. }
  1270. static irqreturn_t xennet_interrupt(int irq, void *dev_id)
  1271. {
  1272. unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
  1273. if (xennet_handle_tx(dev_id, &eoiflag) &&
  1274. xennet_handle_rx(dev_id, &eoiflag))
  1275. xen_irq_lateeoi(irq, eoiflag);
  1276. return IRQ_HANDLED;
  1277. }
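/*
 * All three interrupt handlers above use the lateeoi flow: the EOI flag
 * starts as XEN_EOI_FLAG_SPURIOUS and is only cleared (set to 0) when the
 * handler actually found completed TX responses or newly queued RX
 * responses, so xen_irq_lateeoi() can throttle event channels that keep
 * firing with no work.  Once the device is marked broken no EOI is sent at
 * all.
 */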
  1278. #ifdef CONFIG_NET_POLL_CONTROLLER
  1279. static void xennet_poll_controller(struct net_device *dev)
  1280. {
  1281. /* Poll each queue */
  1282. struct netfront_info *info = netdev_priv(dev);
  1283. unsigned int num_queues = dev->real_num_tx_queues;
  1284. unsigned int i;
  1285. if (info->broken)
  1286. return;
  1287. for (i = 0; i < num_queues; ++i)
  1288. xennet_interrupt(0, &info->queues[i]);
  1289. }
  1290. #endif
  1291. #define NETBACK_XDP_HEADROOM_DISABLE 0
  1292. #define NETBACK_XDP_HEADROOM_ENABLE 1
  1293. static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
  1294. {
  1295. int err;
  1296. unsigned short headroom;
  1297. headroom = xdp ? XDP_PACKET_HEADROOM : 0;
  1298. err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
  1299. "xdp-headroom", "%hu",
  1300. headroom);
  1301. if (err)
  1302. pr_warn("Error writing xdp-headroom\n");
  1303. return err;
  1304. }
  1305. static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
  1306. struct netlink_ext_ack *extack)
  1307. {
  1308. unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
  1309. struct netfront_info *np = netdev_priv(dev);
  1310. struct bpf_prog *old_prog;
  1311. unsigned int i, err;
  1312. if (dev->mtu > max_mtu) {
  1313. netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
  1314. return -EINVAL;
  1315. }
  1316. if (!np->netback_has_xdp_headroom)
  1317. return 0;
  1318. xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
  1319. err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
  1320. NETBACK_XDP_HEADROOM_DISABLE);
  1321. if (err)
  1322. return err;
  1323. /* avoid the race with XDP headroom adjustment */
  1324. wait_event(module_wq,
  1325. xenbus_read_driver_state(np->xbdev->otherend) ==
  1326. XenbusStateReconfigured);
  1327. np->netfront_xdp_enabled = true;
  1328. old_prog = rtnl_dereference(np->queues[0].xdp_prog);
  1329. if (prog)
  1330. bpf_prog_add(prog, dev->real_num_tx_queues);
  1331. for (i = 0; i < dev->real_num_tx_queues; ++i)
  1332. rcu_assign_pointer(np->queues[i].xdp_prog, prog);
  1333. if (old_prog)
  1334. for (i = 0; i < dev->real_num_tx_queues; ++i)
  1335. bpf_prog_put(old_prog);
  1336. xenbus_switch_state(np->xbdev, XenbusStateConnected);
  1337. return 0;
  1338. }
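/*
 * The Reconfiguring -> Reconfigured round trip above (the "race with XDP
 * headroom adjustment" noted in the code) gives the backend a chance to pick
 * up the new "xdp-headroom" value before the (possibly NULL) program is
 * published to every queue's xdp_prog pointer; only after that does the
 * frontend switch back to XenbusStateConnected.
 */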
  1339. static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  1340. {
  1341. struct netfront_info *np = netdev_priv(dev);
  1342. if (np->broken)
  1343. return -ENODEV;
  1344. switch (xdp->command) {
  1345. case XDP_SETUP_PROG:
  1346. return xennet_xdp_set(dev, xdp->prog, xdp->extack);
  1347. default:
  1348. return -EINVAL;
  1349. }
  1350. }
  1351. static const struct net_device_ops xennet_netdev_ops = {
  1352. .ndo_uninit = xennet_uninit,
  1353. .ndo_open = xennet_open,
  1354. .ndo_stop = xennet_close,
  1355. .ndo_start_xmit = xennet_start_xmit,
  1356. .ndo_change_mtu = xennet_change_mtu,
  1357. .ndo_get_stats64 = xennet_get_stats64,
  1358. .ndo_set_mac_address = eth_mac_addr,
  1359. .ndo_validate_addr = eth_validate_addr,
  1360. .ndo_fix_features = xennet_fix_features,
  1361. .ndo_set_features = xennet_set_features,
  1362. .ndo_select_queue = xennet_select_queue,
  1363. .ndo_bpf = xennet_xdp,
  1364. .ndo_xdp_xmit = xennet_xdp_xmit,
  1365. #ifdef CONFIG_NET_POLL_CONTROLLER
  1366. .ndo_poll_controller = xennet_poll_controller,
  1367. #endif
  1368. };
  1369. static void xennet_free_netdev(struct net_device *netdev)
  1370. {
  1371. struct netfront_info *np = netdev_priv(netdev);
  1372. free_percpu(np->rx_stats);
  1373. free_percpu(np->tx_stats);
  1374. free_netdev(netdev);
  1375. }
  1376. static struct net_device *xennet_create_dev(struct xenbus_device *dev)
  1377. {
  1378. int err;
  1379. struct net_device *netdev;
  1380. struct netfront_info *np;
  1381. netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
  1382. if (!netdev)
  1383. return ERR_PTR(-ENOMEM);
  1384. np = netdev_priv(netdev);
  1385. np->xbdev = dev;
  1386. np->queues = NULL;
  1387. err = -ENOMEM;
  1388. np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
  1389. if (np->rx_stats == NULL)
  1390. goto exit;
  1391. np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
  1392. if (np->tx_stats == NULL)
  1393. goto exit;
  1394. netdev->netdev_ops = &xennet_netdev_ops;
  1395. netdev->features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
  1396. NETIF_F_GSO_ROBUST;
  1397. netdev->hw_features = NETIF_F_SG |
  1398. NETIF_F_IPV6_CSUM |
  1399. NETIF_F_TSO | NETIF_F_TSO6;
  1400. /*
  1401. * Assume that all hw features are available for now. This set
  1402. * will be adjusted by the call to netdev_update_features() in
  1403. * xennet_connect() which is the earliest point where we can
  1404. * negotiate with the backend regarding supported features.
  1405. */
  1406. netdev->features |= netdev->hw_features;
  1407. netdev->ethtool_ops = &xennet_ethtool_ops;
  1408. netdev->min_mtu = ETH_MIN_MTU;
  1409. netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE;
  1410. SET_NETDEV_DEV(netdev, &dev->dev);
  1411. np->netdev = netdev;
  1412. np->netfront_xdp_enabled = false;
  1413. netif_carrier_off(netdev);
  1414. do {
  1415. xenbus_switch_state(dev, XenbusStateInitialising);
  1416. err = wait_event_timeout(module_wq,
  1417. xenbus_read_driver_state(dev->otherend) !=
  1418. XenbusStateClosed &&
  1419. xenbus_read_driver_state(dev->otherend) !=
  1420. XenbusStateUnknown, XENNET_TIMEOUT);
  1421. } while (!err);
  1422. return netdev;
  1423. exit:
  1424. xennet_free_netdev(netdev);
  1425. return ERR_PTR(err);
  1426. }
  1427. /*
  1428. * Entry point to this code when a new device is created. Allocate the basic
  1429. * structures and the ring buffers for communication with the backend, and
  1430. * inform the backend of the appropriate details for those.
  1431. */
  1432. static int netfront_probe(struct xenbus_device *dev,
  1433. const struct xenbus_device_id *id)
  1434. {
  1435. int err;
  1436. struct net_device *netdev;
  1437. struct netfront_info *info;
  1438. netdev = xennet_create_dev(dev);
  1439. if (IS_ERR(netdev)) {
  1440. err = PTR_ERR(netdev);
  1441. xenbus_dev_fatal(dev, err, "creating netdev");
  1442. return err;
  1443. }
  1444. info = netdev_priv(netdev);
  1445. dev_set_drvdata(&dev->dev, info);
  1446. #ifdef CONFIG_SYSFS
  1447. info->netdev->sysfs_groups[0] = &xennet_dev_group;
  1448. #endif
  1449. return 0;
  1450. }
  1451. static void xennet_end_access(int ref, void *page)
  1452. {
  1453. /* This frees the page as a side-effect */
  1454. if (ref != INVALID_GRANT_REF)
  1455. gnttab_end_foreign_access(ref, virt_to_page(page));
  1456. }
  1457. static void xennet_disconnect_backend(struct netfront_info *info)
  1458. {
  1459. unsigned int i = 0;
  1460. unsigned int num_queues = info->netdev->real_num_tx_queues;
  1461. netif_carrier_off(info->netdev);
  1462. for (i = 0; i < num_queues && info->queues; ++i) {
  1463. struct netfront_queue *queue = &info->queues[i];
  1464. del_timer_sync(&queue->rx_refill_timer);
  1465. if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
  1466. unbind_from_irqhandler(queue->tx_irq, queue);
  1467. if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
  1468. unbind_from_irqhandler(queue->tx_irq, queue);
  1469. unbind_from_irqhandler(queue->rx_irq, queue);
  1470. }
  1471. queue->tx_evtchn = queue->rx_evtchn = 0;
  1472. queue->tx_irq = queue->rx_irq = 0;
  1473. if (netif_running(info->netdev))
  1474. napi_synchronize(&queue->napi);
  1475. xennet_release_tx_bufs(queue);
  1476. xennet_release_rx_bufs(queue);
  1477. gnttab_free_grant_references(queue->gref_tx_head);
  1478. gnttab_free_grant_references(queue->gref_rx_head);
  1479. /* End access and free the pages */
  1480. xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
  1481. xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
  1482. queue->tx_ring_ref = INVALID_GRANT_REF;
  1483. queue->rx_ring_ref = INVALID_GRANT_REF;
  1484. queue->tx.sring = NULL;
  1485. queue->rx.sring = NULL;
  1486. page_pool_destroy(queue->page_pool);
  1487. }
  1488. }
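/*
 * Teardown order in the loop above matters: the IRQ handlers are unbound and
 * NAPI is synchronised before any TX/RX buffers or grant references are
 * released, so no interrupt or poll can touch a ring that is being torn
 * down; only then are the shared ring pages returned via xennet_end_access()
 * and the per-queue page pool destroyed.
 */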
  1489. /*
  1490. * We are reconnecting to the backend, due to a suspend/resume, or a backend
  1491. * driver restart. We tear down our netif structure and recreate it, but
  1492. * leave the device-layer structures intact so that this is transparent to the
  1493. * rest of the kernel.
  1494. */
  1495. static int netfront_resume(struct xenbus_device *dev)
  1496. {
  1497. struct netfront_info *info = dev_get_drvdata(&dev->dev);
  1498. dev_dbg(&dev->dev, "%s\n", dev->nodename);
  1499. netif_tx_lock_bh(info->netdev);
  1500. netif_device_detach(info->netdev);
  1501. netif_tx_unlock_bh(info->netdev);
  1502. xennet_disconnect_backend(info);
  1503. rtnl_lock();
  1504. if (info->queues)
  1505. xennet_destroy_queues(info);
  1506. rtnl_unlock();
  1507. return 0;
  1508. }
  1509. static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
  1510. {
  1511. char *s, *e, *macstr;
  1512. int i;
  1513. macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
  1514. if (IS_ERR(macstr))
  1515. return PTR_ERR(macstr);
  1516. for (i = 0; i < ETH_ALEN; i++) {
  1517. mac[i] = simple_strtoul(s, &e, 16);
  1518. if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
  1519. kfree(macstr);
  1520. return -ENOENT;
  1521. }
  1522. s = e+1;
  1523. }
  1524. kfree(macstr);
  1525. return 0;
  1526. }
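/*
 * The "mac" node is expected to hold six colon-separated hex octets with no
 * trailing separator, e.g. (illustrative value only) "00:16:3e:12:34:56";
 * the loop above rejects the string if a field is empty or the character
 * after it is neither ':' nor the terminating NUL.
 */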
  1527. static int setup_netfront_single(struct netfront_queue *queue)
  1528. {
  1529. int err;
  1530. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
  1531. if (err < 0)
  1532. goto fail;
  1533. err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
  1534. xennet_interrupt, 0,
  1535. queue->info->netdev->name,
  1536. queue);
  1537. if (err < 0)
  1538. goto bind_fail;
  1539. queue->rx_evtchn = queue->tx_evtchn;
  1540. queue->rx_irq = queue->tx_irq = err;
  1541. return 0;
  1542. bind_fail:
  1543. xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
  1544. queue->tx_evtchn = 0;
  1545. fail:
  1546. return err;
  1547. }
  1548. static int setup_netfront_split(struct netfront_queue *queue)
  1549. {
  1550. int err;
  1551. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
  1552. if (err < 0)
  1553. goto fail;
  1554. err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
  1555. if (err < 0)
  1556. goto alloc_rx_evtchn_fail;
  1557. snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
  1558. "%s-tx", queue->name);
  1559. err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
  1560. xennet_tx_interrupt, 0,
  1561. queue->tx_irq_name, queue);
  1562. if (err < 0)
  1563. goto bind_tx_fail;
  1564. queue->tx_irq = err;
  1565. snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
  1566. "%s-rx", queue->name);
  1567. err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
  1568. xennet_rx_interrupt, 0,
  1569. queue->rx_irq_name, queue);
  1570. if (err < 0)
  1571. goto bind_rx_fail;
  1572. queue->rx_irq = err;
  1573. return 0;
  1574. bind_rx_fail:
  1575. unbind_from_irqhandler(queue->tx_irq, queue);
  1576. queue->tx_irq = 0;
  1577. bind_tx_fail:
  1578. xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
  1579. queue->rx_evtchn = 0;
  1580. alloc_rx_evtchn_fail:
  1581. xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
  1582. queue->tx_evtchn = 0;
  1583. fail:
  1584. return err;
  1585. }
  1586. static int setup_netfront(struct xenbus_device *dev,
  1587. struct netfront_queue *queue, unsigned int feature_split_evtchn)
  1588. {
  1589. struct xen_netif_tx_sring *txs;
  1590. struct xen_netif_rx_sring *rxs;
  1591. int err;
  1592. queue->tx_ring_ref = INVALID_GRANT_REF;
  1593. queue->rx_ring_ref = INVALID_GRANT_REF;
  1594. queue->rx.sring = NULL;
  1595. queue->tx.sring = NULL;
  1596. err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&txs,
  1597. 1, &queue->tx_ring_ref);
  1598. if (err)
  1599. goto fail;
  1600. XEN_FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
  1601. err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&rxs,
  1602. 1, &queue->rx_ring_ref);
  1603. if (err)
  1604. goto fail;
  1605. XEN_FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
  1606. if (feature_split_evtchn)
  1607. err = setup_netfront_split(queue);
  1608. /* setup single event channel if
  1609. * a) feature-split-event-channels == 0
  1610. * b) feature-split-event-channels == 1 but failed to setup
  1611. */
  1612. if (!feature_split_evtchn || err)
  1613. err = setup_netfront_single(queue);
  1614. if (err)
  1615. goto fail;
  1616. return 0;
  1617. fail:
  1618. xenbus_teardown_ring((void **)&queue->rx.sring, 1, &queue->rx_ring_ref);
  1619. xenbus_teardown_ring((void **)&queue->tx.sring, 1, &queue->tx_ring_ref);
  1620. return err;
  1621. }
  1622. /* Queue-specific initialisation
  1623. * This used to be done in xennet_create_dev() but must now
  1624. * be run per-queue.
  1625. */
  1626. static int xennet_init_queue(struct netfront_queue *queue)
  1627. {
  1628. unsigned short i;
  1629. int err = 0;
  1630. char *devid;
  1631. spin_lock_init(&queue->tx_lock);
  1632. spin_lock_init(&queue->rx_lock);
  1633. spin_lock_init(&queue->rx_cons_lock);
  1634. timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
  1635. devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
  1636. snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
  1637. devid, queue->id);
  1638. /* Initialise tx_skb_freelist as a free chain containing every entry. */
  1639. queue->tx_skb_freelist = 0;
  1640. queue->tx_pend_queue = TX_LINK_NONE;
  1641. for (i = 0; i < NET_TX_RING_SIZE; i++) {
  1642. queue->tx_link[i] = i + 1;
  1643. queue->grant_tx_ref[i] = INVALID_GRANT_REF;
  1644. queue->grant_tx_page[i] = NULL;
  1645. }
  1646. queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
  1647. /* Clear out rx_skbs */
  1648. for (i = 0; i < NET_RX_RING_SIZE; i++) {
  1649. queue->rx_skbs[i] = NULL;
  1650. queue->grant_rx_ref[i] = INVALID_GRANT_REF;
  1651. }
  1652. /* A grant for every tx ring slot */
  1653. if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
  1654. &queue->gref_tx_head) < 0) {
  1655. pr_alert("can't alloc tx grant refs\n");
  1656. err = -ENOMEM;
  1657. goto exit;
  1658. }
  1659. /* A grant for every rx ring slot */
  1660. if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
  1661. &queue->gref_rx_head) < 0) {
  1662. pr_alert("can't alloc rx grant refs\n");
  1663. err = -ENOMEM;
  1664. goto exit_free_tx;
  1665. }
  1666. return 0;
  1667. exit_free_tx:
  1668. gnttab_free_grant_references(queue->gref_tx_head);
  1669. exit:
  1670. return err;
  1671. }
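/*
 * The TX bookkeeping initialised above is a simple free list: tx_link[]
 * chains every ring slot index to the next one, tx_skb_freelist points at
 * the head (slot 0 initially) and TX_LINK_NONE terminates the chain, so a
 * slot is taken by popping the head and returned by pushing it back.
 */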
  1672. static int write_queue_xenstore_keys(struct netfront_queue *queue,
  1673. struct xenbus_transaction *xbt, int write_hierarchical)
  1674. {
  1675. /* Write the queue-specific keys into XenStore in the traditional
  1676. * way for a single queue, or in a queue subkeys for multiple
  1677. * queues.
  1678. */
  1679. struct xenbus_device *dev = queue->info->xbdev;
  1680. int err;
  1681. const char *message;
  1682. char *path;
  1683. size_t pathsize;
  1684. /* Choose the correct place to write the keys */
  1685. if (write_hierarchical) {
  1686. pathsize = strlen(dev->nodename) + 10;
  1687. path = kzalloc(pathsize, GFP_KERNEL);
  1688. if (!path) {
  1689. err = -ENOMEM;
  1690. message = "out of memory while writing ring references";
  1691. goto error;
  1692. }
  1693. snprintf(path, pathsize, "%s/queue-%u",
  1694. dev->nodename, queue->id);
  1695. } else {
  1696. path = (char *)dev->nodename;
  1697. }
  1698. /* Write ring references */
  1699. err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
  1700. queue->tx_ring_ref);
  1701. if (err) {
  1702. message = "writing tx-ring-ref";
  1703. goto error;
  1704. }
  1705. err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
  1706. queue->rx_ring_ref);
  1707. if (err) {
  1708. message = "writing rx-ring-ref";
  1709. goto error;
  1710. }
  1711. /* Write event channels; taking into account both shared
  1712. * and split event channel scenarios.
  1713. */
  1714. if (queue->tx_evtchn == queue->rx_evtchn) {
  1715. /* Shared event channel */
  1716. err = xenbus_printf(*xbt, path,
  1717. "event-channel", "%u", queue->tx_evtchn);
  1718. if (err) {
  1719. message = "writing event-channel";
  1720. goto error;
  1721. }
  1722. } else {
  1723. /* Split event channels */
  1724. err = xenbus_printf(*xbt, path,
  1725. "event-channel-tx", "%u", queue->tx_evtchn);
  1726. if (err) {
  1727. message = "writing event-channel-tx";
  1728. goto error;
  1729. }
  1730. err = xenbus_printf(*xbt, path,
  1731. "event-channel-rx", "%u", queue->rx_evtchn);
  1732. if (err) {
  1733. message = "writing event-channel-rx";
  1734. goto error;
  1735. }
  1736. }
  1737. if (write_hierarchical)
  1738. kfree(path);
  1739. return 0;
  1740. error:
  1741. if (write_hierarchical)
  1742. kfree(path);
  1743. xenbus_dev_fatal(dev, err, "%s", message);
  1744. return err;
  1745. }
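/*
 * Resulting XenStore layout, illustratively for a frontend node named
 * device/vif/0 (the real nodename comes from the toolstack):
 *   single queue (flat):   device/vif/0/tx-ring-ref, .../rx-ring-ref,
 *                          .../event-channel (or -tx/-rx when split)
 *   multiple queues:       device/vif/0/queue-0/tx-ring-ref, and so on per
 *                          queue, selected by write_hierarchical above.
 */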
  1746. static int xennet_create_page_pool(struct netfront_queue *queue)
  1747. {
  1748. int err;
  1749. struct page_pool_params pp_params = {
  1750. .order = 0,
  1751. .flags = 0,
  1752. .pool_size = NET_RX_RING_SIZE,
  1753. .nid = NUMA_NO_NODE,
  1754. .dev = &queue->info->netdev->dev,
  1755. .offset = XDP_PACKET_HEADROOM,
  1756. .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
  1757. };
  1758. queue->page_pool = page_pool_create(&pp_params);
  1759. if (IS_ERR(queue->page_pool)) {
  1760. err = PTR_ERR(queue->page_pool);
  1761. queue->page_pool = NULL;
  1762. return err;
  1763. }
  1764. err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
  1765. queue->id, 0);
  1766. if (err) {
  1767. netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
  1768. goto err_free_pp;
  1769. }
  1770. err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
  1771. MEM_TYPE_PAGE_POOL, queue->page_pool);
  1772. if (err) {
  1773. netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
  1774. goto err_unregister_rxq;
  1775. }
  1776. return 0;
  1777. err_unregister_rxq:
  1778. xdp_rxq_info_unreg(&queue->xdp_rxq);
  1779. err_free_pp:
  1780. page_pool_destroy(queue->page_pool);
  1781. queue->page_pool = NULL;
  1782. return err;
  1783. }
  1784. static int xennet_create_queues(struct netfront_info *info,
  1785. unsigned int *num_queues)
  1786. {
  1787. unsigned int i;
  1788. int ret;
  1789. info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
  1790. GFP_KERNEL);
  1791. if (!info->queues)
  1792. return -ENOMEM;
  1793. for (i = 0; i < *num_queues; i++) {
  1794. struct netfront_queue *queue = &info->queues[i];
  1795. queue->id = i;
  1796. queue->info = info;
  1797. ret = xennet_init_queue(queue);
  1798. if (ret < 0) {
  1799. dev_warn(&info->xbdev->dev,
  1800. "only created %d queues\n", i);
  1801. *num_queues = i;
  1802. break;
  1803. }
  1804. /* use page pool recycling instead of buddy allocator */
  1805. ret = xennet_create_page_pool(queue);
  1806. if (ret < 0) {
  1807. dev_err(&info->xbdev->dev, "can't allocate page pool\n");
  1808. *num_queues = i;
  1809. return ret;
  1810. }
  1811. netif_napi_add(queue->info->netdev, &queue->napi, xennet_poll);
  1812. if (netif_running(info->netdev))
  1813. napi_enable(&queue->napi);
  1814. }
  1815. netif_set_real_num_tx_queues(info->netdev, *num_queues);
  1816. if (*num_queues == 0) {
  1817. dev_err(&info->xbdev->dev, "no queues\n");
  1818. return -EINVAL;
  1819. }
  1820. return 0;
  1821. }
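/*
 * Failure handling above is deliberately asymmetric: if xennet_init_queue()
 * fails, the function falls back to however many queues were already
 * initialised (*num_queues is clamped to i, and only a fully empty set is
 * treated as an error), whereas a page pool allocation failure aborts the
 * whole setup, since an RX path without its pool cannot be brought up.
 */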
  1822. /* Common code used when first setting up, and when resuming. */
  1823. static int talk_to_netback(struct xenbus_device *dev,
  1824. struct netfront_info *info)
  1825. {
  1826. const char *message;
  1827. struct xenbus_transaction xbt;
  1828. int err;
  1829. unsigned int feature_split_evtchn;
  1830. unsigned int i = 0;
  1831. unsigned int max_queues = 0;
  1832. struct netfront_queue *queue = NULL;
  1833. unsigned int num_queues = 1;
  1834. u8 addr[ETH_ALEN];
  1835. info->netdev->irq = 0;
  1836. /* Check if backend is trusted. */
  1837. info->bounce = !xennet_trusted ||
  1838. !xenbus_read_unsigned(dev->nodename, "trusted", 1);
  1839. /* Check if backend supports multiple queues */
  1840. max_queues = xenbus_read_unsigned(info->xbdev->otherend,
  1841. "multi-queue-max-queues", 1);
  1842. num_queues = min(max_queues, xennet_max_queues);
  1843. /* Check feature-split-event-channels */
  1844. feature_split_evtchn = xenbus_read_unsigned(info->xbdev->otherend,
  1845. "feature-split-event-channels", 0);
  1846. /* Read mac addr. */
  1847. err = xen_net_read_mac(dev, addr);
  1848. if (err) {
  1849. xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
  1850. goto out_unlocked;
  1851. }
  1852. eth_hw_addr_set(info->netdev, addr);
  1853. info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
  1854. "feature-xdp-headroom", 0);
  1855. if (info->netback_has_xdp_headroom) {
  1856. /* set the current xen-netfront xdp state */
  1857. err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
  1858. NETBACK_XDP_HEADROOM_ENABLE :
  1859. NETBACK_XDP_HEADROOM_DISABLE);
  1860. if (err)
  1861. goto out_unlocked;
  1862. }
  1863. rtnl_lock();
  1864. if (info->queues)
  1865. xennet_destroy_queues(info);
  1866. /* For the case of a reconnect reset the "broken" indicator. */
  1867. info->broken = false;
  1868. err = xennet_create_queues(info, &num_queues);
  1869. if (err < 0) {
  1870. xenbus_dev_fatal(dev, err, "creating queues");
  1871. kfree(info->queues);
  1872. info->queues = NULL;
  1873. goto out;
  1874. }
  1875. rtnl_unlock();
  1876. /* Create shared ring, alloc event channel -- for each queue */
  1877. for (i = 0; i < num_queues; ++i) {
  1878. queue = &info->queues[i];
  1879. err = setup_netfront(dev, queue, feature_split_evtchn);
  1880. if (err)
  1881. goto destroy_ring;
  1882. }
  1883. again:
  1884. err = xenbus_transaction_start(&xbt);
  1885. if (err) {
  1886. xenbus_dev_fatal(dev, err, "starting transaction");
  1887. goto destroy_ring;
  1888. }
  1889. if (xenbus_exists(XBT_NIL,
  1890. info->xbdev->otherend, "multi-queue-max-queues")) {
  1891. /* Write the number of queues */
  1892. err = xenbus_printf(xbt, dev->nodename,
  1893. "multi-queue-num-queues", "%u", num_queues);
  1894. if (err) {
  1895. message = "writing multi-queue-num-queues";
  1896. goto abort_transaction_no_dev_fatal;
  1897. }
  1898. }
  1899. if (num_queues == 1) {
  1900. err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
  1901. if (err)
  1902. goto abort_transaction_no_dev_fatal;
  1903. } else {
  1904. /* Write the keys for each queue */
  1905. for (i = 0; i < num_queues; ++i) {
  1906. queue = &info->queues[i];
  1907. err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
  1908. if (err)
  1909. goto abort_transaction_no_dev_fatal;
  1910. }
  1911. }
  1912. /* The remaining keys are not queue-specific */
  1913. err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
  1914. 1);
  1915. if (err) {
  1916. message = "writing request-rx-copy";
  1917. goto abort_transaction;
  1918. }
  1919. err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
  1920. if (err) {
  1921. message = "writing feature-rx-notify";
  1922. goto abort_transaction;
  1923. }
  1924. err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
  1925. if (err) {
  1926. message = "writing feature-sg";
  1927. goto abort_transaction;
  1928. }
  1929. err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
  1930. if (err) {
  1931. message = "writing feature-gso-tcpv4";
  1932. goto abort_transaction;
  1933. }
  1934. err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
  1935. if (err) {
  1936. message = "writing feature-gso-tcpv6";
  1937. goto abort_transaction;
  1938. }
  1939. err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
  1940. "1");
  1941. if (err) {
  1942. message = "writing feature-ipv6-csum-offload";
  1943. goto abort_transaction;
  1944. }
  1945. err = xenbus_transaction_end(xbt, 0);
  1946. if (err) {
  1947. if (err == -EAGAIN)
  1948. goto again;
  1949. xenbus_dev_fatal(dev, err, "completing transaction");
  1950. goto destroy_ring;
  1951. }
  1952. return 0;
  1953. abort_transaction:
  1954. xenbus_dev_fatal(dev, err, "%s", message);
  1955. abort_transaction_no_dev_fatal:
  1956. xenbus_transaction_end(xbt, 1);
  1957. destroy_ring:
  1958. xennet_disconnect_backend(info);
  1959. rtnl_lock();
  1960. xennet_destroy_queues(info);
  1961. out:
  1962. rtnl_unlock();
  1963. out_unlocked:
  1964. device_unregister(&dev->dev);
  1965. return err;
  1966. }
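/*
 * Note the transaction retry above: xenbus_transaction_end() reporting
 * -EAGAIN means another XenStore writer raced with this one, so the whole
 * key set is rewritten under a fresh transaction via the "again" label.  Any
 * other failure after queue creation unwinds through destroy_ring, and every
 * error path in this function finishes by unregistering the device.
 */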
  1967. static int xennet_connect(struct net_device *dev)
  1968. {
  1969. struct netfront_info *np = netdev_priv(dev);
  1970. unsigned int num_queues = 0;
  1971. int err;
  1972. unsigned int j = 0;
  1973. struct netfront_queue *queue = NULL;
  1974. if (!xenbus_read_unsigned(np->xbdev->otherend, "feature-rx-copy", 0)) {
  1975. dev_info(&dev->dev,
  1976. "backend does not support copying receive path\n");
  1977. return -ENODEV;
  1978. }
  1979. err = talk_to_netback(np->xbdev, np);
  1980. if (err)
  1981. return err;
  1982. if (np->netback_has_xdp_headroom)
  1983. pr_info("backend supports XDP headroom\n");
  1984. if (np->bounce)
  1985. dev_info(&np->xbdev->dev,
  1986. "bouncing transmitted data to zeroed pages\n");
  1987. /* talk_to_netback() sets the correct number of queues */
  1988. num_queues = dev->real_num_tx_queues;
  1989. if (dev->reg_state == NETREG_UNINITIALIZED) {
  1990. err = register_netdev(dev);
  1991. if (err) {
  1992. pr_warn("%s: register_netdev err=%d\n", __func__, err);
  1993. device_unregister(&np->xbdev->dev);
  1994. return err;
  1995. }
  1996. }
  1997. rtnl_lock();
  1998. netdev_update_features(dev);
  1999. rtnl_unlock();
  2000. /*
  2001. * All public and private state should now be sane. Get
  2002. * ready to start sending and receiving packets and give the driver
  2003. * domain a kick because we've probably just requeued some
  2004. * packets.
  2005. */
  2006. netif_tx_lock_bh(np->netdev);
  2007. netif_device_attach(np->netdev);
  2008. netif_tx_unlock_bh(np->netdev);
  2009. netif_carrier_on(np->netdev);
  2010. for (j = 0; j < num_queues; ++j) {
  2011. queue = &np->queues[j];
  2012. notify_remote_via_irq(queue->tx_irq);
  2013. if (queue->tx_irq != queue->rx_irq)
  2014. notify_remote_via_irq(queue->rx_irq);
  2015. spin_lock_bh(&queue->rx_lock);
  2016. xennet_alloc_rx_buffers(queue);
  2017. spin_unlock_bh(&queue->rx_lock);
  2018. }
  2019. return 0;
  2020. }
  2021. /*
  2022. * Callback received when the backend's state changes.
  2023. */
  2024. static void netback_changed(struct xenbus_device *dev,
  2025. enum xenbus_state backend_state)
  2026. {
  2027. struct netfront_info *np = dev_get_drvdata(&dev->dev);
  2028. struct net_device *netdev = np->netdev;
  2029. dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
  2030. wake_up_all(&module_wq);
  2031. switch (backend_state) {
  2032. case XenbusStateInitialising:
  2033. case XenbusStateInitialised:
  2034. case XenbusStateReconfiguring:
  2035. case XenbusStateReconfigured:
  2036. case XenbusStateUnknown:
  2037. break;
  2038. case XenbusStateInitWait:
  2039. if (dev->state != XenbusStateInitialising)
  2040. break;
  2041. if (xennet_connect(netdev) != 0)
  2042. break;
  2043. xenbus_switch_state(dev, XenbusStateConnected);
  2044. break;
  2045. case XenbusStateConnected:
  2046. netdev_notify_peers(netdev);
  2047. break;
  2048. case XenbusStateClosed:
  2049. if (dev->state == XenbusStateClosed)
  2050. break;
  2051. fallthrough; /* Missed the backend's CLOSING state */
  2052. case XenbusStateClosing:
  2053. xenbus_frontend_closed(dev);
  2054. break;
  2055. }
  2056. }
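/*
 * The interesting transition above is XenbusStateInitWait: the frontend only
 * reacts to it while it is still in XenbusStateInitialising itself, connects
 * via xennet_connect() and then advertises XenbusStateConnected.  A backend
 * Closed report is ignored when the frontend is already Closed; otherwise
 * Closing and Closed both funnel into xenbus_frontend_closed().
 */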
  2057. static const struct xennet_stat {
  2058. char name[ETH_GSTRING_LEN];
  2059. u16 offset;
  2060. } xennet_stats[] = {
  2061. {
  2062. "rx_gso_checksum_fixup",
  2063. offsetof(struct netfront_info, rx_gso_checksum_fixup)
  2064. },
  2065. };
  2066. static int xennet_get_sset_count(struct net_device *dev, int string_set)
  2067. {
  2068. switch (string_set) {
  2069. case ETH_SS_STATS:
  2070. return ARRAY_SIZE(xennet_stats);
  2071. default:
  2072. return -EINVAL;
  2073. }
  2074. }
  2075. static void xennet_get_ethtool_stats(struct net_device *dev,
2076. struct ethtool_stats *stats, u64 *data)
  2077. {
  2078. void *np = netdev_priv(dev);
  2079. int i;
  2080. for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
  2081. data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
  2082. }
2083. static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  2084. {
  2085. int i;
  2086. switch (stringset) {
  2087. case ETH_SS_STATS:
  2088. for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
  2089. memcpy(data + i * ETH_GSTRING_LEN,
  2090. xennet_stats[i].name, ETH_GSTRING_LEN);
  2091. break;
  2092. }
  2093. }
  2094. static const struct ethtool_ops xennet_ethtool_ops =
  2095. {
  2096. .get_link = ethtool_op_get_link,
  2097. .get_sset_count = xennet_get_sset_count,
  2098. .get_ethtool_stats = xennet_get_ethtool_stats,
  2099. .get_strings = xennet_get_strings,
  2100. .get_ts_info = ethtool_op_get_ts_info,
  2101. };
  2102. #ifdef CONFIG_SYSFS
  2103. static ssize_t show_rxbuf(struct device *dev,
  2104. struct device_attribute *attr, char *buf)
  2105. {
  2106. return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
  2107. }
  2108. static ssize_t store_rxbuf(struct device *dev,
  2109. struct device_attribute *attr,
  2110. const char *buf, size_t len)
  2111. {
  2112. char *endp;
  2113. if (!capable(CAP_NET_ADMIN))
  2114. return -EPERM;
  2115. simple_strtoul(buf, &endp, 0);
  2116. if (endp == buf)
  2117. return -EBADMSG;
  2118. /* rxbuf_min and rxbuf_max are no longer configurable. */
  2119. return len;
  2120. }
  2121. static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
  2122. static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
  2123. static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
  2124. static struct attribute *xennet_dev_attrs[] = {
  2125. &dev_attr_rxbuf_min.attr,
  2126. &dev_attr_rxbuf_max.attr,
  2127. &dev_attr_rxbuf_cur.attr,
  2128. NULL
  2129. };
  2130. static const struct attribute_group xennet_dev_group = {
  2131. .attrs = xennet_dev_attrs
  2132. };
  2133. #endif /* CONFIG_SYSFS */
  2134. static void xennet_bus_close(struct xenbus_device *dev)
  2135. {
  2136. int ret;
  2137. if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
  2138. return;
  2139. do {
  2140. xenbus_switch_state(dev, XenbusStateClosing);
  2141. ret = wait_event_timeout(module_wq,
  2142. xenbus_read_driver_state(dev->otherend) ==
  2143. XenbusStateClosing ||
  2144. xenbus_read_driver_state(dev->otherend) ==
  2145. XenbusStateClosed ||
  2146. xenbus_read_driver_state(dev->otherend) ==
  2147. XenbusStateUnknown,
  2148. XENNET_TIMEOUT);
  2149. } while (!ret);
  2150. if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
  2151. return;
  2152. do {
  2153. xenbus_switch_state(dev, XenbusStateClosed);
  2154. ret = wait_event_timeout(module_wq,
  2155. xenbus_read_driver_state(dev->otherend) ==
  2156. XenbusStateClosed ||
  2157. xenbus_read_driver_state(dev->otherend) ==
  2158. XenbusStateUnknown,
  2159. XENNET_TIMEOUT);
  2160. } while (!ret);
  2161. }
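/*
 * The two loops above implement a polite shutdown handshake: first request
 * XenbusStateClosing and wait (retrying on timeout) until the backend has at
 * least reached Closing, Closed or Unknown, then request XenbusStateClosed
 * and wait again, so xennet_remove() only starts freeing resources once the
 * backend can no longer be using them.
 */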
  2162. static int xennet_remove(struct xenbus_device *dev)
  2163. {
  2164. struct netfront_info *info = dev_get_drvdata(&dev->dev);
  2165. xennet_bus_close(dev);
  2166. xennet_disconnect_backend(info);
  2167. if (info->netdev->reg_state == NETREG_REGISTERED)
  2168. unregister_netdev(info->netdev);
  2169. if (info->queues) {
  2170. rtnl_lock();
  2171. xennet_destroy_queues(info);
  2172. rtnl_unlock();
  2173. }
  2174. xennet_free_netdev(info->netdev);
  2175. return 0;
  2176. }
  2177. static const struct xenbus_device_id netfront_ids[] = {
  2178. { "vif" },
  2179. { "" }
  2180. };
  2181. static struct xenbus_driver netfront_driver = {
  2182. .ids = netfront_ids,
  2183. .probe = netfront_probe,
  2184. .remove = xennet_remove,
  2185. .resume = netfront_resume,
  2186. .otherend_changed = netback_changed,
  2187. };
  2188. static int __init netif_init(void)
  2189. {
  2190. if (!xen_domain())
  2191. return -ENODEV;
  2192. if (!xen_has_pv_nic_devices())
  2193. return -ENODEV;
  2194. pr_info("Initialising Xen virtual ethernet driver\n");
2195. /* Allow as many queues as there are CPUs, but at most 8, if the user has
2196. * not specified a value.
  2197. */
  2198. if (xennet_max_queues == 0)
  2199. xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
  2200. num_online_cpus());
  2201. return xenbus_register_frontend(&netfront_driver);
  2202. }
  2203. module_init(netif_init);
  2204. static void __exit netif_exit(void)
  2205. {
  2206. xenbus_unregister_driver(&netfront_driver);
  2207. }
  2208. module_exit(netif_exit);
  2209. MODULE_DESCRIPTION("Xen virtual network device frontend");
  2210. MODULE_LICENSE("GPL");
  2211. MODULE_ALIAS("xen:vif");
  2212. MODULE_ALIAS("xennet");