virtio_net.c
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* A network driver using virtio.
  3. *
4. * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  5. */
  6. //#define DEBUG
  7. #include <linux/netdevice.h>
  8. #include <linux/etherdevice.h>
  9. #include <linux/ethtool.h>
  10. #include <linux/module.h>
  11. #include <linux/virtio.h>
  12. #include <linux/virtio_net.h>
  13. #include <linux/bpf.h>
  14. #include <linux/bpf_trace.h>
  15. #include <linux/scatterlist.h>
  16. #include <linux/if_vlan.h>
  17. #include <linux/slab.h>
  18. #include <linux/cpu.h>
  19. #include <linux/average.h>
  20. #include <linux/filter.h>
  21. #include <linux/kernel.h>
  22. #include <net/route.h>
  23. #include <net/xdp.h>
  24. #include <net/net_failover.h>
  25. static int napi_weight = NAPI_POLL_WEIGHT;
  26. module_param(napi_weight, int, 0444);
  27. static bool csum = true, gso = true, napi_tx = true;
  28. module_param(csum, bool, 0444);
  29. module_param(gso, bool, 0444);
  30. module_param(napi_tx, bool, 0644);
  31. /* FIXME: MTU in config. */
  32. #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
  33. #define GOOD_COPY_LEN 128
  34. #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
  35. /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
  36. #define VIRTIO_XDP_HEADROOM 256
  37. /* Separating two types of XDP xmit */
  38. #define VIRTIO_XDP_TX BIT(0)
  39. #define VIRTIO_XDP_REDIR BIT(1)
  40. #define VIRTIO_XDP_FLAG BIT(0)
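/* Buffers queued for XDP transmission are xdp_frame pointers with
 * VIRTIO_XDP_FLAG or-ed into the low bit, so the tx completion path can
 * tell them apart from sk_buff pointers; see is_xdp_frame(),
 * xdp_to_ptr() and ptr_to_xdp() below. This relies on xdp_frame
 * pointers being at least 2-byte aligned, which leaves bit 0 free.
 */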
  41. /* RX packet size EWMA. The average packet size is used to determine the packet
  42. * buffer size when refilling RX rings. As the entire RX ring may be refilled
  43. * at once, the weight is chosen so that the EWMA will be insensitive to short-
  44. * term, transient changes in packet size.
  45. */
  46. DECLARE_EWMA(pkt_len, 0, 64)
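/* DECLARE_EWMA(pkt_len, 0, 64) generates struct ewma_pkt_len along with
 * ewma_pkt_len_add() and ewma_pkt_len_read(). With no fractional bits
 * and a weight reciprocal of 64, each sample moves the average by
 * roughly (sample - avg) / 64, which keeps the estimate stable across
 * short bursts of unusually small or large packets.
 */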
  47. #define VIRTNET_DRIVER_VERSION "1.0.0"
  48. static const unsigned long guest_offloads[] = {
  49. VIRTIO_NET_F_GUEST_TSO4,
  50. VIRTIO_NET_F_GUEST_TSO6,
  51. VIRTIO_NET_F_GUEST_ECN,
  52. VIRTIO_NET_F_GUEST_UFO,
  53. VIRTIO_NET_F_GUEST_CSUM
  54. };
  55. #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
  56. (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
  57. (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
  58. (1ULL << VIRTIO_NET_F_GUEST_UFO))
  59. struct virtnet_stat_desc {
  60. char desc[ETH_GSTRING_LEN];
  61. size_t offset;
  62. };
  63. struct virtnet_sq_stats {
  64. struct u64_stats_sync syncp;
  65. u64 packets;
  66. u64 bytes;
  67. u64 xdp_tx;
  68. u64 xdp_tx_drops;
  69. u64 kicks;
  70. };
  71. struct virtnet_rq_stats {
  72. struct u64_stats_sync syncp;
  73. u64 packets;
  74. u64 bytes;
  75. u64 drops;
  76. u64 xdp_packets;
  77. u64 xdp_tx;
  78. u64 xdp_redirects;
  79. u64 xdp_drops;
  80. u64 kicks;
  81. };
  82. #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m)
  83. #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m)
  84. static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
  85. { "packets", VIRTNET_SQ_STAT(packets) },
  86. { "bytes", VIRTNET_SQ_STAT(bytes) },
  87. { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) },
  88. { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) },
  89. { "kicks", VIRTNET_SQ_STAT(kicks) },
  90. };
  91. static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
  92. { "packets", VIRTNET_RQ_STAT(packets) },
  93. { "bytes", VIRTNET_RQ_STAT(bytes) },
  94. { "drops", VIRTNET_RQ_STAT(drops) },
  95. { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) },
  96. { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) },
  97. { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) },
  98. { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) },
  99. { "kicks", VIRTNET_RQ_STAT(kicks) },
  100. };
  101. #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
  102. #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
  103. /* Internal representation of a send virtqueue */
  104. struct send_queue {
105. /* Virtqueue associated with this send_queue */
  106. struct virtqueue *vq;
  107. /* TX: fragments + linear part + virtio header */
  108. struct scatterlist sg[MAX_SKB_FRAGS + 2];
  109. /* Name of the send queue: output.$index */
  110. char name[40];
  111. struct virtnet_sq_stats stats;
  112. struct napi_struct napi;
  113. };
  114. /* Internal representation of a receive virtqueue */
  115. struct receive_queue {
  116. /* Virtqueue associated with this receive_queue */
  117. struct virtqueue *vq;
  118. struct napi_struct napi;
  119. struct bpf_prog __rcu *xdp_prog;
  120. struct virtnet_rq_stats stats;
  121. /* Chain pages by the private ptr. */
  122. struct page *pages;
  123. /* Average packet length for mergeable receive buffers. */
  124. struct ewma_pkt_len mrg_avg_pkt_len;
  125. /* Page frag for packet buffer allocation. */
  126. struct page_frag alloc_frag;
  127. /* RX: fragments + linear part + virtio header */
  128. struct scatterlist sg[MAX_SKB_FRAGS + 2];
  129. /* Min single buffer size for mergeable buffers case. */
  130. unsigned int min_buf_len;
  131. /* Name of this receive queue: input.$index */
  132. char name[40];
  133. struct xdp_rxq_info xdp_rxq;
  134. };
  135. /* Control VQ buffers: protected by the rtnl lock */
  136. struct control_buf {
  137. struct virtio_net_ctrl_hdr hdr;
  138. virtio_net_ctrl_ack status;
  139. struct virtio_net_ctrl_mq mq;
  140. u8 promisc;
  141. u8 allmulti;
  142. __virtio16 vid;
  143. __virtio64 offloads;
  144. };
  145. struct virtnet_info {
  146. struct virtio_device *vdev;
  147. struct virtqueue *cvq;
  148. struct net_device *dev;
  149. struct send_queue *sq;
  150. struct receive_queue *rq;
  151. unsigned int status;
  152. /* Max # of queue pairs supported by the device */
  153. u16 max_queue_pairs;
  154. /* # of queue pairs currently used by the driver */
  155. u16 curr_queue_pairs;
  156. /* # of XDP queue pairs currently used by the driver */
  157. u16 xdp_queue_pairs;
158. /* xdp_queue_pairs may be 0 even while XDP is loaded, so track whether XDP is enabled separately. */
  159. bool xdp_enabled;
  160. /* I like... big packets and I cannot lie! */
  161. bool big_packets;
  162. /* Host will merge rx buffers for big packets (shake it! shake it!) */
  163. bool mergeable_rx_bufs;
  164. /* Has control virtqueue */
  165. bool has_cvq;
  166. /* Host can handle any s/g split between our header and packet data */
  167. bool any_header_sg;
  168. /* Packet virtio header size */
  169. u8 hdr_len;
  170. /* Work struct for delayed refilling if we run low on memory. */
  171. struct delayed_work refill;
  172. /* Is delayed refill enabled? */
  173. bool refill_enabled;
  174. /* The lock to synchronize the access to refill_enabled */
  175. spinlock_t refill_lock;
  176. /* Work struct for config space updates */
  177. struct work_struct config_work;
178. /* Is the affinity hint set for the virtqueues? */
  179. bool affinity_hint_set;
  180. /* CPU hotplug instances for online & dead */
  181. struct hlist_node node;
  182. struct hlist_node node_dead;
  183. struct control_buf *ctrl;
  184. /* Ethtool settings */
  185. u8 duplex;
  186. u32 speed;
  187. unsigned long guest_offloads;
  188. unsigned long guest_offloads_capable;
  189. /* failover when STANDBY feature enabled */
  190. struct failover *failover;
  191. };
  192. struct padded_vnet_hdr {
  193. struct virtio_net_hdr_mrg_rxbuf hdr;
  194. /*
  195. * hdr is in a separate sg buffer, and data sg buffer shares same page
  196. * with this header sg. This padding makes next sg 16 byte aligned
  197. * after the header.
  198. */
  199. char padding[4];
  200. };
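/* sizeof(struct virtio_net_hdr_mrg_rxbuf) is 12 bytes (the 10-byte
 * virtio_net_hdr plus the 16-bit num_buffers field), so the 4 bytes of
 * padding above round the header area up to 16 bytes.
 */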
  201. static bool is_xdp_frame(void *ptr)
  202. {
  203. return (unsigned long)ptr & VIRTIO_XDP_FLAG;
  204. }
  205. static void *xdp_to_ptr(struct xdp_frame *ptr)
  206. {
  207. return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
  208. }
  209. static struct xdp_frame *ptr_to_xdp(void *ptr)
  210. {
  211. return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
  212. }
  213. /* Converting between virtqueue no. and kernel tx/rx queue no.
  214. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  215. */
  216. static int vq2txq(struct virtqueue *vq)
  217. {
  218. return (vq->index - 1) / 2;
  219. }
  220. static int txq2vq(int txq)
  221. {
  222. return txq * 2 + 1;
  223. }
  224. static int vq2rxq(struct virtqueue *vq)
  225. {
  226. return vq->index / 2;
  227. }
  228. static int rxq2vq(int rxq)
  229. {
  230. return rxq * 2;
  231. }
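/* For example, with two queue pairs: vq0=rx0, vq1=tx0, vq2=rx1, vq3=tx1
 * and, if present, vq4 is the control virtqueue. So txq2vq(1) == 3 and
 * vq2rxq() on the virtqueue with index 2 returns 1.
 */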
  232. static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
  233. {
  234. return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
  235. }
236. /*
237. * page->private is used to chain pages for big packets; put the whole
238. * most recently used list at the beginning for reuse
239. */
  240. static void give_pages(struct receive_queue *rq, struct page *page)
  241. {
  242. struct page *end;
  243. /* Find end of list, sew whole thing into vi->rq.pages. */
  244. for (end = page; end->private; end = (struct page *)end->private);
  245. end->private = (unsigned long)rq->pages;
  246. rq->pages = page;
  247. }
  248. static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
  249. {
  250. struct page *p = rq->pages;
  251. if (p) {
  252. rq->pages = (struct page *)p->private;
  253. /* clear private here, it is used to chain pages */
  254. p->private = 0;
  255. } else
  256. p = alloc_page(gfp_mask);
  257. return p;
  258. }
  259. static void enable_delayed_refill(struct virtnet_info *vi)
  260. {
  261. spin_lock_bh(&vi->refill_lock);
  262. vi->refill_enabled = true;
  263. spin_unlock_bh(&vi->refill_lock);
  264. }
  265. static void disable_delayed_refill(struct virtnet_info *vi)
  266. {
  267. spin_lock_bh(&vi->refill_lock);
  268. vi->refill_enabled = false;
  269. spin_unlock_bh(&vi->refill_lock);
  270. }
  271. static void virtqueue_napi_schedule(struct napi_struct *napi,
  272. struct virtqueue *vq)
  273. {
  274. if (napi_schedule_prep(napi)) {
  275. virtqueue_disable_cb(vq);
  276. __napi_schedule(napi);
  277. }
  278. }
  279. static void virtqueue_napi_complete(struct napi_struct *napi,
  280. struct virtqueue *vq, int processed)
  281. {
  282. int opaque;
  283. opaque = virtqueue_enable_cb_prepare(vq);
  284. if (napi_complete_done(napi, processed)) {
  285. if (unlikely(virtqueue_poll(vq, opaque)))
  286. virtqueue_napi_schedule(napi, vq);
  287. } else {
  288. virtqueue_disable_cb(vq);
  289. }
  290. }
  291. static void skb_xmit_done(struct virtqueue *vq)
  292. {
  293. struct virtnet_info *vi = vq->vdev->priv;
  294. struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;
  295. /* Suppress further interrupts. */
  296. virtqueue_disable_cb(vq);
  297. if (napi->weight)
  298. virtqueue_napi_schedule(napi, vq);
  299. else
  300. /* We were probably waiting for more output buffers. */
  301. netif_wake_subqueue(vi->dev, vq2txq(vq));
  302. }
  303. #define MRG_CTX_HEADER_SHIFT 22
  304. static void *mergeable_len_to_ctx(unsigned int truesize,
  305. unsigned int headroom)
  306. {
  307. return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
  308. }
  309. static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
  310. {
  311. return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
  312. }
  313. static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
  314. {
  315. return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
  316. }
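/* The mergeable-buffer context is a fake pointer that packs two values:
 * truesize in the low 22 bits and the headroom in the bits above them.
 * For example, truesize = 1536 and headroom = 256 are encoded as
 * (256 << 22) | 1536, and the two helpers above recover each field.
 */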
  317. /* Called from bottom half context */
  318. static struct sk_buff *page_to_skb(struct virtnet_info *vi,
  319. struct receive_queue *rq,
  320. struct page *page, unsigned int offset,
  321. unsigned int len, unsigned int truesize,
  322. bool hdr_valid, unsigned int metasize)
  323. {
  324. struct sk_buff *skb;
  325. struct virtio_net_hdr_mrg_rxbuf *hdr;
  326. unsigned int copy, hdr_len, hdr_padded_len;
  327. char *p;
  328. p = page_address(page) + offset;
  329. /* copy small packet so we can reuse these pages for small data */
  330. skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
  331. if (unlikely(!skb))
  332. return NULL;
  333. hdr = skb_vnet_hdr(skb);
  334. hdr_len = vi->hdr_len;
  335. if (vi->mergeable_rx_bufs)
  336. hdr_padded_len = sizeof(*hdr);
  337. else
  338. hdr_padded_len = sizeof(struct padded_vnet_hdr);
  339. /* hdr_valid means no XDP, so we can copy the vnet header */
  340. if (hdr_valid)
  341. memcpy(hdr, p, hdr_len);
  342. len -= hdr_len;
  343. offset += hdr_padded_len;
  344. p += hdr_padded_len;
345. /* Copy the whole frame if it fits skb->head, otherwise
346. * let virtio_net_hdr_to_skb() and GRO pull headers as needed.
347. */
  348. if (len <= skb_tailroom(skb))
  349. copy = len;
  350. else
  351. copy = ETH_HLEN + metasize;
  352. skb_put_data(skb, p, copy);
  353. if (metasize) {
  354. __skb_pull(skb, metasize);
  355. skb_metadata_set(skb, metasize);
  356. }
  357. len -= copy;
  358. offset += copy;
  359. if (vi->mergeable_rx_bufs) {
  360. if (len)
  361. skb_add_rx_frag(skb, 0, page, offset, len, truesize);
  362. else
  363. put_page(page);
  364. return skb;
  365. }
  366. /*
  367. * Verify that we can indeed put this data into a skb.
  368. * This is here to handle cases when the device erroneously
  369. * tries to receive more than is possible. This is usually
  370. * the case of a broken device.
  371. */
  372. if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
  373. net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
  374. dev_kfree_skb(skb);
  375. return NULL;
  376. }
  377. BUG_ON(offset >= PAGE_SIZE);
  378. while (len) {
  379. unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
  380. skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
  381. frag_size, truesize);
  382. len -= frag_size;
  383. page = (struct page *)page->private;
  384. offset = 0;
  385. }
  386. if (page)
  387. give_pages(rq, page);
  388. return skb;
  389. }
  390. static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
  391. struct send_queue *sq,
  392. struct xdp_frame *xdpf)
  393. {
  394. struct virtio_net_hdr_mrg_rxbuf *hdr;
  395. int err;
  396. if (unlikely(xdpf->headroom < vi->hdr_len))
  397. return -EOVERFLOW;
  398. /* Make room for virtqueue hdr (also change xdpf->headroom?) */
  399. xdpf->data -= vi->hdr_len;
  400. /* Zero header and leave csum up to XDP layers */
  401. hdr = xdpf->data;
  402. memset(hdr, 0, vi->hdr_len);
  403. xdpf->len += vi->hdr_len;
  404. sg_init_one(sq->sg, xdpf->data, xdpf->len);
  405. err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
  406. GFP_ATOMIC);
  407. if (unlikely(err))
408. return -ENOSPC; /* Caller handles free/refcnt */
  409. return 0;
  410. }
411. /* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for XDP tx on
412. * the current cpu, so it does not need to be locked.
413. *
414. * Here we use a macro instead of inline functions because we have to deal with
415. * three issues at the same time: 1. the choice of sq, 2. deciding whether to
416. * lock/unlock the txq, and 3. making sparse happy. It is difficult for two inline
417. * functions to solve all three problems at the same time.
418. */
  419. #define virtnet_xdp_get_sq(vi) ({ \
  420. struct netdev_queue *txq; \
  421. typeof(vi) v = (vi); \
  422. unsigned int qp; \
  423. \
  424. if (v->curr_queue_pairs > nr_cpu_ids) { \
  425. qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
  426. qp += smp_processor_id(); \
  427. txq = netdev_get_tx_queue(v->dev, qp); \
  428. __netif_tx_acquire(txq); \
  429. } else { \
  430. qp = smp_processor_id() % v->curr_queue_pairs; \
  431. txq = netdev_get_tx_queue(v->dev, qp); \
  432. __netif_tx_lock(txq, raw_smp_processor_id()); \
  433. } \
  434. v->sq + qp; \
  435. })
  436. #define virtnet_xdp_put_sq(vi, q) { \
  437. struct netdev_queue *txq; \
  438. typeof(vi) v = (vi); \
  439. \
  440. txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
  441. if (v->curr_queue_pairs > nr_cpu_ids) \
  442. __netif_tx_release(txq); \
  443. else \
  444. __netif_tx_unlock(txq); \
  445. }
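/* Callers pair the two macros around their transmit work, e.g.:
 *
 *	sq = virtnet_xdp_get_sq(vi);
 *	... queue xdp_frames on sq->vq and kick ...
 *	virtnet_xdp_put_sq(vi, sq);
 *
 * so a txq that was locked in the get path is always unlocked on the
 * same queue in the put path.
 */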
  446. static int virtnet_xdp_xmit(struct net_device *dev,
  447. int n, struct xdp_frame **frames, u32 flags)
  448. {
  449. struct virtnet_info *vi = netdev_priv(dev);
  450. struct receive_queue *rq = vi->rq;
  451. struct bpf_prog *xdp_prog;
  452. struct send_queue *sq;
  453. unsigned int len;
  454. int packets = 0;
  455. int bytes = 0;
  456. int drops = 0;
  457. int kicks = 0;
  458. int ret, err;
  459. void *ptr;
  460. int i;
461. /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
462. * indicates that XDP resources have been successfully allocated.
463. */
  464. xdp_prog = rcu_access_pointer(rq->xdp_prog);
  465. if (!xdp_prog)
  466. return -ENXIO;
  467. sq = virtnet_xdp_get_sq(vi);
  468. if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
  469. ret = -EINVAL;
  470. drops = n;
  471. goto out;
  472. }
  473. /* Free up any pending old buffers before queueing new ones. */
  474. while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
  475. if (likely(is_xdp_frame(ptr))) {
  476. struct xdp_frame *frame = ptr_to_xdp(ptr);
  477. bytes += frame->len;
  478. xdp_return_frame(frame);
  479. } else {
  480. struct sk_buff *skb = ptr;
  481. bytes += skb->len;
  482. napi_consume_skb(skb, false);
  483. }
  484. packets++;
  485. }
  486. for (i = 0; i < n; i++) {
  487. struct xdp_frame *xdpf = frames[i];
  488. err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
  489. if (err) {
  490. xdp_return_frame_rx_napi(xdpf);
  491. drops++;
  492. }
  493. }
  494. ret = n - drops;
  495. if (flags & XDP_XMIT_FLUSH) {
  496. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
  497. kicks = 1;
  498. }
  499. out:
  500. u64_stats_update_begin(&sq->stats.syncp);
  501. sq->stats.bytes += bytes;
  502. sq->stats.packets += packets;
  503. sq->stats.xdp_tx += n;
  504. sq->stats.xdp_tx_drops += drops;
  505. sq->stats.kicks += kicks;
  506. u64_stats_update_end(&sq->stats.syncp);
  507. virtnet_xdp_put_sq(vi, sq);
  508. return ret;
  509. }
  510. static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  511. {
  512. return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
  513. }
  514. /* We copy the packet for XDP in the following cases:
  515. *
  516. * 1) Packet is scattered across multiple rx buffers.
  517. * 2) Headroom space is insufficient.
  518. *
519. * This is inefficient but it's a temporary condition that
520. * we hit right after XDP is enabled and until the queue is refilled
521. * with large buffers with sufficient headroom - so it should affect
522. * at most a queue's worth of packets.
  523. * Afterwards, the conditions to enable
  524. * XDP should preclude the underlying device from sending packets
  525. * across multiple buffers (num_buf > 1), and we make sure buffers
  526. * have enough headroom.
  527. */
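/* xdp_linearize_page() below copies the head buffer and any remaining
 * buffers of the packet into one freshly allocated page, starting at
 * the requested page_off, so the XDP program sees a single linear
 * buffer with the headroom it expects.
 */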
  528. static struct page *xdp_linearize_page(struct receive_queue *rq,
  529. u16 *num_buf,
  530. struct page *p,
  531. int offset,
  532. int page_off,
  533. unsigned int *len)
  534. {
  535. struct page *page = alloc_page(GFP_ATOMIC);
  536. if (!page)
  537. return NULL;
  538. memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
  539. page_off += *len;
  540. while (--*num_buf) {
  541. int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  542. unsigned int buflen;
  543. void *buf;
  544. int off;
  545. buf = virtqueue_get_buf(rq->vq, &buflen);
  546. if (unlikely(!buf))
  547. goto err_buf;
  548. p = virt_to_head_page(buf);
  549. off = buf - page_address(p);
550. /* guard against a misconfigured or uncooperative backend that
551. * is sending packets larger than the MTU.
552. */
  553. if ((page_off + buflen + tailroom) > PAGE_SIZE) {
  554. put_page(p);
  555. goto err_buf;
  556. }
  557. memcpy(page_address(page) + page_off,
  558. page_address(p) + off, buflen);
  559. page_off += buflen;
  560. put_page(p);
  561. }
  562. /* Headroom does not contribute to packet length */
  563. *len = page_off - VIRTIO_XDP_HEADROOM;
  564. return page;
  565. err_buf:
  566. __free_pages(page, 0);
  567. return NULL;
  568. }
  569. static struct sk_buff *receive_small(struct net_device *dev,
  570. struct virtnet_info *vi,
  571. struct receive_queue *rq,
  572. void *buf, void *ctx,
  573. unsigned int len,
  574. unsigned int *xdp_xmit,
  575. struct virtnet_rq_stats *stats)
  576. {
  577. struct sk_buff *skb;
  578. struct bpf_prog *xdp_prog;
  579. unsigned int xdp_headroom = (unsigned long)ctx;
  580. unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
  581. unsigned int headroom = vi->hdr_len + header_offset;
  582. unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  583. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  584. struct page *page = virt_to_head_page(buf);
  585. unsigned int delta = 0;
  586. struct page *xdp_page;
  587. int err;
  588. unsigned int metasize = 0;
  589. len -= vi->hdr_len;
  590. stats->bytes += len;
  591. if (unlikely(len > GOOD_PACKET_LEN)) {
  592. pr_debug("%s: rx error: len %u exceeds max size %d\n",
  593. dev->name, len, GOOD_PACKET_LEN);
  594. dev->stats.rx_length_errors++;
  595. goto err_len;
  596. }
  597. rcu_read_lock();
  598. xdp_prog = rcu_dereference(rq->xdp_prog);
  599. if (xdp_prog) {
  600. struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
  601. struct xdp_frame *xdpf;
  602. struct xdp_buff xdp;
  603. void *orig_data;
  604. u32 act;
  605. if (unlikely(hdr->hdr.gso_type))
  606. goto err_xdp;
  607. if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
  608. int offset = buf - page_address(page) + header_offset;
  609. unsigned int tlen = len + vi->hdr_len;
  610. u16 num_buf = 1;
  611. xdp_headroom = virtnet_get_headroom(vi);
  612. header_offset = VIRTNET_RX_PAD + xdp_headroom;
  613. headroom = vi->hdr_len + header_offset;
  614. buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
  615. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  616. xdp_page = xdp_linearize_page(rq, &num_buf, page,
  617. offset, header_offset,
  618. &tlen);
  619. if (!xdp_page)
  620. goto err_xdp;
  621. buf = page_address(xdp_page);
  622. put_page(page);
  623. page = xdp_page;
  624. }
  625. xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
  626. xdp.data = xdp.data_hard_start + xdp_headroom;
  627. xdp.data_end = xdp.data + len;
  628. xdp.data_meta = xdp.data;
  629. xdp.rxq = &rq->xdp_rxq;
  630. xdp.frame_sz = buflen;
  631. orig_data = xdp.data;
  632. act = bpf_prog_run_xdp(xdp_prog, &xdp);
  633. stats->xdp_packets++;
  634. switch (act) {
  635. case XDP_PASS:
  636. /* Recalculate length in case bpf program changed it */
  637. delta = orig_data - xdp.data;
  638. len = xdp.data_end - xdp.data;
  639. metasize = xdp.data - xdp.data_meta;
  640. break;
  641. case XDP_TX:
  642. stats->xdp_tx++;
  643. xdpf = xdp_convert_buff_to_frame(&xdp);
  644. if (unlikely(!xdpf))
  645. goto err_xdp;
  646. err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
  647. if (unlikely(err < 0)) {
  648. trace_xdp_exception(vi->dev, xdp_prog, act);
  649. goto err_xdp;
  650. }
  651. *xdp_xmit |= VIRTIO_XDP_TX;
  652. rcu_read_unlock();
  653. goto xdp_xmit;
  654. case XDP_REDIRECT:
  655. stats->xdp_redirects++;
  656. err = xdp_do_redirect(dev, &xdp, xdp_prog);
  657. if (err)
  658. goto err_xdp;
  659. *xdp_xmit |= VIRTIO_XDP_REDIR;
  660. rcu_read_unlock();
  661. goto xdp_xmit;
  662. default:
  663. bpf_warn_invalid_xdp_action(act);
  664. fallthrough;
  665. case XDP_ABORTED:
  666. trace_xdp_exception(vi->dev, xdp_prog, act);
  667. case XDP_DROP:
  668. goto err_xdp;
  669. }
  670. }
  671. rcu_read_unlock();
  672. skb = build_skb(buf, buflen);
  673. if (!skb) {
  674. put_page(page);
  675. goto err;
  676. }
  677. skb_reserve(skb, headroom - delta);
  678. skb_put(skb, len);
  679. if (!xdp_prog) {
  680. buf += header_offset;
  681. memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
  682. } /* keep zeroed vnet hdr since XDP is loaded */
  683. if (metasize)
  684. skb_metadata_set(skb, metasize);
  685. err:
  686. return skb;
  687. err_xdp:
  688. rcu_read_unlock();
  689. stats->xdp_drops++;
  690. err_len:
  691. stats->drops++;
  692. put_page(page);
  693. xdp_xmit:
  694. return NULL;
  695. }
  696. static struct sk_buff *receive_big(struct net_device *dev,
  697. struct virtnet_info *vi,
  698. struct receive_queue *rq,
  699. void *buf,
  700. unsigned int len,
  701. struct virtnet_rq_stats *stats)
  702. {
  703. struct page *page = buf;
  704. struct sk_buff *skb =
  705. page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
  706. stats->bytes += len - vi->hdr_len;
  707. if (unlikely(!skb))
  708. goto err;
  709. return skb;
  710. err:
  711. stats->drops++;
  712. give_pages(rq, page);
  713. return NULL;
  714. }
  715. static struct sk_buff *receive_mergeable(struct net_device *dev,
  716. struct virtnet_info *vi,
  717. struct receive_queue *rq,
  718. void *buf,
  719. void *ctx,
  720. unsigned int len,
  721. unsigned int *xdp_xmit,
  722. struct virtnet_rq_stats *stats)
  723. {
  724. struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
  725. u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
  726. struct page *page = virt_to_head_page(buf);
  727. int offset = buf - page_address(page);
  728. struct sk_buff *head_skb, *curr_skb;
  729. struct bpf_prog *xdp_prog;
  730. unsigned int truesize = mergeable_ctx_to_truesize(ctx);
  731. unsigned int headroom = mergeable_ctx_to_headroom(ctx);
  732. unsigned int metasize = 0;
  733. unsigned int frame_sz;
  734. int err;
  735. head_skb = NULL;
  736. stats->bytes += len - vi->hdr_len;
  737. if (unlikely(len > truesize)) {
  738. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  739. dev->name, len, (unsigned long)ctx);
  740. dev->stats.rx_length_errors++;
  741. goto err_skb;
  742. }
  743. rcu_read_lock();
  744. xdp_prog = rcu_dereference(rq->xdp_prog);
  745. if (xdp_prog) {
  746. struct xdp_frame *xdpf;
  747. struct page *xdp_page;
  748. struct xdp_buff xdp;
  749. void *data;
  750. u32 act;
  751. /* Transient failure which in theory could occur if
  752. * in-flight packets from before XDP was enabled reach
  753. * the receive path after XDP is loaded.
  754. */
  755. if (unlikely(hdr->hdr.gso_type))
  756. goto err_xdp;
  757. /* Buffers with headroom use PAGE_SIZE as alloc size,
  758. * see add_recvbuf_mergeable() + get_mergeable_buf_len()
  759. */
  760. frame_sz = headroom ? PAGE_SIZE : truesize;
761. /* This happens when the rx buffer size is underestimated
762. * or the headroom is not enough because the buffer
763. * was refilled before XDP was set. This should only
764. * happen for the first several packets, so we don't
765. * care much about its performance.
766. */
  767. if (unlikely(num_buf > 1 ||
  768. headroom < virtnet_get_headroom(vi))) {
  769. /* linearize data for XDP */
  770. xdp_page = xdp_linearize_page(rq, &num_buf,
  771. page, offset,
  772. VIRTIO_XDP_HEADROOM,
  773. &len);
  774. frame_sz = PAGE_SIZE;
  775. if (!xdp_page)
  776. goto err_xdp;
  777. offset = VIRTIO_XDP_HEADROOM;
  778. } else {
  779. xdp_page = page;
  780. }
  781. /* Allow consuming headroom but reserve enough space to push
  782. * the descriptor on if we get an XDP_TX return code.
  783. */
  784. data = page_address(xdp_page) + offset;
  785. xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
  786. xdp.data = data + vi->hdr_len;
  787. xdp.data_end = xdp.data + (len - vi->hdr_len);
  788. xdp.data_meta = xdp.data;
  789. xdp.rxq = &rq->xdp_rxq;
  790. xdp.frame_sz = frame_sz - vi->hdr_len;
  791. act = bpf_prog_run_xdp(xdp_prog, &xdp);
  792. stats->xdp_packets++;
  793. switch (act) {
  794. case XDP_PASS:
  795. metasize = xdp.data - xdp.data_meta;
796. /* recalculate offset to account for any header
797. * adjustments and subtract the metasize so that
798. * page_to_skb() copies the metadata. Note the other
799. * cases do not build an skb and do not use offset.
800. */
  801. offset = xdp.data - page_address(xdp_page) -
  802. vi->hdr_len - metasize;
  803. /* recalculate len if xdp.data, xdp.data_end or
  804. * xdp.data_meta were adjusted
  805. */
  806. len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
  807. /* We can only create skb based on xdp_page. */
  808. if (unlikely(xdp_page != page)) {
  809. rcu_read_unlock();
  810. put_page(page);
  811. head_skb = page_to_skb(vi, rq, xdp_page, offset,
  812. len, PAGE_SIZE, false,
  813. metasize);
  814. return head_skb;
  815. }
  816. break;
  817. case XDP_TX:
  818. stats->xdp_tx++;
  819. xdpf = xdp_convert_buff_to_frame(&xdp);
  820. if (unlikely(!xdpf)) {
  821. if (unlikely(xdp_page != page))
  822. put_page(xdp_page);
  823. goto err_xdp;
  824. }
  825. err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
  826. if (unlikely(err < 0)) {
  827. trace_xdp_exception(vi->dev, xdp_prog, act);
  828. if (unlikely(xdp_page != page))
  829. put_page(xdp_page);
  830. goto err_xdp;
  831. }
  832. *xdp_xmit |= VIRTIO_XDP_TX;
  833. if (unlikely(xdp_page != page))
  834. put_page(page);
  835. rcu_read_unlock();
  836. goto xdp_xmit;
  837. case XDP_REDIRECT:
  838. stats->xdp_redirects++;
  839. err = xdp_do_redirect(dev, &xdp, xdp_prog);
  840. if (err) {
  841. if (unlikely(xdp_page != page))
  842. put_page(xdp_page);
  843. goto err_xdp;
  844. }
  845. *xdp_xmit |= VIRTIO_XDP_REDIR;
  846. if (unlikely(xdp_page != page))
  847. put_page(page);
  848. rcu_read_unlock();
  849. goto xdp_xmit;
  850. default:
  851. bpf_warn_invalid_xdp_action(act);
  852. fallthrough;
  853. case XDP_ABORTED:
  854. trace_xdp_exception(vi->dev, xdp_prog, act);
  855. fallthrough;
  856. case XDP_DROP:
  857. if (unlikely(xdp_page != page))
  858. __free_pages(xdp_page, 0);
  859. goto err_xdp;
  860. }
  861. }
  862. rcu_read_unlock();
  863. head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
  864. metasize);
  865. curr_skb = head_skb;
  866. if (unlikely(!curr_skb))
  867. goto err_skb;
  868. while (--num_buf) {
  869. int num_skb_frags;
  870. buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
  871. if (unlikely(!buf)) {
  872. pr_debug("%s: rx error: %d buffers out of %d missing\n",
  873. dev->name, num_buf,
  874. virtio16_to_cpu(vi->vdev,
  875. hdr->num_buffers));
  876. dev->stats.rx_length_errors++;
  877. goto err_buf;
  878. }
  879. stats->bytes += len;
  880. page = virt_to_head_page(buf);
  881. truesize = mergeable_ctx_to_truesize(ctx);
  882. if (unlikely(len > truesize)) {
  883. pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
  884. dev->name, len, (unsigned long)ctx);
  885. dev->stats.rx_length_errors++;
  886. goto err_skb;
  887. }
  888. num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
  889. if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
  890. struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
  891. if (unlikely(!nskb))
  892. goto err_skb;
  893. if (curr_skb == head_skb)
  894. skb_shinfo(curr_skb)->frag_list = nskb;
  895. else
  896. curr_skb->next = nskb;
  897. curr_skb = nskb;
  898. head_skb->truesize += nskb->truesize;
  899. num_skb_frags = 0;
  900. }
  901. if (curr_skb != head_skb) {
  902. head_skb->data_len += len;
  903. head_skb->len += len;
  904. head_skb->truesize += truesize;
  905. }
  906. offset = buf - page_address(page);
  907. if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
  908. put_page(page);
  909. skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
  910. len, truesize);
  911. } else {
  912. skb_add_rx_frag(curr_skb, num_skb_frags, page,
  913. offset, len, truesize);
  914. }
  915. }
  916. ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
  917. return head_skb;
  918. err_xdp:
  919. rcu_read_unlock();
  920. stats->xdp_drops++;
  921. err_skb:
  922. put_page(page);
  923. while (num_buf-- > 1) {
  924. buf = virtqueue_get_buf(rq->vq, &len);
  925. if (unlikely(!buf)) {
  926. pr_debug("%s: rx error: %d buffers missing\n",
  927. dev->name, num_buf);
  928. dev->stats.rx_length_errors++;
  929. break;
  930. }
  931. stats->bytes += len;
  932. page = virt_to_head_page(buf);
  933. put_page(page);
  934. }
  935. err_buf:
  936. stats->drops++;
  937. dev_kfree_skb(head_skb);
  938. xdp_xmit:
  939. return NULL;
  940. }
  941. static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
  942. void *buf, unsigned int len, void **ctx,
  943. unsigned int *xdp_xmit,
  944. struct virtnet_rq_stats *stats)
  945. {
  946. struct net_device *dev = vi->dev;
  947. struct sk_buff *skb;
  948. struct virtio_net_hdr_mrg_rxbuf *hdr;
  949. if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
  950. pr_debug("%s: short packet %i\n", dev->name, len);
  951. dev->stats.rx_length_errors++;
  952. if (vi->mergeable_rx_bufs) {
  953. put_page(virt_to_head_page(buf));
  954. } else if (vi->big_packets) {
  955. give_pages(rq, buf);
  956. } else {
  957. put_page(virt_to_head_page(buf));
  958. }
  959. return;
  960. }
  961. if (vi->mergeable_rx_bufs)
  962. skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
  963. stats);
  964. else if (vi->big_packets)
  965. skb = receive_big(dev, vi, rq, buf, len, stats);
  966. else
  967. skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
  968. if (unlikely(!skb))
  969. return;
  970. hdr = skb_vnet_hdr(skb);
  971. if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
  972. skb->ip_summed = CHECKSUM_UNNECESSARY;
  973. if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
  974. virtio_is_little_endian(vi->vdev))) {
  975. net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
  976. dev->name, hdr->hdr.gso_type,
  977. hdr->hdr.gso_size);
  978. goto frame_err;
  979. }
  980. skb_record_rx_queue(skb, vq2rxq(rq->vq));
  981. skb->protocol = eth_type_trans(skb, dev);
  982. pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
  983. ntohs(skb->protocol), skb->len, skb->pkt_type);
  984. napi_gro_receive(&rq->napi, skb);
  985. return;
  986. frame_err:
  987. dev->stats.rx_frame_errors++;
  988. dev_kfree_skb(skb);
  989. }
  990. /* Unlike mergeable buffers, all buffers are allocated to the
  991. * same size, except for the headroom. For this reason we do
  992. * not need to use mergeable_len_to_ctx here - it is enough
  993. * to store the headroom as the context ignoring the truesize.
  994. */
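/* The buffer posted to the device is laid out as
 * [VIRTNET_RX_PAD][xdp headroom][vnet hdr][up to GOOD_PACKET_LEN data],
 * with enough tail room left for an skb_shared_info so receive_small()
 * can later turn the same buffer into an skb with build_skb().
 */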
  995. static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
  996. gfp_t gfp)
  997. {
  998. struct page_frag *alloc_frag = &rq->alloc_frag;
  999. char *buf;
  1000. unsigned int xdp_headroom = virtnet_get_headroom(vi);
  1001. void *ctx = (void *)(unsigned long)xdp_headroom;
  1002. int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
  1003. int err;
  1004. len = SKB_DATA_ALIGN(len) +
  1005. SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1006. if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
  1007. return -ENOMEM;
  1008. buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
  1009. get_page(alloc_frag->page);
  1010. alloc_frag->offset += len;
  1011. sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
  1012. vi->hdr_len + GOOD_PACKET_LEN);
  1013. err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
  1014. if (err < 0)
  1015. put_page(virt_to_head_page(buf));
  1016. return err;
  1017. }
  1018. static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
  1019. gfp_t gfp)
  1020. {
  1021. struct page *first, *list = NULL;
  1022. char *p;
  1023. int i, err, offset;
  1024. sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
  1025. /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
  1026. for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
  1027. first = get_a_page(rq, gfp);
  1028. if (!first) {
  1029. if (list)
  1030. give_pages(rq, list);
  1031. return -ENOMEM;
  1032. }
  1033. sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
  1034. /* chain new page in list head to match sg */
  1035. first->private = (unsigned long)list;
  1036. list = first;
  1037. }
  1038. first = get_a_page(rq, gfp);
  1039. if (!first) {
  1040. give_pages(rq, list);
  1041. return -ENOMEM;
  1042. }
  1043. p = page_address(first);
  1044. /* rq->sg[0], rq->sg[1] share the same page */
1045. /* a separate rq->sg[0] for the header - required in case !any_header_sg */
  1046. sg_set_buf(&rq->sg[0], p, vi->hdr_len);
  1047. /* rq->sg[1] for data packet, from offset */
  1048. offset = sizeof(struct padded_vnet_hdr);
  1049. sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
  1050. /* chain first in list head */
  1051. first->private = (unsigned long)list;
  1052. err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
  1053. first, gfp);
  1054. if (err < 0)
  1055. give_pages(rq, first);
  1056. return err;
  1057. }
  1058. static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
  1059. struct ewma_pkt_len *avg_pkt_len,
  1060. unsigned int room)
  1061. {
  1062. const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  1063. unsigned int len;
  1064. if (room)
  1065. return PAGE_SIZE - room;
  1066. len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
  1067. rq->min_buf_len, PAGE_SIZE - hdr_len);
  1068. return ALIGN(len, L1_CACHE_BYTES);
  1069. }
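/* For example, with an EWMA packet length of ~1500 bytes (and a smaller
 * min_buf_len), len becomes 12 + 1500 = 1512 and is rounded up to 1536
 * on a 64-byte cache line, so that is the buffer size posted to the
 * ring when no XDP headroom is reserved.
 */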
  1070. static int add_recvbuf_mergeable(struct virtnet_info *vi,
  1071. struct receive_queue *rq, gfp_t gfp)
  1072. {
  1073. struct page_frag *alloc_frag = &rq->alloc_frag;
  1074. unsigned int headroom = virtnet_get_headroom(vi);
  1075. unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  1076. unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
  1077. char *buf;
  1078. void *ctx;
  1079. int err;
  1080. unsigned int len, hole;
1081. /* Extra tailroom is needed to satisfy XDP's assumption. This
1082. * means rx frag coalescing won't work, but since we've
1083. * disabled GSO for XDP, it won't be a big issue.
1084. */
  1085. len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
  1086. if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
  1087. return -ENOMEM;
  1088. buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
  1089. buf += headroom; /* advance address leaving hole at front of pkt */
  1090. get_page(alloc_frag->page);
  1091. alloc_frag->offset += len + room;
  1092. hole = alloc_frag->size - alloc_frag->offset;
  1093. if (hole < len + room) {
  1094. /* To avoid internal fragmentation, if there is very likely not
  1095. * enough space for another buffer, add the remaining space to
  1096. * the current buffer.
  1097. */
  1098. len += hole;
  1099. alloc_frag->offset += hole;
  1100. }
  1101. sg_init_one(rq->sg, buf, len);
  1102. ctx = mergeable_len_to_ctx(len, headroom);
  1103. err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
  1104. if (err < 0)
  1105. put_page(virt_to_head_page(buf));
  1106. return err;
  1107. }
  1108. /*
  1109. * Returns false if we couldn't fill entirely (OOM).
  1110. *
  1111. * Normally run in the receive path, but can also be run from ndo_open
  1112. * before we're receiving packets, or from refill_work which is
  1113. * careful to disable receiving (using napi_disable).
  1114. */
  1115. static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
  1116. gfp_t gfp)
  1117. {
  1118. int err;
  1119. bool oom;
  1120. do {
  1121. if (vi->mergeable_rx_bufs)
  1122. err = add_recvbuf_mergeable(vi, rq, gfp);
  1123. else if (vi->big_packets)
  1124. err = add_recvbuf_big(vi, rq, gfp);
  1125. else
  1126. err = add_recvbuf_small(vi, rq, gfp);
  1127. oom = err == -ENOMEM;
  1128. if (err)
  1129. break;
  1130. } while (rq->vq->num_free);
  1131. if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
  1132. unsigned long flags;
  1133. flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
  1134. rq->stats.kicks++;
  1135. u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
  1136. }
  1137. return !oom;
  1138. }
  1139. static void skb_recv_done(struct virtqueue *rvq)
  1140. {
  1141. struct virtnet_info *vi = rvq->vdev->priv;
  1142. struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
  1143. virtqueue_napi_schedule(&rq->napi, rvq);
  1144. }
  1145. static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
  1146. {
  1147. napi_enable(napi);
1148. /* If all buffers were filled by the other side before we enabled napi, we
1149. * won't get another interrupt, so process any outstanding packets now.
1150. * Call local_bh_enable() afterwards to trigger softIRQ processing.
1151. */
  1152. local_bh_disable();
  1153. virtqueue_napi_schedule(napi, vq);
  1154. local_bh_enable();
  1155. }
  1156. static void virtnet_napi_tx_enable(struct virtnet_info *vi,
  1157. struct virtqueue *vq,
  1158. struct napi_struct *napi)
  1159. {
  1160. if (!napi->weight)
  1161. return;
  1162. /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
  1163. * enable the feature if this is likely affine with the transmit path.
  1164. */
  1165. if (!vi->affinity_hint_set) {
  1166. napi->weight = 0;
  1167. return;
  1168. }
  1169. return virtnet_napi_enable(vq, napi);
  1170. }
  1171. static void virtnet_napi_tx_disable(struct napi_struct *napi)
  1172. {
  1173. if (napi->weight)
  1174. napi_disable(napi);
  1175. }
  1176. static void refill_work(struct work_struct *work)
  1177. {
  1178. struct virtnet_info *vi =
  1179. container_of(work, struct virtnet_info, refill.work);
  1180. bool still_empty;
  1181. int i;
  1182. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1183. struct receive_queue *rq = &vi->rq[i];
  1184. napi_disable(&rq->napi);
  1185. still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
  1186. virtnet_napi_enable(rq->vq, &rq->napi);
  1187. /* In theory, this can happen: if we don't get any buffers in
  1188. * we will *never* try to fill again.
  1189. */
  1190. if (still_empty)
  1191. schedule_delayed_work(&vi->refill, HZ/2);
  1192. }
  1193. }
  1194. static int virtnet_receive(struct receive_queue *rq, int budget,
  1195. unsigned int *xdp_xmit)
  1196. {
  1197. struct virtnet_info *vi = rq->vq->vdev->priv;
  1198. struct virtnet_rq_stats stats = {};
  1199. unsigned int len;
  1200. void *buf;
  1201. int i;
  1202. if (!vi->big_packets || vi->mergeable_rx_bufs) {
  1203. void *ctx;
  1204. while (stats.packets < budget &&
  1205. (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
  1206. receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
  1207. stats.packets++;
  1208. }
  1209. } else {
  1210. while (stats.packets < budget &&
  1211. (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
  1212. receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
  1213. stats.packets++;
  1214. }
  1215. }
  1216. if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
  1217. if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
  1218. spin_lock(&vi->refill_lock);
  1219. if (vi->refill_enabled)
  1220. schedule_delayed_work(&vi->refill, 0);
  1221. spin_unlock(&vi->refill_lock);
  1222. }
  1223. }
  1224. u64_stats_update_begin(&rq->stats.syncp);
  1225. for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
  1226. size_t offset = virtnet_rq_stats_desc[i].offset;
  1227. u64 *item;
  1228. item = (u64 *)((u8 *)&rq->stats + offset);
  1229. *item += *(u64 *)((u8 *)&stats + offset);
  1230. }
  1231. u64_stats_update_end(&rq->stats.syncp);
  1232. return stats.packets;
  1233. }
  1234. static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
  1235. {
  1236. unsigned int len;
  1237. unsigned int packets = 0;
  1238. unsigned int bytes = 0;
  1239. void *ptr;
  1240. while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
  1241. if (likely(!is_xdp_frame(ptr))) {
  1242. struct sk_buff *skb = ptr;
  1243. pr_debug("Sent skb %p\n", skb);
  1244. bytes += skb->len;
  1245. napi_consume_skb(skb, in_napi);
  1246. } else {
  1247. struct xdp_frame *frame = ptr_to_xdp(ptr);
  1248. bytes += frame->len;
  1249. xdp_return_frame(frame);
  1250. }
  1251. packets++;
  1252. }
1253. /* Avoid overhead when no packets have been processed; this
1254. * happens when called speculatively from start_xmit.
1255. */
  1256. if (!packets)
  1257. return;
  1258. u64_stats_update_begin(&sq->stats.syncp);
  1259. sq->stats.bytes += bytes;
  1260. sq->stats.packets += packets;
  1261. u64_stats_update_end(&sq->stats.syncp);
  1262. }
  1263. static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
  1264. {
  1265. if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
  1266. return false;
  1267. else if (q < vi->curr_queue_pairs)
  1268. return true;
  1269. else
  1270. return false;
  1271. }
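/* From the RX NAPI handler, opportunistically reclaim completed buffers on
 * the paired TX queue and wake it once enough descriptors are free. Only
 * done when TX NAPI is enabled and the queue is not an XDP TX queue.
 */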
  1272. static void virtnet_poll_cleantx(struct receive_queue *rq)
  1273. {
  1274. struct virtnet_info *vi = rq->vq->vdev->priv;
  1275. unsigned int index = vq2rxq(rq->vq);
  1276. struct send_queue *sq = &vi->sq[index];
  1277. struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
  1278. if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
  1279. return;
  1280. if (__netif_tx_trylock(txq)) {
  1281. free_old_xmit_skbs(sq, true);
  1282. __netif_tx_unlock(txq);
  1283. }
  1284. if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
  1285. netif_tx_wake_queue(txq);
  1286. }
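/* RX NAPI poll handler: clean the paired TX queue, receive up to @budget
 * packets, complete NAPI (re-enabling callbacks) when under budget, and
 * flush any XDP_REDIRECT/XDP_TX work generated while receiving.
 */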
  1287. static int virtnet_poll(struct napi_struct *napi, int budget)
  1288. {
  1289. struct receive_queue *rq =
  1290. container_of(napi, struct receive_queue, napi);
  1291. struct virtnet_info *vi = rq->vq->vdev->priv;
  1292. struct send_queue *sq;
  1293. unsigned int received;
  1294. unsigned int xdp_xmit = 0;
  1295. virtnet_poll_cleantx(rq);
  1296. received = virtnet_receive(rq, budget, &xdp_xmit);
  1297. /* Out of packets? */
  1298. if (received < budget)
  1299. virtqueue_napi_complete(napi, rq->vq, received);
  1300. if (xdp_xmit & VIRTIO_XDP_REDIR)
  1301. xdp_do_flush();
  1302. if (xdp_xmit & VIRTIO_XDP_TX) {
  1303. sq = virtnet_xdp_get_sq(vi);
  1304. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
  1305. u64_stats_update_begin(&sq->stats.syncp);
  1306. sq->stats.kicks++;
  1307. u64_stats_update_end(&sq->stats.syncp);
  1308. }
  1309. virtnet_xdp_put_sq(vi, sq);
  1310. }
  1311. return received;
  1312. }
  1313. static int virtnet_open(struct net_device *dev)
  1314. {
  1315. struct virtnet_info *vi = netdev_priv(dev);
  1316. int i, err;
  1317. enable_delayed_refill(vi);
  1318. for (i = 0; i < vi->max_queue_pairs; i++) {
  1319. if (i < vi->curr_queue_pairs)
  1320. /* Make sure we have some buffers: if oom use wq. */
  1321. if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
  1322. schedule_delayed_work(&vi->refill, 0);
  1323. err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
  1324. if (err < 0)
  1325. return err;
  1326. err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
  1327. MEM_TYPE_PAGE_SHARED, NULL);
  1328. if (err < 0) {
  1329. xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
  1330. return err;
  1331. }
  1332. virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
  1333. virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
  1334. }
  1335. return 0;
  1336. }
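/* TX NAPI poll handler: under the tx queue lock, reclaim completed buffers
 * with callbacks disabled, then re-enable callbacks and re-arm NAPI if more
 * completions raced in. XDP-only queues complete immediately.
 */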
  1337. static int virtnet_poll_tx(struct napi_struct *napi, int budget)
  1338. {
  1339. struct send_queue *sq = container_of(napi, struct send_queue, napi);
  1340. struct virtnet_info *vi = sq->vq->vdev->priv;
  1341. unsigned int index = vq2txq(sq->vq);
  1342. struct netdev_queue *txq;
  1343. int opaque;
  1344. bool done;
  1345. if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
  1346. /* We don't need to enable cb for XDP */
  1347. napi_complete_done(napi, 0);
  1348. return 0;
  1349. }
  1350. txq = netdev_get_tx_queue(vi->dev, index);
  1351. __netif_tx_lock(txq, raw_smp_processor_id());
  1352. virtqueue_disable_cb(sq->vq);
  1353. free_old_xmit_skbs(sq, true);
  1354. opaque = virtqueue_enable_cb_prepare(sq->vq);
  1355. done = napi_complete_done(napi, 0);
  1356. if (!done)
  1357. virtqueue_disable_cb(sq->vq);
  1358. __netif_tx_unlock(txq);
  1359. if (done) {
  1360. if (unlikely(virtqueue_poll(sq->vq, opaque))) {
  1361. if (napi_schedule_prep(napi)) {
  1362. __netif_tx_lock(txq, raw_smp_processor_id());
  1363. virtqueue_disable_cb(sq->vq);
  1364. __netif_tx_unlock(txq);
  1365. __napi_schedule(napi);
  1366. }
  1367. }
  1368. }
  1369. if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
  1370. netif_tx_wake_queue(txq);
  1371. return 0;
  1372. }
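/* Build the virtio-net header for @skb (pushed into the headroom when the
 * device accepts any layout, otherwise kept in the per-skb header area),
 * map the packet into the scatterlist and post it on the send virtqueue.
 */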
  1373. static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
  1374. {
  1375. struct virtio_net_hdr_mrg_rxbuf *hdr;
  1376. const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
  1377. struct virtnet_info *vi = sq->vq->vdev->priv;
  1378. int num_sg;
  1379. unsigned hdr_len = vi->hdr_len;
  1380. bool can_push;
  1381. pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
  1382. can_push = vi->any_header_sg &&
  1383. !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
  1384. !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
  1385. /* Even if we can, don't push here yet as this would skew
  1386. * csum_start offset below. */
  1387. if (can_push)
  1388. hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
  1389. else
  1390. hdr = skb_vnet_hdr(skb);
  1391. if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
  1392. virtio_is_little_endian(vi->vdev), false,
  1393. 0))
  1394. return -EPROTO;
  1395. if (vi->mergeable_rx_bufs)
  1396. hdr->num_buffers = 0;
  1397. sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
  1398. if (can_push) {
  1399. __skb_push(skb, hdr_len);
  1400. num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
  1401. if (unlikely(num_sg < 0))
  1402. return num_sg;
  1403. /* Pull header back to avoid skew in tx bytes calculations. */
  1404. __skb_pull(skb, hdr_len);
  1405. } else {
  1406. sg_set_buf(sq->sg, hdr, hdr_len);
  1407. num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
  1408. if (unlikely(num_sg < 0))
  1409. return num_sg;
  1410. num_sg++;
  1411. }
  1412. return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
  1413. }
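/* .ndo_start_xmit: reclaim already-completed buffers, queue the skb via
 * xmit_skb(), stop the subqueue when fewer than 2 + MAX_SKB_FRAGS
 * descriptors remain, and kick the device unless more packets are pending.
 */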
  1414. static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
  1415. {
  1416. struct virtnet_info *vi = netdev_priv(dev);
  1417. int qnum = skb_get_queue_mapping(skb);
  1418. struct send_queue *sq = &vi->sq[qnum];
  1419. int err;
  1420. struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
  1421. bool kick = !netdev_xmit_more();
  1422. bool use_napi = sq->napi.weight;
  1423. /* Free up any pending old buffers before queueing new ones. */
  1424. free_old_xmit_skbs(sq, false);
  1425. if (use_napi && kick)
  1426. virtqueue_enable_cb_delayed(sq->vq);
  1427. /* timestamp packet in software */
  1428. skb_tx_timestamp(skb);
  1429. /* Try to transmit */
  1430. err = xmit_skb(sq, skb);
  1431. /* This should not happen! */
  1432. if (unlikely(err)) {
  1433. dev->stats.tx_fifo_errors++;
  1434. if (net_ratelimit())
  1435. dev_warn(&dev->dev,
  1436. "Unexpected TXQ (%d) queue failure: %d\n",
  1437. qnum, err);
  1438. dev->stats.tx_dropped++;
  1439. dev_kfree_skb_any(skb);
  1440. return NETDEV_TX_OK;
  1441. }
  1442. /* Don't wait up for transmitted skbs to be freed. */
  1443. if (!use_napi) {
  1444. skb_orphan(skb);
  1445. nf_reset_ct(skb);
  1446. }
  1447. /* If running out of space, stop queue to avoid getting packets that we
  1448. * are then unable to transmit.
  1449. * An alternative would be to force queuing layer to requeue the skb by
  1450. * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
  1451. * returned in a normal path of operation: it means that driver is not
  1452. * maintaining the TX queue stop/start state properly, and causes
  1453. * the stack to do a non-trivial amount of useless work.
  1454. * Since most packets only take 1 or 2 ring slots, stopping the queue
  1455. * early means 16 slots are typically wasted.
  1456. */
  1457. if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
  1458. netif_stop_subqueue(dev, qnum);
  1459. if (!use_napi &&
  1460. unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
  1461. /* More just got used, free them then recheck. */
  1462. free_old_xmit_skbs(sq, false);
  1463. if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
  1464. netif_start_subqueue(dev, qnum);
  1465. virtqueue_disable_cb(sq->vq);
  1466. }
  1467. }
  1468. }
  1469. if (kick || netif_xmit_stopped(txq)) {
  1470. if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
  1471. u64_stats_update_begin(&sq->stats.syncp);
  1472. sq->stats.kicks++;
  1473. u64_stats_update_end(&sq->stats.syncp);
  1474. }
  1475. }
  1476. return NETDEV_TX_OK;
  1477. }
  1478. /*
  1479. * Send command via the control virtqueue and check status. Commands
  1480. * supported by the hypervisor, as indicated by feature bits, should
  1481. * never fail unless improperly formatted.
  1482. */
  1483. static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
  1484. struct scatterlist *out)
  1485. {
  1486. struct scatterlist *sgs[4], hdr, stat;
  1487. unsigned out_num = 0, tmp;
  1488. /* Caller should know better */
  1489. BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
  1490. vi->ctrl->status = ~0;
  1491. vi->ctrl->hdr.class = class;
  1492. vi->ctrl->hdr.cmd = cmd;
  1493. /* Add header */
  1494. sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
  1495. sgs[out_num++] = &hdr;
  1496. if (out)
  1497. sgs[out_num++] = out;
  1498. /* Add return status. */
  1499. sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
  1500. sgs[out_num] = &stat;
  1501. BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
  1502. virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
  1503. if (unlikely(!virtqueue_kick(vi->cvq)))
  1504. return vi->ctrl->status == VIRTIO_NET_OK;
1505. /* Spin for a response; the kick causes an ioport write, trapping
1506. * into the hypervisor, so the request should be handled immediately.
1507. */
  1508. while (!virtqueue_get_buf(vi->cvq, &tmp) &&
  1509. !virtqueue_is_broken(vi->cvq))
  1510. cpu_relax();
  1511. return vi->ctrl->status == VIRTIO_NET_OK;
  1512. }
  1513. static int virtnet_set_mac_address(struct net_device *dev, void *p)
  1514. {
  1515. struct virtnet_info *vi = netdev_priv(dev);
  1516. struct virtio_device *vdev = vi->vdev;
  1517. int ret;
  1518. struct sockaddr *addr;
  1519. struct scatterlist sg;
  1520. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  1521. return -EOPNOTSUPP;
  1522. addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
  1523. if (!addr)
  1524. return -ENOMEM;
  1525. ret = eth_prepare_mac_addr_change(dev, addr);
  1526. if (ret)
  1527. goto out;
  1528. if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  1529. sg_init_one(&sg, addr->sa_data, dev->addr_len);
  1530. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  1531. VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
  1532. dev_warn(&vdev->dev,
  1533. "Failed to set mac address by vq command.\n");
  1534. ret = -EINVAL;
  1535. goto out;
  1536. }
  1537. } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
  1538. !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
  1539. unsigned int i;
  1540. /* Naturally, this has an atomicity problem. */
  1541. for (i = 0; i < dev->addr_len; i++)
  1542. virtio_cwrite8(vdev,
  1543. offsetof(struct virtio_net_config, mac) +
  1544. i, addr->sa_data[i]);
  1545. }
  1546. eth_commit_mac_addr_change(dev, p);
  1547. ret = 0;
  1548. out:
  1549. kfree(addr);
  1550. return ret;
  1551. }
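/* .ndo_get_stats64: sum the per-queue packet/byte/drop counters (under the
 * u64_stats retry loops) into the rtnl_link_stats64 totals.
 */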
  1552. static void virtnet_stats(struct net_device *dev,
  1553. struct rtnl_link_stats64 *tot)
  1554. {
  1555. struct virtnet_info *vi = netdev_priv(dev);
  1556. unsigned int start;
  1557. int i;
  1558. for (i = 0; i < vi->max_queue_pairs; i++) {
  1559. u64 tpackets, tbytes, rpackets, rbytes, rdrops;
  1560. struct receive_queue *rq = &vi->rq[i];
  1561. struct send_queue *sq = &vi->sq[i];
  1562. do {
  1563. start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
  1564. tpackets = sq->stats.packets;
  1565. tbytes = sq->stats.bytes;
  1566. } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
  1567. do {
  1568. start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
  1569. rpackets = rq->stats.packets;
  1570. rbytes = rq->stats.bytes;
  1571. rdrops = rq->stats.drops;
  1572. } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
  1573. tot->rx_packets += rpackets;
  1574. tot->tx_packets += tpackets;
  1575. tot->rx_bytes += rbytes;
  1576. tot->tx_bytes += tbytes;
  1577. tot->rx_dropped += rdrops;
  1578. }
  1579. tot->tx_dropped = dev->stats.tx_dropped;
  1580. tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
  1581. tot->rx_length_errors = dev->stats.rx_length_errors;
  1582. tot->rx_frame_errors = dev->stats.rx_frame_errors;
  1583. }
  1584. static void virtnet_ack_link_announce(struct virtnet_info *vi)
  1585. {
  1586. rtnl_lock();
  1587. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
  1588. VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
  1589. dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
  1590. rtnl_unlock();
  1591. }
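/* Tell the device how many queue pairs to use via the VIRTIO_NET_CTRL_MQ
 * command. A no-op when the device lacks a control vq or the MQ feature.
 */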
  1592. static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
  1593. {
  1594. struct scatterlist sg;
  1595. struct net_device *dev = vi->dev;
  1596. if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
  1597. return 0;
  1598. vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
  1599. sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
  1600. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
  1601. VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
  1602. dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
  1603. queue_pairs);
  1604. return -EINVAL;
  1605. } else {
  1606. vi->curr_queue_pairs = queue_pairs;
1607. /* virtnet_open() will refill when the device is brought up. */
  1608. if (dev->flags & IFF_UP)
  1609. schedule_delayed_work(&vi->refill, 0);
  1610. }
  1611. return 0;
  1612. }
  1613. static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
  1614. {
  1615. int err;
  1616. rtnl_lock();
  1617. err = _virtnet_set_queues(vi, queue_pairs);
  1618. rtnl_unlock();
  1619. return err;
  1620. }
  1621. static int virtnet_close(struct net_device *dev)
  1622. {
  1623. struct virtnet_info *vi = netdev_priv(dev);
  1624. int i;
  1625. /* Make sure NAPI doesn't schedule refill work */
  1626. disable_delayed_refill(vi);
  1627. /* Make sure refill_work doesn't re-enable napi! */
  1628. cancel_delayed_work_sync(&vi->refill);
  1629. for (i = 0; i < vi->max_queue_pairs; i++) {
  1630. xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
  1631. napi_disable(&vi->rq[i].napi);
  1632. virtnet_napi_tx_disable(&vi->sq[i].napi);
  1633. }
  1634. return 0;
  1635. }
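/* .ndo_set_rx_mode: push the promiscuous/all-multicast flags and the
 * unicast + multicast MAC filter tables to the device over the control vq.
 */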
  1636. static void virtnet_set_rx_mode(struct net_device *dev)
  1637. {
  1638. struct virtnet_info *vi = netdev_priv(dev);
  1639. struct scatterlist sg[2];
  1640. struct virtio_net_ctrl_mac *mac_data;
  1641. struct netdev_hw_addr *ha;
  1642. int uc_count;
  1643. int mc_count;
  1644. void *buf;
  1645. int i;
  1646. /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
  1647. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
  1648. return;
  1649. vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
  1650. vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
  1651. sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
  1652. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
  1653. VIRTIO_NET_CTRL_RX_PROMISC, sg))
  1654. dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
  1655. vi->ctrl->promisc ? "en" : "dis");
  1656. sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti));
  1657. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
  1658. VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
  1659. dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
  1660. vi->ctrl->allmulti ? "en" : "dis");
  1661. uc_count = netdev_uc_count(dev);
  1662. mc_count = netdev_mc_count(dev);
  1663. /* MAC filter - use one buffer for both lists */
  1664. buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
  1665. (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
  1666. mac_data = buf;
  1667. if (!buf)
  1668. return;
  1669. sg_init_table(sg, 2);
  1670. /* Store the unicast list and count in the front of the buffer */
  1671. mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
  1672. i = 0;
  1673. netdev_for_each_uc_addr(ha, dev)
  1674. memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
  1675. sg_set_buf(&sg[0], mac_data,
  1676. sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
  1677. /* multicast list and count fill the end */
  1678. mac_data = (void *)&mac_data->macs[uc_count][0];
  1679. mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
  1680. i = 0;
  1681. netdev_for_each_mc_addr(ha, dev)
  1682. memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
  1683. sg_set_buf(&sg[1], mac_data,
  1684. sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
  1685. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  1686. VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
  1687. dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
  1688. kfree(buf);
  1689. }
  1690. static int virtnet_vlan_rx_add_vid(struct net_device *dev,
  1691. __be16 proto, u16 vid)
  1692. {
  1693. struct virtnet_info *vi = netdev_priv(dev);
  1694. struct scatterlist sg;
  1695. vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
  1696. sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
  1697. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
  1698. VIRTIO_NET_CTRL_VLAN_ADD, &sg))
  1699. dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
  1700. return 0;
  1701. }
  1702. static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
  1703. __be16 proto, u16 vid)
  1704. {
  1705. struct virtnet_info *vi = netdev_priv(dev);
  1706. struct scatterlist sg;
  1707. vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
  1708. sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
  1709. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
  1710. VIRTIO_NET_CTRL_VLAN_DEL, &sg))
  1711. dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
  1712. return 0;
  1713. }
  1714. static void virtnet_clean_affinity(struct virtnet_info *vi)
  1715. {
  1716. int i;
  1717. if (vi->affinity_hint_set) {
  1718. for (i = 0; i < vi->max_queue_pairs; i++) {
  1719. virtqueue_set_affinity(vi->rq[i].vq, NULL);
  1720. virtqueue_set_affinity(vi->sq[i].vq, NULL);
  1721. }
  1722. vi->affinity_hint_set = false;
  1723. }
  1724. }
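/* Spread the online CPUs as evenly as possible across the active queue
 * pairs, set the matching virtqueue affinity hints and XPS maps, and
 * record that affinity hints are in place.
 */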
  1725. static void virtnet_set_affinity(struct virtnet_info *vi)
  1726. {
  1727. cpumask_var_t mask;
  1728. int stragglers;
  1729. int group_size;
  1730. int i, j, cpu;
  1731. int num_cpu;
  1732. int stride;
  1733. if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
  1734. virtnet_clean_affinity(vi);
  1735. return;
  1736. }
  1737. num_cpu = num_online_cpus();
  1738. stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
  1739. stragglers = num_cpu >= vi->curr_queue_pairs ?
  1740. num_cpu % vi->curr_queue_pairs :
  1741. 0;
  1742. cpu = cpumask_next(-1, cpu_online_mask);
  1743. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1744. group_size = stride + (i < stragglers ? 1 : 0);
  1745. for (j = 0; j < group_size; j++) {
  1746. cpumask_set_cpu(cpu, mask);
  1747. cpu = cpumask_next_wrap(cpu, cpu_online_mask,
  1748. nr_cpu_ids, false);
  1749. }
  1750. virtqueue_set_affinity(vi->rq[i].vq, mask);
  1751. virtqueue_set_affinity(vi->sq[i].vq, mask);
  1752. __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
  1753. cpumask_clear(mask);
  1754. }
  1755. vi->affinity_hint_set = true;
  1756. free_cpumask_var(mask);
  1757. }
  1758. static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
  1759. {
  1760. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  1761. node);
  1762. virtnet_set_affinity(vi);
  1763. return 0;
  1764. }
  1765. static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
  1766. {
  1767. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  1768. node_dead);
  1769. virtnet_set_affinity(vi);
  1770. return 0;
  1771. }
  1772. static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
  1773. {
  1774. struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
  1775. node);
  1776. virtnet_clean_affinity(vi);
  1777. return 0;
  1778. }
  1779. static enum cpuhp_state virtionet_online;
  1780. static int virtnet_cpu_notif_add(struct virtnet_info *vi)
  1781. {
  1782. int ret;
  1783. ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
  1784. if (ret)
  1785. return ret;
  1786. ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
  1787. &vi->node_dead);
  1788. if (!ret)
  1789. return ret;
  1790. cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
  1791. return ret;
  1792. }
  1793. static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
  1794. {
  1795. cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
  1796. cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
  1797. &vi->node_dead);
  1798. }
  1799. static void virtnet_get_ringparam(struct net_device *dev,
  1800. struct ethtool_ringparam *ring)
  1801. {
  1802. struct virtnet_info *vi = netdev_priv(dev);
  1803. ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
  1804. ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
  1805. ring->rx_pending = ring->rx_max_pending;
  1806. ring->tx_pending = ring->tx_max_pending;
  1807. }
  1808. static void virtnet_get_drvinfo(struct net_device *dev,
  1809. struct ethtool_drvinfo *info)
  1810. {
  1811. struct virtnet_info *vi = netdev_priv(dev);
  1812. struct virtio_device *vdev = vi->vdev;
  1813. strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
  1814. strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
  1815. strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
  1816. }
  1817. /* TODO: Eliminate OOO packets during switching */
  1818. static int virtnet_set_channels(struct net_device *dev,
  1819. struct ethtool_channels *channels)
  1820. {
  1821. struct virtnet_info *vi = netdev_priv(dev);
  1822. u16 queue_pairs = channels->combined_count;
  1823. int err;
  1824. /* We don't support separate rx/tx channels.
  1825. * We don't allow setting 'other' channels.
  1826. */
  1827. if (channels->rx_count || channels->tx_count || channels->other_count)
  1828. return -EINVAL;
  1829. if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
  1830. return -EINVAL;
1831. /* For now we don't support modifying channels while XDP is loaded.
1832. * Also, when XDP is loaded all RX queues have XDP programs, so we
1833. * only need to check a single RX queue.
1834. */
  1835. if (vi->rq[0].xdp_prog)
  1836. return -EINVAL;
  1837. get_online_cpus();
  1838. err = _virtnet_set_queues(vi, queue_pairs);
  1839. if (err) {
  1840. put_online_cpus();
  1841. goto err;
  1842. }
  1843. virtnet_set_affinity(vi);
  1844. put_online_cpus();
  1845. netif_set_real_num_tx_queues(dev, queue_pairs);
  1846. netif_set_real_num_rx_queues(dev, queue_pairs);
  1847. err:
  1848. return err;
  1849. }
  1850. static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  1851. {
  1852. struct virtnet_info *vi = netdev_priv(dev);
  1853. char *p = (char *)data;
  1854. unsigned int i, j;
  1855. switch (stringset) {
  1856. case ETH_SS_STATS:
  1857. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1858. for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
  1859. snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
  1860. i, virtnet_rq_stats_desc[j].desc);
  1861. p += ETH_GSTRING_LEN;
  1862. }
  1863. }
  1864. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1865. for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
  1866. snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
  1867. i, virtnet_sq_stats_desc[j].desc);
  1868. p += ETH_GSTRING_LEN;
  1869. }
  1870. }
  1871. break;
  1872. }
  1873. }
  1874. static int virtnet_get_sset_count(struct net_device *dev, int sset)
  1875. {
  1876. struct virtnet_info *vi = netdev_priv(dev);
  1877. switch (sset) {
  1878. case ETH_SS_STATS:
  1879. return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
  1880. VIRTNET_SQ_STATS_LEN);
  1881. default:
  1882. return -EOPNOTSUPP;
  1883. }
  1884. }
  1885. static void virtnet_get_ethtool_stats(struct net_device *dev,
  1886. struct ethtool_stats *stats, u64 *data)
  1887. {
  1888. struct virtnet_info *vi = netdev_priv(dev);
  1889. unsigned int idx = 0, start, i, j;
  1890. const u8 *stats_base;
  1891. size_t offset;
  1892. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1893. struct receive_queue *rq = &vi->rq[i];
  1894. stats_base = (u8 *)&rq->stats;
  1895. do {
  1896. start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
  1897. for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
  1898. offset = virtnet_rq_stats_desc[j].offset;
  1899. data[idx + j] = *(u64 *)(stats_base + offset);
  1900. }
  1901. } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
  1902. idx += VIRTNET_RQ_STATS_LEN;
  1903. }
  1904. for (i = 0; i < vi->curr_queue_pairs; i++) {
  1905. struct send_queue *sq = &vi->sq[i];
  1906. stats_base = (u8 *)&sq->stats;
  1907. do {
  1908. start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
  1909. for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
  1910. offset = virtnet_sq_stats_desc[j].offset;
  1911. data[idx + j] = *(u64 *)(stats_base + offset);
  1912. }
  1913. } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
  1914. idx += VIRTNET_SQ_STATS_LEN;
  1915. }
  1916. }
  1917. static void virtnet_get_channels(struct net_device *dev,
  1918. struct ethtool_channels *channels)
  1919. {
  1920. struct virtnet_info *vi = netdev_priv(dev);
  1921. channels->combined_count = vi->curr_queue_pairs;
  1922. channels->max_combined = vi->max_queue_pairs;
  1923. channels->max_other = 0;
  1924. channels->rx_count = 0;
  1925. channels->tx_count = 0;
  1926. channels->other_count = 0;
  1927. }
  1928. static int virtnet_set_link_ksettings(struct net_device *dev,
  1929. const struct ethtool_link_ksettings *cmd)
  1930. {
  1931. struct virtnet_info *vi = netdev_priv(dev);
  1932. return ethtool_virtdev_set_link_ksettings(dev, cmd,
  1933. &vi->speed, &vi->duplex);
  1934. }
  1935. static int virtnet_get_link_ksettings(struct net_device *dev,
  1936. struct ethtool_link_ksettings *cmd)
  1937. {
  1938. struct virtnet_info *vi = netdev_priv(dev);
  1939. cmd->base.speed = vi->speed;
  1940. cmd->base.duplex = vi->duplex;
  1941. cmd->base.port = PORT_OTHER;
  1942. return 0;
  1943. }
  1944. static int virtnet_set_coalesce(struct net_device *dev,
  1945. struct ethtool_coalesce *ec)
  1946. {
  1947. struct virtnet_info *vi = netdev_priv(dev);
  1948. int i, napi_weight;
  1949. if (ec->tx_max_coalesced_frames > 1 ||
  1950. ec->rx_max_coalesced_frames != 1)
  1951. return -EINVAL;
  1952. napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
  1953. if (napi_weight ^ vi->sq[0].napi.weight) {
  1954. if (dev->flags & IFF_UP)
  1955. return -EBUSY;
  1956. for (i = 0; i < vi->max_queue_pairs; i++)
  1957. vi->sq[i].napi.weight = napi_weight;
  1958. }
  1959. return 0;
  1960. }
  1961. static int virtnet_get_coalesce(struct net_device *dev,
  1962. struct ethtool_coalesce *ec)
  1963. {
  1964. struct ethtool_coalesce ec_default = {
  1965. .cmd = ETHTOOL_GCOALESCE,
  1966. .rx_max_coalesced_frames = 1,
  1967. };
  1968. struct virtnet_info *vi = netdev_priv(dev);
  1969. memcpy(ec, &ec_default, sizeof(ec_default));
  1970. if (vi->sq[0].napi.weight)
  1971. ec->tx_max_coalesced_frames = 1;
  1972. return 0;
  1973. }
  1974. static void virtnet_init_settings(struct net_device *dev)
  1975. {
  1976. struct virtnet_info *vi = netdev_priv(dev);
  1977. vi->speed = SPEED_UNKNOWN;
  1978. vi->duplex = DUPLEX_UNKNOWN;
  1979. }
  1980. static void virtnet_update_settings(struct virtnet_info *vi)
  1981. {
  1982. u32 speed;
  1983. u8 duplex;
  1984. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
  1985. return;
  1986. virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
  1987. if (ethtool_validate_speed(speed))
  1988. vi->speed = speed;
  1989. virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
  1990. if (ethtool_validate_duplex(duplex))
  1991. vi->duplex = duplex;
  1992. }
  1993. static const struct ethtool_ops virtnet_ethtool_ops = {
  1994. .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
  1995. .get_drvinfo = virtnet_get_drvinfo,
  1996. .get_link = ethtool_op_get_link,
  1997. .get_ringparam = virtnet_get_ringparam,
  1998. .get_strings = virtnet_get_strings,
  1999. .get_sset_count = virtnet_get_sset_count,
  2000. .get_ethtool_stats = virtnet_get_ethtool_stats,
  2001. .set_channels = virtnet_set_channels,
  2002. .get_channels = virtnet_get_channels,
  2003. .get_ts_info = ethtool_op_get_ts_info,
  2004. .get_link_ksettings = virtnet_get_link_ksettings,
  2005. .set_link_ksettings = virtnet_set_link_ksettings,
  2006. .set_coalesce = virtnet_set_coalesce,
  2007. .get_coalesce = virtnet_get_coalesce,
  2008. };
  2009. static void virtnet_freeze_down(struct virtio_device *vdev)
  2010. {
  2011. struct virtnet_info *vi = vdev->priv;
  2012. /* Make sure no work handler is accessing the device */
  2013. flush_work(&vi->config_work);
  2014. netif_tx_lock_bh(vi->dev);
  2015. netif_device_detach(vi->dev);
  2016. netif_tx_unlock_bh(vi->dev);
  2017. if (netif_running(vi->dev))
  2018. virtnet_close(vi->dev);
  2019. }
  2020. static int init_vqs(struct virtnet_info *vi);
  2021. static int virtnet_restore_up(struct virtio_device *vdev)
  2022. {
  2023. struct virtnet_info *vi = vdev->priv;
  2024. int err;
  2025. err = init_vqs(vi);
  2026. if (err)
  2027. return err;
  2028. virtio_device_ready(vdev);
  2029. enable_delayed_refill(vi);
  2030. if (netif_running(vi->dev)) {
  2031. err = virtnet_open(vi->dev);
  2032. if (err)
  2033. return err;
  2034. }
  2035. netif_tx_lock_bh(vi->dev);
  2036. netif_device_attach(vi->dev);
  2037. netif_tx_unlock_bh(vi->dev);
  2038. return err;
  2039. }
  2040. static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
  2041. {
  2042. struct scatterlist sg;
  2043. vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads);
  2044. sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads));
  2045. if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
  2046. VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
  2047. dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
  2048. return -EINVAL;
  2049. }
  2050. return 0;
  2051. }
  2052. static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
  2053. {
  2054. u64 offloads = 0;
  2055. if (!vi->guest_offloads)
  2056. return 0;
  2057. return virtnet_set_guest_offloads(vi, offloads);
  2058. }
  2059. static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
  2060. {
  2061. u64 offloads = vi->guest_offloads;
  2062. if (!vi->guest_offloads)
  2063. return 0;
  2064. return virtnet_set_guest_offloads(vi, offloads);
  2065. }
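/* Attach or detach an XDP program: validate device features and MTU,
 * reserve extra queue pairs for XDP_TX when possible, quiesce NAPI, swap
 * the program pointer on every RX queue and adjust guest offloads.
 */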
  2066. static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
  2067. struct netlink_ext_ack *extack)
  2068. {
  2069. unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
  2070. struct virtnet_info *vi = netdev_priv(dev);
  2071. struct bpf_prog *old_prog;
  2072. u16 xdp_qp = 0, curr_qp;
  2073. int i, err;
  2074. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
  2075. && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
  2076. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
  2077. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
  2078. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
  2079. virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
  2080. NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
  2081. return -EOPNOTSUPP;
  2082. }
  2083. if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
  2084. NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
  2085. return -EINVAL;
  2086. }
  2087. if (dev->mtu > max_sz) {
  2088. NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
  2089. netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
  2090. return -EINVAL;
  2091. }
  2092. curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
  2093. if (prog)
  2094. xdp_qp = nr_cpu_ids;
  2095. /* XDP requires extra queues for XDP_TX */
  2096. if (curr_qp + xdp_qp > vi->max_queue_pairs) {
  2097. netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
  2098. curr_qp + xdp_qp, vi->max_queue_pairs);
  2099. xdp_qp = 0;
  2100. }
  2101. old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
  2102. if (!prog && !old_prog)
  2103. return 0;
  2104. if (prog)
  2105. bpf_prog_add(prog, vi->max_queue_pairs - 1);
  2106. /* Make sure NAPI is not using any XDP TX queues for RX. */
  2107. if (netif_running(dev)) {
  2108. for (i = 0; i < vi->max_queue_pairs; i++) {
  2109. napi_disable(&vi->rq[i].napi);
  2110. virtnet_napi_tx_disable(&vi->sq[i].napi);
  2111. }
  2112. }
  2113. if (!prog) {
  2114. for (i = 0; i < vi->max_queue_pairs; i++) {
  2115. rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
  2116. if (i == 0)
  2117. virtnet_restore_guest_offloads(vi);
  2118. }
  2119. synchronize_net();
  2120. }
  2121. err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
  2122. if (err)
  2123. goto err;
  2124. netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
  2125. vi->xdp_queue_pairs = xdp_qp;
  2126. if (prog) {
  2127. vi->xdp_enabled = true;
  2128. for (i = 0; i < vi->max_queue_pairs; i++) {
  2129. rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
  2130. if (i == 0 && !old_prog)
  2131. virtnet_clear_guest_offloads(vi);
  2132. }
  2133. } else {
  2134. vi->xdp_enabled = false;
  2135. }
  2136. for (i = 0; i < vi->max_queue_pairs; i++) {
  2137. if (old_prog)
  2138. bpf_prog_put(old_prog);
  2139. if (netif_running(dev)) {
  2140. virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
  2141. virtnet_napi_tx_enable(vi, vi->sq[i].vq,
  2142. &vi->sq[i].napi);
  2143. }
  2144. }
  2145. return 0;
  2146. err:
  2147. if (!prog) {
  2148. virtnet_clear_guest_offloads(vi);
  2149. for (i = 0; i < vi->max_queue_pairs; i++)
  2150. rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
  2151. }
  2152. if (netif_running(dev)) {
  2153. for (i = 0; i < vi->max_queue_pairs; i++) {
  2154. virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
  2155. virtnet_napi_tx_enable(vi, vi->sq[i].vq,
  2156. &vi->sq[i].napi);
  2157. }
  2158. }
  2159. if (prog)
  2160. bpf_prog_sub(prog, vi->max_queue_pairs - 1);
  2161. return err;
  2162. }
  2163. static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  2164. {
  2165. switch (xdp->command) {
  2166. case XDP_SETUP_PROG:
  2167. return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
  2168. default:
  2169. return -EINVAL;
  2170. }
  2171. }
  2172. static int virtnet_get_phys_port_name(struct net_device *dev, char *buf,
  2173. size_t len)
  2174. {
  2175. struct virtnet_info *vi = netdev_priv(dev);
  2176. int ret;
  2177. if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY))
  2178. return -EOPNOTSUPP;
  2179. ret = snprintf(buf, len, "sby");
  2180. if (ret >= len)
  2181. return -EOPNOTSUPP;
  2182. return 0;
  2183. }
  2184. static int virtnet_set_features(struct net_device *dev,
  2185. netdev_features_t features)
  2186. {
  2187. struct virtnet_info *vi = netdev_priv(dev);
  2188. u64 offloads;
  2189. int err;
  2190. if (!vi->has_cvq)
  2191. return 0;
  2192. if ((dev->features ^ features) & NETIF_F_GRO_HW) {
  2193. if (vi->xdp_enabled)
  2194. return -EBUSY;
  2195. if (features & NETIF_F_GRO_HW)
  2196. offloads = vi->guest_offloads_capable;
  2197. else
  2198. offloads = vi->guest_offloads_capable &
  2199. ~GUEST_OFFLOAD_GRO_HW_MASK;
  2200. err = virtnet_set_guest_offloads(vi, offloads);
  2201. if (err)
  2202. return err;
  2203. vi->guest_offloads = offloads;
  2204. }
  2205. return 0;
  2206. }
  2207. static const struct net_device_ops virtnet_netdev = {
  2208. .ndo_open = virtnet_open,
  2209. .ndo_stop = virtnet_close,
  2210. .ndo_start_xmit = start_xmit,
  2211. .ndo_validate_addr = eth_validate_addr,
  2212. .ndo_set_mac_address = virtnet_set_mac_address,
  2213. .ndo_set_rx_mode = virtnet_set_rx_mode,
  2214. .ndo_get_stats64 = virtnet_stats,
  2215. .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
  2216. .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
  2217. .ndo_bpf = virtnet_xdp,
  2218. .ndo_xdp_xmit = virtnet_xdp_xmit,
  2219. .ndo_features_check = passthru_features_check,
  2220. .ndo_get_phys_port_name = virtnet_get_phys_port_name,
  2221. .ndo_set_features = virtnet_set_features,
  2222. };
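/* Workqueue handler for config space changes: acknowledge link
 * announcements and propagate the device link state to the carrier and
 * the TX queues.
 */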
  2223. static void virtnet_config_changed_work(struct work_struct *work)
  2224. {
  2225. struct virtnet_info *vi =
  2226. container_of(work, struct virtnet_info, config_work);
  2227. u16 v;
  2228. if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
  2229. struct virtio_net_config, status, &v) < 0)
  2230. return;
  2231. if (v & VIRTIO_NET_S_ANNOUNCE) {
  2232. netdev_notify_peers(vi->dev);
  2233. virtnet_ack_link_announce(vi);
  2234. }
  2235. /* Ignore unknown (future) status bits */
  2236. v &= VIRTIO_NET_S_LINK_UP;
  2237. if (vi->status == v)
  2238. return;
  2239. vi->status = v;
  2240. if (vi->status & VIRTIO_NET_S_LINK_UP) {
  2241. virtnet_update_settings(vi);
  2242. netif_carrier_on(vi->dev);
  2243. netif_tx_wake_all_queues(vi->dev);
  2244. } else {
  2245. netif_carrier_off(vi->dev);
  2246. netif_tx_stop_all_queues(vi->dev);
  2247. }
  2248. }
  2249. static void virtnet_config_changed(struct virtio_device *vdev)
  2250. {
  2251. struct virtnet_info *vi = vdev->priv;
  2252. schedule_work(&vi->config_work);
  2253. }
  2254. static void virtnet_free_queues(struct virtnet_info *vi)
  2255. {
  2256. int i;
  2257. for (i = 0; i < vi->max_queue_pairs; i++) {
  2258. __netif_napi_del(&vi->rq[i].napi);
  2259. __netif_napi_del(&vi->sq[i].napi);
  2260. }
  2261. /* We called __netif_napi_del(),
  2262. * we need to respect an RCU grace period before freeing vi->rq
  2263. */
  2264. synchronize_net();
  2265. kfree(vi->rq);
  2266. kfree(vi->sq);
  2267. kfree(vi->ctrl);
  2268. }
  2269. static void _free_receive_bufs(struct virtnet_info *vi)
  2270. {
  2271. struct bpf_prog *old_prog;
  2272. int i;
  2273. for (i = 0; i < vi->max_queue_pairs; i++) {
  2274. while (vi->rq[i].pages)
  2275. __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
  2276. old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
  2277. RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
  2278. if (old_prog)
  2279. bpf_prog_put(old_prog);
  2280. }
  2281. }
  2282. static void free_receive_bufs(struct virtnet_info *vi)
  2283. {
  2284. rtnl_lock();
  2285. _free_receive_bufs(vi);
  2286. rtnl_unlock();
  2287. }
  2288. static void free_receive_page_frags(struct virtnet_info *vi)
  2289. {
  2290. int i;
  2291. for (i = 0; i < vi->max_queue_pairs; i++)
  2292. if (vi->rq[i].alloc_frag.page)
  2293. put_page(vi->rq[i].alloc_frag.page);
  2294. }
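/* At teardown, detach any buffers still sitting in the send and receive
 * virtqueues and release them according to their type (skb, XDP frame,
 * mergeable/big/small receive buffer).
 */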
  2295. static void free_unused_bufs(struct virtnet_info *vi)
  2296. {
  2297. void *buf;
  2298. int i;
  2299. for (i = 0; i < vi->max_queue_pairs; i++) {
  2300. struct virtqueue *vq = vi->sq[i].vq;
  2301. while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
  2302. if (!is_xdp_frame(buf))
  2303. dev_kfree_skb(buf);
  2304. else
  2305. xdp_return_frame(ptr_to_xdp(buf));
  2306. }
  2307. }
  2308. for (i = 0; i < vi->max_queue_pairs; i++) {
  2309. struct virtqueue *vq = vi->rq[i].vq;
  2310. while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
  2311. if (vi->mergeable_rx_bufs) {
  2312. put_page(virt_to_head_page(buf));
  2313. } else if (vi->big_packets) {
  2314. give_pages(&vi->rq[i], buf);
  2315. } else {
  2316. put_page(virt_to_head_page(buf));
  2317. }
  2318. }
  2319. }
  2320. }
  2321. static void virtnet_del_vqs(struct virtnet_info *vi)
  2322. {
  2323. struct virtio_device *vdev = vi->vdev;
  2324. virtnet_clean_affinity(vi);
  2325. vdev->config->del_vqs(vdev);
  2326. virtnet_free_queues(vi);
  2327. }
  2328. /* How large should a single buffer be so a queue full of these can fit at
  2329. * least one full packet?
  2330. * Logic below assumes the mergeable buffer header is used.
  2331. */
  2332. static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
  2333. {
  2334. const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  2335. unsigned int rq_size = virtqueue_get_vring_size(vq);
  2336. unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
  2337. unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
  2338. unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
  2339. return max(max(min_buf_len, hdr_len) - hdr_len,
  2340. (unsigned int)GOOD_PACKET_LEN);
  2341. }
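/* Allocate the find_vqs() parameter arrays, name the per-pair input/output
 * virtqueues (plus the optional control vq), ask the transport to create
 * them, and wire the results into vi->rq/vi->sq.
 */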
  2342. static int virtnet_find_vqs(struct virtnet_info *vi)
  2343. {
  2344. vq_callback_t **callbacks;
  2345. struct virtqueue **vqs;
  2346. int ret = -ENOMEM;
  2347. int i, total_vqs;
  2348. const char **names;
  2349. bool *ctx;
  2350. /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
  2351. * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
  2352. * possible control vq.
  2353. */
  2354. total_vqs = vi->max_queue_pairs * 2 +
  2355. virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
  2356. /* Allocate space for find_vqs parameters */
  2357. vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL);
  2358. if (!vqs)
  2359. goto err_vq;
  2360. callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL);
  2361. if (!callbacks)
  2362. goto err_callback;
  2363. names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL);
  2364. if (!names)
  2365. goto err_names;
  2366. if (!vi->big_packets || vi->mergeable_rx_bufs) {
  2367. ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL);
  2368. if (!ctx)
  2369. goto err_ctx;
  2370. } else {
  2371. ctx = NULL;
  2372. }
  2373. /* Parameters for control virtqueue, if any */
  2374. if (vi->has_cvq) {
  2375. callbacks[total_vqs - 1] = NULL;
  2376. names[total_vqs - 1] = "control";
  2377. }
  2378. /* Allocate/initialize parameters for send/receive virtqueues */
  2379. for (i = 0; i < vi->max_queue_pairs; i++) {
  2380. callbacks[rxq2vq(i)] = skb_recv_done;
  2381. callbacks[txq2vq(i)] = skb_xmit_done;
  2382. sprintf(vi->rq[i].name, "input.%d", i);
  2383. sprintf(vi->sq[i].name, "output.%d", i);
  2384. names[rxq2vq(i)] = vi->rq[i].name;
  2385. names[txq2vq(i)] = vi->sq[i].name;
  2386. if (ctx)
  2387. ctx[rxq2vq(i)] = true;
  2388. }
  2389. ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
  2390. names, ctx, NULL);
  2391. if (ret)
  2392. goto err_find;
  2393. if (vi->has_cvq) {
  2394. vi->cvq = vqs[total_vqs - 1];
  2395. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
  2396. vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
  2397. }
  2398. for (i = 0; i < vi->max_queue_pairs; i++) {
  2399. vi->rq[i].vq = vqs[rxq2vq(i)];
  2400. vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
  2401. vi->sq[i].vq = vqs[txq2vq(i)];
  2402. }
2403. /* Success path: ret == 0 here; fall through and free the temporary arrays below. */
  2404. err_find:
  2405. kfree(ctx);
  2406. err_ctx:
  2407. kfree(names);
  2408. err_names:
  2409. kfree(callbacks);
  2410. err_callback:
  2411. kfree(vqs);
  2412. err_vq:
  2413. return ret;
  2414. }
  2415. static int virtnet_alloc_queues(struct virtnet_info *vi)
  2416. {
  2417. int i;
  2418. vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
  2419. if (!vi->ctrl)
  2420. goto err_ctrl;
  2421. vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
  2422. if (!vi->sq)
  2423. goto err_sq;
  2424. vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
  2425. if (!vi->rq)
  2426. goto err_rq;
  2427. INIT_DELAYED_WORK(&vi->refill, refill_work);
  2428. for (i = 0; i < vi->max_queue_pairs; i++) {
  2429. vi->rq[i].pages = NULL;
  2430. netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
  2431. napi_weight);
  2432. netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
  2433. napi_tx ? napi_weight : 0);
  2434. sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
  2435. ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
  2436. sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
  2437. u64_stats_init(&vi->rq[i].stats.syncp);
  2438. u64_stats_init(&vi->sq[i].stats.syncp);
  2439. }
  2440. return 0;
  2441. err_rq:
  2442. kfree(vi->sq);
  2443. err_sq:
  2444. kfree(vi->ctrl);
  2445. err_ctrl:
  2446. return -ENOMEM;
  2447. }
  2448. static int init_vqs(struct virtnet_info *vi)
  2449. {
  2450. int ret;
  2451. /* Allocate send & receive queues */
  2452. ret = virtnet_alloc_queues(vi);
  2453. if (ret)
  2454. goto err;
  2455. ret = virtnet_find_vqs(vi);
  2456. if (ret)
  2457. goto err_free;
  2458. get_online_cpus();
  2459. virtnet_set_affinity(vi);
  2460. put_online_cpus();
  2461. return 0;
  2462. err_free:
  2463. virtnet_free_queues(vi);
  2464. err:
  2465. return ret;
  2466. }
  2467. #ifdef CONFIG_SYSFS
  2468. static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
  2469. char *buf)
  2470. {
  2471. struct virtnet_info *vi = netdev_priv(queue->dev);
  2472. unsigned int queue_index = get_netdev_rx_queue_index(queue);
  2473. unsigned int headroom = virtnet_get_headroom(vi);
  2474. unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
  2475. struct ewma_pkt_len *avg;
  2476. BUG_ON(queue_index >= vi->max_queue_pairs);
  2477. avg = &vi->rq[queue_index].mrg_avg_pkt_len;
  2478. return sprintf(buf, "%u\n",
  2479. get_mergeable_buf_len(&vi->rq[queue_index], avg,
  2480. SKB_DATA_ALIGN(headroom + tailroom)));
  2481. }
  2482. static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
  2483. __ATTR_RO(mergeable_rx_buffer_size);
  2484. static struct attribute *virtio_net_mrg_rx_attrs[] = {
  2485. &mergeable_rx_buffer_size_attribute.attr,
  2486. NULL
  2487. };
  2488. static const struct attribute_group virtio_net_mrg_rx_group = {
  2489. .name = "virtio_net",
  2490. .attrs = virtio_net_mrg_rx_attrs
  2491. };
  2492. #endif
  2493. static bool virtnet_fail_on_feature(struct virtio_device *vdev,
  2494. unsigned int fbit,
  2495. const char *fname, const char *dname)
  2496. {
  2497. if (!virtio_has_feature(vdev, fbit))
  2498. return false;
  2499. dev_err(&vdev->dev, "device advertises feature %s but not %s",
  2500. fname, dname);
  2501. return true;
  2502. }
  2503. #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
  2504. virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
  2505. static bool virtnet_validate_features(struct virtio_device *vdev)
  2506. {
  2507. if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
  2508. (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
  2509. "VIRTIO_NET_F_CTRL_VQ") ||
  2510. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
  2511. "VIRTIO_NET_F_CTRL_VQ") ||
  2512. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
  2513. "VIRTIO_NET_F_CTRL_VQ") ||
  2514. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
  2515. VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
  2516. "VIRTIO_NET_F_CTRL_VQ"))) {
  2517. return false;
  2518. }
  2519. return true;
  2520. }
  2521. #define MIN_MTU ETH_MIN_MTU
  2522. #define MAX_MTU ETH_MAX_MTU
  2523. static int virtnet_validate(struct virtio_device *vdev)
  2524. {
  2525. if (!vdev->config->get) {
  2526. dev_err(&vdev->dev, "%s failure: config access disabled\n",
  2527. __func__);
  2528. return -EINVAL;
  2529. }
  2530. if (!virtnet_validate_features(vdev))
  2531. return -EINVAL;
  2532. if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
  2533. int mtu = virtio_cread16(vdev,
  2534. offsetof(struct virtio_net_config,
  2535. mtu));
  2536. if (mtu < MIN_MTU)
  2537. __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
  2538. }
  2539. return 0;
  2540. }
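/* Probe: size the queue pairs from the device config, allocate the netdev,
 * translate feature bits into netdev features, set up the virtqueues and
 * register the device with the networking core.
 */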
  2541. static int virtnet_probe(struct virtio_device *vdev)
  2542. {
  2543. int i, err = -ENOMEM;
  2544. struct net_device *dev;
  2545. struct virtnet_info *vi;
  2546. u16 max_queue_pairs;
  2547. int mtu;
  2548. /* Find if host supports multiqueue virtio_net device */
  2549. err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
  2550. struct virtio_net_config,
  2551. max_virtqueue_pairs, &max_queue_pairs);
2552. /* We need at least 2 queues */
  2553. if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
  2554. max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
  2555. !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
  2556. max_queue_pairs = 1;
  2557. /* Allocate ourselves a network device with room for our info */
  2558. dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
  2559. if (!dev)
  2560. return -ENOMEM;
  2561. /* Set up network device as normal. */
  2562. dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
  2563. dev->netdev_ops = &virtnet_netdev;
  2564. dev->features = NETIF_F_HIGHDMA;
  2565. dev->ethtool_ops = &virtnet_ethtool_ops;
  2566. SET_NETDEV_DEV(dev, &vdev->dev);
  2567. /* Do we support "hardware" checksums? */
  2568. if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
  2569. /* This opens up the world of extra features. */
  2570. dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
  2571. if (csum)
  2572. dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
  2573. if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
  2574. dev->hw_features |= NETIF_F_TSO
  2575. | NETIF_F_TSO_ECN | NETIF_F_TSO6;
  2576. }
  2577. /* Individual feature bits: what can host handle? */
  2578. if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
  2579. dev->hw_features |= NETIF_F_TSO;
  2580. if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
  2581. dev->hw_features |= NETIF_F_TSO6;
  2582. if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
  2583. dev->hw_features |= NETIF_F_TSO_ECN;
  2584. dev->features |= NETIF_F_GSO_ROBUST;
  2585. if (gso)
  2586. dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
  2587. /* (!csum && gso) case will be fixed by register_netdev() */
  2588. }
  2589. if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
  2590. dev->features |= NETIF_F_RXCSUM;
  2591. if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
  2592. virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
  2593. dev->features |= NETIF_F_GRO_HW;
  2594. if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
  2595. dev->hw_features |= NETIF_F_GRO_HW;
  2596. dev->vlan_features = dev->features;
  2597. /* MTU range: 68 - 65535 */
  2598. dev->min_mtu = MIN_MTU;
  2599. dev->max_mtu = MAX_MTU;
  2600. /* Configuration may specify what MAC to use. Otherwise random. */
  2601. if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
  2602. virtio_cread_bytes(vdev,
  2603. offsetof(struct virtio_net_config, mac),
  2604. dev->dev_addr, dev->addr_len);
  2605. else
  2606. eth_hw_addr_random(dev);
  2607. /* Set up our device-specific information */
  2608. vi = netdev_priv(dev);
  2609. vi->dev = dev;
  2610. vi->vdev = vdev;
  2611. vdev->priv = vi;
  2612. INIT_WORK(&vi->config_work, virtnet_config_changed_work);
  2613. spin_lock_init(&vi->refill_lock);
  2614. /* If we can receive ANY GSO packets, we must allocate large ones. */
  2615. if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
  2616. virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
  2617. virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
  2618. virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
  2619. vi->big_packets = true;
  2620. if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
  2621. vi->mergeable_rx_bufs = true;
  2622. if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
  2623. virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
  2624. vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  2625. else
  2626. vi->hdr_len = sizeof(struct virtio_net_hdr);
  2627. if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
  2628. virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
  2629. vi->any_header_sg = true;
  2630. if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
  2631. vi->has_cvq = true;
  2632. if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
  2633. mtu = virtio_cread16(vdev,
  2634. offsetof(struct virtio_net_config,
  2635. mtu));
  2636. if (mtu < dev->min_mtu) {
  2637. /* Should never trigger: MTU was previously validated
  2638. * in virtnet_validate.
  2639. */
  2640. dev_err(&vdev->dev,
  2641. "device MTU appears to have changed it is now %d < %d",
  2642. mtu, dev->min_mtu);
  2643. err = -EINVAL;
  2644. goto free;
  2645. }
  2646. dev->mtu = mtu;
  2647. dev->max_mtu = mtu;
  2648. /* TODO: size buffers correctly in this case. */
  2649. if (dev->mtu > ETH_DATA_LEN)
  2650. vi->big_packets = true;
  2651. }
  2652. if (vi->any_header_sg)
  2653. dev->needed_headroom = vi->hdr_len;
  2654. /* Enable multiqueue by default */
  2655. if (num_online_cpus() >= max_queue_pairs)
  2656. vi->curr_queue_pairs = max_queue_pairs;
  2657. else
  2658. vi->curr_queue_pairs = num_online_cpus();
  2659. vi->max_queue_pairs = max_queue_pairs;
  2660. /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
  2661. err = init_vqs(vi);
  2662. if (err)
  2663. goto free;
  2664. #ifdef CONFIG_SYSFS
  2665. if (vi->mergeable_rx_bufs)
  2666. dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
  2667. #endif
  2668. netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
  2669. netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
  2670. virtnet_init_settings(dev);
  2671. if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
  2672. vi->failover = net_failover_create(vi->dev);
  2673. if (IS_ERR(vi->failover)) {
  2674. err = PTR_ERR(vi->failover);
  2675. goto free_vqs;
  2676. }
  2677. }
  2678. /* serialize netdev register + virtio_device_ready() with ndo_open() */
  2679. rtnl_lock();
  2680. err = register_netdevice(dev);
  2681. if (err) {
  2682. pr_debug("virtio_net: registering device failed\n");
  2683. rtnl_unlock();
  2684. goto free_failover;
  2685. }
  2686. virtio_device_ready(vdev);
  2687. rtnl_unlock();
  2688. err = virtnet_cpu_notif_add(vi);
  2689. if (err) {
  2690. pr_debug("virtio_net: registering cpu notifier failed\n");
  2691. goto free_unregister_netdev;
  2692. }
  2693. virtnet_set_queues(vi, vi->curr_queue_pairs);
  2694. /* Assume link up if device can't report link status,
  2695. otherwise get link status from config. */
  2696. netif_carrier_off(dev);
  2697. if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
  2698. schedule_work(&vi->config_work);
  2699. } else {
  2700. vi->status = VIRTIO_NET_S_LINK_UP;
  2701. virtnet_update_settings(vi);
  2702. netif_carrier_on(dev);
  2703. }
  2704. for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
  2705. if (virtio_has_feature(vi->vdev, guest_offloads[i]))
  2706. set_bit(guest_offloads[i], &vi->guest_offloads);
  2707. vi->guest_offloads_capable = vi->guest_offloads;
  2708. pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
  2709. dev->name, max_queue_pairs);
  2710. return 0;
  2711. free_unregister_netdev:
  2712. vi->vdev->config->reset(vdev);
  2713. unregister_netdev(dev);
  2714. free_failover:
  2715. net_failover_destroy(vi->failover);
  2716. free_vqs:
  2717. cancel_delayed_work_sync(&vi->refill);
  2718. free_receive_page_frags(vi);
  2719. virtnet_del_vqs(vi);
  2720. free:
  2721. free_netdev(dev);
  2722. return err;
  2723. }
  2724. static void remove_vq_common(struct virtnet_info *vi)
  2725. {
  2726. vi->vdev->config->reset(vi->vdev);
  2727. /* Free unused buffers in both send and recv, if any. */
  2728. free_unused_bufs(vi);
  2729. free_receive_bufs(vi);
  2730. free_receive_page_frags(vi);
  2731. virtnet_del_vqs(vi);
  2732. }
  2733. static void virtnet_remove(struct virtio_device *vdev)
  2734. {
  2735. struct virtnet_info *vi = vdev->priv;
  2736. virtnet_cpu_notif_remove(vi);
  2737. /* Make sure no work handler is accessing the device. */
  2738. flush_work(&vi->config_work);
  2739. unregister_netdev(vi->dev);
  2740. net_failover_destroy(vi->failover);
  2741. remove_vq_common(vi);
  2742. free_netdev(vi->dev);
  2743. }
  2744. static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
  2745. {
  2746. struct virtnet_info *vi = vdev->priv;
  2747. virtnet_cpu_notif_remove(vi);
  2748. virtnet_freeze_down(vdev);
  2749. remove_vq_common(vi);
  2750. return 0;
  2751. }
  2752. static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
  2753. {
  2754. struct virtnet_info *vi = vdev->priv;
  2755. int err;
  2756. err = virtnet_restore_up(vdev);
  2757. if (err)
  2758. return err;
  2759. virtnet_set_queues(vi, vi->curr_queue_pairs);
  2760. err = virtnet_cpu_notif_add(vi);
  2761. if (err) {
  2762. virtnet_freeze_down(vdev);
  2763. remove_vq_common(vi);
  2764. return err;
  2765. }
  2766. return 0;
  2767. }
  2768. static struct virtio_device_id id_table[] = {
  2769. { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
  2770. { 0 },
  2771. };
  2772. #define VIRTNET_FEATURES \
  2773. VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
  2774. VIRTIO_NET_F_MAC, \
  2775. VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
  2776. VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
  2777. VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
  2778. VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
  2779. VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
  2780. VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
  2781. VIRTIO_NET_F_CTRL_MAC_ADDR, \
  2782. VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
  2783. VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
  2784. static unsigned int features[] = {
  2785. VIRTNET_FEATURES,
  2786. };
  2787. static unsigned int features_legacy[] = {
  2788. VIRTNET_FEATURES,
  2789. VIRTIO_NET_F_GSO,
  2790. VIRTIO_F_ANY_LAYOUT,
  2791. };
  2792. static struct virtio_driver virtio_net_driver = {
  2793. .feature_table = features,
  2794. .feature_table_size = ARRAY_SIZE(features),
  2795. .feature_table_legacy = features_legacy,
  2796. .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
  2797. .driver.name = KBUILD_MODNAME,
  2798. .driver.owner = THIS_MODULE,
  2799. .id_table = id_table,
  2800. .validate = virtnet_validate,
  2801. .probe = virtnet_probe,
  2802. .remove = virtnet_remove,
  2803. .config_changed = virtnet_config_changed,
  2804. #ifdef CONFIG_PM_SLEEP
  2805. .freeze = virtnet_freeze,
  2806. .restore = virtnet_restore,
  2807. #endif
  2808. };
  2809. static __init int virtio_net_driver_init(void)
  2810. {
  2811. int ret;
  2812. ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
  2813. virtnet_cpu_online,
  2814. virtnet_cpu_down_prep);
  2815. if (ret < 0)
  2816. goto out;
  2817. virtionet_online = ret;
  2818. ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
  2819. NULL, virtnet_cpu_dead);
  2820. if (ret)
  2821. goto err_dead;
  2822. ret = register_virtio_driver(&virtio_net_driver);
  2823. if (ret)
  2824. goto err_virtio;
  2825. return 0;
  2826. err_virtio:
  2827. cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
  2828. err_dead:
  2829. cpuhp_remove_multi_state(virtionet_online);
  2830. out:
  2831. return ret;
  2832. }
  2833. module_init(virtio_net_driver_init);
  2834. static __exit void virtio_net_driver_exit(void)
  2835. {
  2836. unregister_virtio_driver(&virtio_net_driver);
  2837. cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
  2838. cpuhp_remove_multi_state(virtionet_online);
  2839. }
  2840. module_exit(virtio_net_driver_exit);
  2841. MODULE_DEVICE_TABLE(virtio, id_table);
  2842. MODULE_DESCRIPTION("Virtio network driver");
  2843. MODULE_LICENSE("GPL");