vector_kern.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2017 - 2019 Cambridge Greys Limited
  4. * Copyright (C) 2011 - 2014 Cisco Systems Inc
  5. * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  6. * Copyright (C) 2001 Lennert Buytenhek ([email protected]) and
  7. * James Leu ([email protected]).
  8. * Copyright (C) 2001 by various other people who didn't put their name here.
  9. */
  10. #include <linux/memblock.h>
  11. #include <linux/etherdevice.h>
  12. #include <linux/ethtool.h>
  13. #include <linux/inetdevice.h>
  14. #include <linux/init.h>
  15. #include <linux/list.h>
  16. #include <linux/netdevice.h>
  17. #include <linux/platform_device.h>
  18. #include <linux/rtnetlink.h>
  19. #include <linux/skbuff.h>
  20. #include <linux/slab.h>
  21. #include <linux/interrupt.h>
  22. #include <linux/firmware.h>
  23. #include <linux/fs.h>
  24. #include <uapi/linux/filter.h>
  25. #include <init.h>
  26. #include <irq_kern.h>
  27. #include <irq_user.h>
  28. #include <net_kern.h>
  29. #include <os.h>
  30. #include "mconsole_kern.h"
  31. #include "vector_user.h"
  32. #include "vector_kern.h"
  33. /*
  34. * Adapted from network devices with the following major changes:
  35. * All transports are static - simplifies the code significantly
  36. * Multiple FDs/IRQs per device
  37. * Vector IO optionally used for read/write, falling back to legacy
  38. * based on configuration and/or availability
  39. * Configuration is no longer positional - L2TPv3 and GRE require up to
  40. * 10 parameters; passing these positionally is not fit for purpose (see the example below).
  41. * Only socket transports are supported
  42. */
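/*
 * Illustrative only: options arrive as a single "option=value" list per
 * interface (see the "vec" help text at the bottom of this file), e.g.
 *
 *     vec0:transport=tap,mac=52:54:00:12:34:56,mtu=1500,depth=64,gro=1
 *
 * The generic options fetched in this file are transport, vec, mtu, depth,
 * headroom, gro, mac, bpffile and bpfflash; any other option is assumed to
 * be consumed by the transport-specific userspace code in vector_user.
 */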
  43. #define DRIVER_NAME "uml-vector"
  44. struct vector_cmd_line_arg {
  45. struct list_head list;
  46. int unit;
  47. char *arguments;
  48. };
  49. struct vector_device {
  50. struct list_head list;
  51. struct net_device *dev;
  52. struct platform_device pdev;
  53. int unit;
  54. int opened;
  55. };
  56. static LIST_HEAD(vec_cmd_line);
  57. static DEFINE_SPINLOCK(vector_devices_lock);
  58. static LIST_HEAD(vector_devices);
  59. static int driver_registered;
  60. static void vector_eth_configure(int n, struct arglist *def);
  61. static int vector_mmsg_rx(struct vector_private *vp, int budget);
  62. /* Argument accessors to set variables (and/or set default values)
  63. * mtu, buffer sizing, default headroom, etc
  64. */
  65. #define DEFAULT_HEADROOM 2
  66. #define SAFETY_MARGIN 32
  67. #define DEFAULT_VECTOR_SIZE 64
  68. #define TX_SMALL_PACKET 128
  69. #define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
  70. static const struct {
  71. const char string[ETH_GSTRING_LEN];
  72. } ethtool_stats_keys[] = {
  73. { "rx_queue_max" },
  74. { "rx_queue_running_average" },
  75. { "tx_queue_max" },
  76. { "tx_queue_running_average" },
  77. { "rx_encaps_errors" },
  78. { "tx_timeout_count" },
  79. { "tx_restart_queue" },
  80. { "tx_kicks" },
  81. { "tx_flow_control_xon" },
  82. { "tx_flow_control_xoff" },
  83. { "rx_csum_offload_good" },
  84. { "rx_csum_offload_errors"},
  85. { "sg_ok"},
  86. { "sg_linearized"},
  87. };
  88. #define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)
  89. static void vector_reset_stats(struct vector_private *vp)
  90. {
  91. vp->estats.rx_queue_max = 0;
  92. vp->estats.rx_queue_running_average = 0;
  93. vp->estats.tx_queue_max = 0;
  94. vp->estats.tx_queue_running_average = 0;
  95. vp->estats.rx_encaps_errors = 0;
  96. vp->estats.tx_timeout_count = 0;
  97. vp->estats.tx_restart_queue = 0;
  98. vp->estats.tx_kicks = 0;
  99. vp->estats.tx_flow_control_xon = 0;
  100. vp->estats.tx_flow_control_xoff = 0;
  101. vp->estats.sg_ok = 0;
  102. vp->estats.sg_linearized = 0;
  103. }
  104. static int get_mtu(struct arglist *def)
  105. {
  106. char *mtu = uml_vector_fetch_arg(def, "mtu");
  107. long result;
  108. if (mtu != NULL) {
  109. if (kstrtoul(mtu, 10, &result) == 0)
  110. if ((result < (1 << 16) - 1) && (result >= 576))
  111. return result;
  112. }
  113. return ETH_MAX_PACKET;
  114. }
  115. static char *get_bpf_file(struct arglist *def)
  116. {
  117. return uml_vector_fetch_arg(def, "bpffile");
  118. }
  119. static bool get_bpf_flash(struct arglist *def)
  120. {
  121. char *allow = uml_vector_fetch_arg(def, "bpfflash");
  122. long result;
  123. if (allow != NULL) {
  124. if (kstrtoul(allow, 10, &result) == 0)
  125. return (result > 0);
  126. }
  127. return false;
  128. }
  129. static int get_depth(struct arglist *def)
  130. {
  131. char *mtu = uml_vector_fetch_arg(def, "depth");
  132. long result;
  133. if (mtu != NULL) {
  134. if (kstrtoul(mtu, 10, &result) == 0)
  135. return result;
  136. }
  137. return DEFAULT_VECTOR_SIZE;
  138. }
  139. static int get_headroom(struct arglist *def)
  140. {
  141. char *mtu = uml_vector_fetch_arg(def, "headroom");
  142. long result;
  143. if (mtu != NULL) {
  144. if (kstrtoul(mtu, 10, &result) == 0)
  145. return result;
  146. }
  147. return DEFAULT_HEADROOM;
  148. }
  149. static int get_req_size(struct arglist *def)
  150. {
  151. char *gro = uml_vector_fetch_arg(def, "gro");
  152. long result;
  153. if (gro != NULL) {
  154. if (kstrtoul(gro, 10, &result) == 0) {
  155. if (result > 0)
  156. return 65536;
  157. }
  158. }
  159. return get_mtu(def) + ETH_HEADER_OTHER +
  160. get_headroom(def) + SAFETY_MARGIN;
  161. }
  162. static int get_transport_options(struct arglist *def)
  163. {
  164. char *transport = uml_vector_fetch_arg(def, "transport");
  165. char *vector = uml_vector_fetch_arg(def, "vec");
  166. int vec_rx = VECTOR_RX;
  167. int vec_tx = VECTOR_TX;
  168. long parsed;
  169. int result = 0;
  170. if (transport == NULL)
  171. return -EINVAL;
  172. if (vector != NULL) {
  173. if (kstrtoul(vector, 10, &parsed) == 0) {
  174. if (parsed == 0) {
  175. vec_rx = 0;
  176. vec_tx = 0;
  177. }
  178. }
  179. }
  180. if (get_bpf_flash(def))
  181. result = VECTOR_BPF_FLASH;
  182. if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
  183. return result;
  184. if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
  185. return (result | vec_rx | VECTOR_BPF);
  186. if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
  187. return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
  188. return (result | vec_rx | vec_tx);
  189. }
  190. /* A mini-buffer for packet drop read
  191. * All of our supported transports are datagram oriented and we always
  192. * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
  193. * than the packet size, it still counts as a full packet read and will
  194. * drain the incoming stream to keep sigio/epoll happy
  195. */
  196. #define DROP_BUFFER_SIZE 32
  197. static char *drop_buffer;
  198. /* Array backed queues optimized for bulk enqueue/dequeue and
  199. * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
  200. * For more details and full design rationale see
  201. * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
  202. */
  203. /*
  204. * Advance the mmsg queue head by n = advance. Resets the queue to
  205. * maximum enqueue/dequeue-at-once capacity if possible. Called by
  206. * dequeuers. Caller must hold the head_lock!
  207. */
  208. static int vector_advancehead(struct vector_queue *qi, int advance)
  209. {
  210. int queue_depth;
  211. qi->head =
  212. (qi->head + advance)
  213. % qi->max_depth;
  214. spin_lock(&qi->tail_lock);
  215. qi->queue_depth -= advance;
  216. /* we are at 0, use this to
  217. * reset head and tail so we can use max size vectors
  218. */
  219. if (qi->queue_depth == 0) {
  220. qi->head = 0;
  221. qi->tail = 0;
  222. }
  223. queue_depth = qi->queue_depth;
  224. spin_unlock(&qi->tail_lock);
  225. return queue_depth;
  226. }
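/*
 * Worked example (illustrative): with max_depth = 64, head = 60 and
 * queue_depth = 12, vector_advancehead(qi, 8) gives head = (60 + 8) % 64
 * = 4 and queue_depth = 4. If the advance empties the queue instead
 * (queue_depth reaches 0), head and tail are both reset to 0 so the next
 * enqueue/dequeue can again use a full-length, non-wrapping vector.
 */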
  227. /* Advance the queue tail by n = advance.
  228. * This is called by enqueuers which should hold the
  229. * tail lock already
  230. */
  231. static int vector_advancetail(struct vector_queue *qi, int advance)
  232. {
  233. int queue_depth;
  234. qi->tail =
  235. (qi->tail + advance)
  236. % qi->max_depth;
  237. spin_lock(&qi->head_lock);
  238. qi->queue_depth += advance;
  239. queue_depth = qi->queue_depth;
  240. spin_unlock(&qi->head_lock);
  241. return queue_depth;
  242. }
  243. static int prep_msg(struct vector_private *vp,
  244. struct sk_buff *skb,
  245. struct iovec *iov)
  246. {
  247. int iov_index = 0;
  248. int nr_frags, frag;
  249. skb_frag_t *skb_frag;
  250. nr_frags = skb_shinfo(skb)->nr_frags;
  251. if (nr_frags > MAX_IOV_SIZE) {
  252. if (skb_linearize(skb) != 0)
  253. goto drop;
  254. }
  255. if (vp->header_size > 0) {
  256. iov[iov_index].iov_len = vp->header_size;
  257. vp->form_header(iov[iov_index].iov_base, skb, vp);
  258. iov_index++;
  259. }
  260. iov[iov_index].iov_base = skb->data;
  261. if (nr_frags > 0) {
  262. iov[iov_index].iov_len = skb->len - skb->data_len;
  263. vp->estats.sg_ok++;
  264. } else
  265. iov[iov_index].iov_len = skb->len;
  266. iov_index++;
  267. for (frag = 0; frag < nr_frags; frag++) {
  268. skb_frag = &skb_shinfo(skb)->frags[frag];
  269. iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
  270. iov[iov_index].iov_len = skb_frag_size(skb_frag);
  271. iov_index++;
  272. }
  273. return iov_index;
  274. drop:
  275. return -1;
  276. }
  277. /*
  278. * Generic vector enqueue with support for forming headers using transport
  279. * specific callback. Allows GRE, L2TPv3, RAW and other transports
  280. * to use a common enqueue procedure in vector mode
  281. */
  282. static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
  283. {
  284. struct vector_private *vp = netdev_priv(qi->dev);
  285. int queue_depth;
  286. int packet_len;
  287. struct mmsghdr *mmsg_vector = qi->mmsg_vector;
  288. int iov_count;
  289. spin_lock(&qi->tail_lock);
  290. spin_lock(&qi->head_lock);
  291. queue_depth = qi->queue_depth;
  292. spin_unlock(&qi->head_lock);
  293. if (skb)
  294. packet_len = skb->len;
  295. if (queue_depth < qi->max_depth) {
  296. *(qi->skbuff_vector + qi->tail) = skb;
  297. mmsg_vector += qi->tail;
  298. iov_count = prep_msg(
  299. vp,
  300. skb,
  301. mmsg_vector->msg_hdr.msg_iov
  302. );
  303. if (iov_count < 1)
  304. goto drop;
  305. mmsg_vector->msg_hdr.msg_iovlen = iov_count;
  306. mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
  307. mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
  308. queue_depth = vector_advancetail(qi, 1);
  309. } else
  310. goto drop;
  311. spin_unlock(&qi->tail_lock);
  312. return queue_depth;
  313. drop:
  314. qi->dev->stats.tx_dropped++;
  315. if (skb != NULL) {
  316. packet_len = skb->len;
  317. dev_consume_skb_any(skb);
  318. netdev_completed_queue(qi->dev, 1, packet_len);
  319. }
  320. spin_unlock(&qi->tail_lock);
  321. return queue_depth;
  322. }
  323. static int consume_vector_skbs(struct vector_queue *qi, int count)
  324. {
  325. struct sk_buff *skb;
  326. int skb_index;
  327. int bytes_compl = 0;
  328. for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
  329. skb = *(qi->skbuff_vector + skb_index);
  330. /* mark as empty to ensure correct destruction if
  331. * needed
  332. */
  333. bytes_compl += skb->len;
  334. *(qi->skbuff_vector + skb_index) = NULL;
  335. dev_consume_skb_any(skb);
  336. }
  337. qi->dev->stats.tx_bytes += bytes_compl;
  338. qi->dev->stats.tx_packets += count;
  339. netdev_completed_queue(qi->dev, count, bytes_compl);
  340. return vector_advancehead(qi, count);
  341. }
  342. /*
  343. * Generic vector dequeue via sendmmsg with support for forming headers
  344. * using transport specific callback. Allows GRE, L2TPv3, RAW and
  345. * other transports to use a common dequeue procedure in vector mode
  346. */
  347. static int vector_send(struct vector_queue *qi)
  348. {
  349. struct vector_private *vp = netdev_priv(qi->dev);
  350. struct mmsghdr *send_from;
  351. int result = 0, send_len, queue_depth = qi->max_depth;
  352. if (spin_trylock(&qi->head_lock)) {
  353. if (spin_trylock(&qi->tail_lock)) {
  354. /* update queue_depth to current value */
  355. queue_depth = qi->queue_depth;
  356. spin_unlock(&qi->tail_lock);
  357. while (queue_depth > 0) {
  358. /* Calculate the start of the vector */
  359. send_len = queue_depth;
  360. send_from = qi->mmsg_vector;
  361. send_from += qi->head;
  362. /* Adjust vector size if wraparound */
  363. if (send_len + qi->head > qi->max_depth)
  364. send_len = qi->max_depth - qi->head;
  365. /* Try to TX as many packets as possible */
  366. if (send_len > 0) {
  367. result = uml_vector_sendmmsg(
  368. vp->fds->tx_fd,
  369. send_from,
  370. send_len,
  371. 0
  372. );
  373. vp->in_write_poll =
  374. (result != send_len);
  375. }
  376. /* For some of the sendmmsg error scenarios
  377. * we may end up being unsure of the TX success
  378. * for all packets. It is safer to declare
  379. * them all TX-ed and blame the network.
  380. */
  381. if (result < 0) {
  382. if (net_ratelimit())
  383. netdev_err(vp->dev, "sendmmsg err=%i\n",
  384. result);
  385. vp->in_error = true;
  386. result = send_len;
  387. }
  388. if (result > 0) {
  389. queue_depth =
  390. consume_vector_skbs(qi, result);
  391. /* This is equivalent to a TX IRQ.
  392. * Restart the upper layers to feed us
  393. * more packets.
  394. */
  395. if (result > vp->estats.tx_queue_max)
  396. vp->estats.tx_queue_max = result;
  397. vp->estats.tx_queue_running_average =
  398. (vp->estats.tx_queue_running_average + result) >> 1;
  399. }
  400. netif_wake_queue(qi->dev);
  401. /* if TX is busy, break out of the send loop,
  402. * poll write IRQ will reschedule xmit for us
  403. */
  404. if (result != send_len) {
  405. vp->estats.tx_restart_queue++;
  406. break;
  407. }
  408. }
  409. }
  410. spin_unlock(&qi->head_lock);
  411. }
  412. return queue_depth;
  413. }
  414. /* Queue destructor. Deliberately stateless so we can use
  415. * it in queue cleanup if initialization fails.
  416. */
  417. static void destroy_queue(struct vector_queue *qi)
  418. {
  419. int i;
  420. struct iovec *iov;
  421. struct vector_private *vp = netdev_priv(qi->dev);
  422. struct mmsghdr *mmsg_vector;
  423. if (qi == NULL)
  424. return;
  425. /* deallocate any skbuffs - we rely on any unused to be
  426. * set to NULL.
  427. */
  428. if (qi->skbuff_vector != NULL) {
  429. for (i = 0; i < qi->max_depth; i++) {
  430. if (*(qi->skbuff_vector + i) != NULL)
  431. dev_kfree_skb_any(*(qi->skbuff_vector + i));
  432. }
  433. kfree(qi->skbuff_vector);
  434. }
  435. /* deallocate matching IOV structures including header buffs */
  436. if (qi->mmsg_vector != NULL) {
  437. mmsg_vector = qi->mmsg_vector;
  438. for (i = 0; i < qi->max_depth; i++) {
  439. iov = mmsg_vector->msg_hdr.msg_iov;
  440. if (iov != NULL) {
  441. if ((vp->header_size > 0) &&
  442. (iov->iov_base != NULL))
  443. kfree(iov->iov_base);
  444. kfree(iov);
  445. }
  446. mmsg_vector++;
  447. }
  448. kfree(qi->mmsg_vector);
  449. }
  450. kfree(qi);
  451. }
  452. /*
  453. * Queue constructor. Create a queue with a given size.
  454. */
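/*
 * Per-slot iovec layout (illustrative sketch, derived from the allocations
 * below): each mmsghdr gets 3 + num_extra_frags iovec entries when a
 * transport header is in use, or 2 + num_extra_frags otherwise.
 *
 *   iov[0]   - dedicated header buffer of header_size bytes, only when
 *              vp->header_size > 0 (filled by vp->form_header on TX,
 *              checked by vp->verify_header on RX)
 *   iov[1]   - skb linear data, filled in later by prep_msg()/prep_skb()
 *   iov[2..] - optional page fragments of the skb
 */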
  455. static struct vector_queue *create_queue(
  456. struct vector_private *vp,
  457. int max_size,
  458. int header_size,
  459. int num_extra_frags)
  460. {
  461. struct vector_queue *result;
  462. int i;
  463. struct iovec *iov;
  464. struct mmsghdr *mmsg_vector;
  465. result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
  466. if (result == NULL)
  467. return NULL;
  468. result->max_depth = max_size;
  469. result->dev = vp->dev;
  470. result->mmsg_vector = kmalloc(
  471. (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
  472. if (result->mmsg_vector == NULL)
  473. goto out_mmsg_fail;
  474. result->skbuff_vector = kmalloc(
  475. (sizeof(void *) * max_size), GFP_KERNEL);
  476. if (result->skbuff_vector == NULL)
  477. goto out_skb_fail;
  478. /* further failures can be handled safely by destroy_queue*/
  479. mmsg_vector = result->mmsg_vector;
  480. for (i = 0; i < max_size; i++) {
  481. /* Clear all pointers - we use non-NULL as a marker for
  482. * what to free on destruction
  483. */
  484. *(result->skbuff_vector + i) = NULL;
  485. mmsg_vector->msg_hdr.msg_iov = NULL;
  486. mmsg_vector++;
  487. }
  488. mmsg_vector = result->mmsg_vector;
  489. result->max_iov_frags = num_extra_frags;
  490. for (i = 0; i < max_size; i++) {
  491. if (vp->header_size > 0)
  492. iov = kmalloc_array(3 + num_extra_frags,
  493. sizeof(struct iovec),
  494. GFP_KERNEL
  495. );
  496. else
  497. iov = kmalloc_array(2 + num_extra_frags,
  498. sizeof(struct iovec),
  499. GFP_KERNEL
  500. );
  501. if (iov == NULL)
  502. goto out_fail;
  503. mmsg_vector->msg_hdr.msg_iov = iov;
  504. mmsg_vector->msg_hdr.msg_iovlen = 1;
  505. mmsg_vector->msg_hdr.msg_control = NULL;
  506. mmsg_vector->msg_hdr.msg_controllen = 0;
  507. mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
  508. mmsg_vector->msg_hdr.msg_name = NULL;
  509. mmsg_vector->msg_hdr.msg_namelen = 0;
  510. if (vp->header_size > 0) {
  511. iov->iov_base = kmalloc(header_size, GFP_KERNEL);
  512. if (iov->iov_base == NULL)
  513. goto out_fail;
  514. iov->iov_len = header_size;
  515. mmsg_vector->msg_hdr.msg_iovlen = 2;
  516. iov++;
  517. }
  518. iov->iov_base = NULL;
  519. iov->iov_len = 0;
  520. mmsg_vector++;
  521. }
  522. spin_lock_init(&result->head_lock);
  523. spin_lock_init(&result->tail_lock);
  524. result->queue_depth = 0;
  525. result->head = 0;
  526. result->tail = 0;
  527. return result;
  528. out_skb_fail:
  529. kfree(result->mmsg_vector);
  530. out_mmsg_fail:
  531. kfree(result);
  532. return NULL;
  533. out_fail:
  534. destroy_queue(result);
  535. return NULL;
  536. }
  537. /*
  538. * We do not use the RX queue as a proper wraparound queue for now
  539. * This is not necessary because the consumption via napi_gro_receive()
  540. * happens in-line. While we can try using the return code of
  541. * netif_rx() for flow control there are no drivers doing this today.
  542. * For this RX specific use we ignore the tail/head locks and
  543. * just read into a prepared queue filled with skbuffs.
  544. */
  545. static struct sk_buff *prep_skb(
  546. struct vector_private *vp,
  547. struct user_msghdr *msg)
  548. {
  549. int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
  550. struct sk_buff *result;
  551. int iov_index = 0, len;
  552. struct iovec *iov = msg->msg_iov;
  553. int err, nr_frags, frag;
  554. skb_frag_t *skb_frag;
  555. if (vp->req_size <= linear)
  556. len = linear;
  557. else
  558. len = vp->req_size;
  559. result = alloc_skb_with_frags(
  560. linear,
  561. len - vp->max_packet,
  562. 3,
  563. &err,
  564. GFP_ATOMIC
  565. );
  566. if (vp->header_size > 0)
  567. iov_index++;
  568. if (result == NULL) {
  569. iov[iov_index].iov_base = NULL;
  570. iov[iov_index].iov_len = 0;
  571. goto done;
  572. }
  573. skb_reserve(result, vp->headroom);
  574. result->dev = vp->dev;
  575. skb_put(result, vp->max_packet);
  576. result->data_len = len - vp->max_packet;
  577. result->len += len - vp->max_packet;
  578. skb_reset_mac_header(result);
  579. result->ip_summed = CHECKSUM_NONE;
  580. iov[iov_index].iov_base = result->data;
  581. iov[iov_index].iov_len = vp->max_packet;
  582. iov_index++;
  583. nr_frags = skb_shinfo(result)->nr_frags;
  584. for (frag = 0; frag < nr_frags; frag++) {
  585. skb_frag = &skb_shinfo(result)->frags[frag];
  586. iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
  587. if (iov[iov_index].iov_base != NULL)
  588. iov[iov_index].iov_len = skb_frag_size(skb_frag);
  589. else
  590. iov[iov_index].iov_len = 0;
  591. iov_index++;
  592. }
  593. done:
  594. msg->msg_iovlen = iov_index;
  595. return result;
  596. }
  597. /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
  598. static void prep_queue_for_rx(struct vector_queue *qi)
  599. {
  600. struct vector_private *vp = netdev_priv(qi->dev);
  601. struct mmsghdr *mmsg_vector = qi->mmsg_vector;
  602. void **skbuff_vector = qi->skbuff_vector;
  603. int i;
  604. if (qi->queue_depth == 0)
  605. return;
  606. for (i = 0; i < qi->queue_depth; i++) {
  607. /* it is OK if allocation fails - recvmmsg with NULL data in
  608. * iov argument still performs an RX, it just drops the packet.
  609. * This allows us to stop faffing around with a "drop buffer".
  610. */
  611. *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
  612. skbuff_vector++;
  613. mmsg_vector++;
  614. }
  615. qi->queue_depth = 0;
  616. }
  617. static struct vector_device *find_device(int n)
  618. {
  619. struct vector_device *device;
  620. struct list_head *ele;
  621. spin_lock(&vector_devices_lock);
  622. list_for_each(ele, &vector_devices) {
  623. device = list_entry(ele, struct vector_device, list);
  624. if (device->unit == n)
  625. goto out;
  626. }
  627. device = NULL;
  628. out:
  629. spin_unlock(&vector_devices_lock);
  630. return device;
  631. }
  632. static int vector_parse(char *str, int *index_out, char **str_out,
  633. char **error_out)
  634. {
  635. int n, len, err;
  636. char *start = str;
  637. len = strlen(str);
  638. while ((*str != ':') && (strlen(str) > 1))
  639. str++;
  640. if (*str != ':') {
  641. *error_out = "Expected ':' after device number";
  642. return -EINVAL;
  643. }
  644. *str = '\0';
  645. err = kstrtouint(start, 0, &n);
  646. if (err < 0) {
  647. *error_out = "Bad device number";
  648. return err;
  649. }
  650. str++;
  651. if (find_device(n)) {
  652. *error_out = "Device already configured";
  653. return -EINVAL;
  654. }
  655. *index_out = n;
  656. *str_out = str;
  657. return 0;
  658. }
  659. static int vector_config(char *str, char **error_out)
  660. {
  661. int err, n;
  662. char *params;
  663. struct arglist *parsed;
  664. err = vector_parse(str, &n, &params, error_out);
  665. if (err != 0)
  666. return err;
  667. /* This string is broken up and the pieces used by the underlying
  668. * driver. We should copy it to make sure things do not go wrong
  669. * later.
  670. */
  671. params = kstrdup(params, GFP_KERNEL);
  672. if (params == NULL) {
  673. *error_out = "vector_config failed to strdup string";
  674. return -ENOMEM;
  675. }
  676. parsed = uml_parse_vector_ifspec(params);
  677. if (parsed == NULL) {
  678. *error_out = "vector_config failed to parse parameters";
  679. kfree(params);
  680. return -EINVAL;
  681. }
  682. vector_eth_configure(n, parsed);
  683. return 0;
  684. }
  685. static int vector_id(char **str, int *start_out, int *end_out)
  686. {
  687. char *end;
  688. int n;
  689. n = simple_strtoul(*str, &end, 0);
  690. if ((*end != '\0') || (end == *str))
  691. return -1;
  692. *start_out = n;
  693. *end_out = n;
  694. *str = end;
  695. return n;
  696. }
  697. static int vector_remove(int n, char **error_out)
  698. {
  699. struct vector_device *vec_d;
  700. struct net_device *dev;
  701. struct vector_private *vp;
  702. vec_d = find_device(n);
  703. if (vec_d == NULL)
  704. return -ENODEV;
  705. dev = vec_d->dev;
  706. vp = netdev_priv(dev);
  707. if (vp->fds != NULL)
  708. return -EBUSY;
  709. unregister_netdev(dev);
  710. platform_device_unregister(&vec_d->pdev);
  711. return 0;
  712. }
  713. /*
  714. * There is no shared per-transport initialization code, so
  715. * we will just initialize each interface one by one and
  716. * add them to a list
  717. */
  718. static struct platform_driver uml_net_driver = {
  719. .driver = {
  720. .name = DRIVER_NAME,
  721. },
  722. };
  723. static void vector_device_release(struct device *dev)
  724. {
  725. struct vector_device *device = dev_get_drvdata(dev);
  726. struct net_device *netdev = device->dev;
  727. list_del(&device->list);
  728. kfree(device);
  729. free_netdev(netdev);
  730. }
  731. /* Bog standard recv using recvmsg - not used normally unless the user
  732. * explicitly specifies not to use recvmmsg vector RX.
  733. */
  734. static int vector_legacy_rx(struct vector_private *vp)
  735. {
  736. int pkt_len;
  737. struct user_msghdr hdr;
  738. struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
  739. int iovpos = 0;
  740. struct sk_buff *skb;
  741. int header_check;
  742. hdr.msg_name = NULL;
  743. hdr.msg_namelen = 0;
  744. hdr.msg_iov = (struct iovec *) &iov;
  745. hdr.msg_control = NULL;
  746. hdr.msg_controllen = 0;
  747. hdr.msg_flags = 0;
  748. if (vp->header_size > 0) {
  749. iov[0].iov_base = vp->header_rxbuffer;
  750. iov[0].iov_len = vp->header_size;
  751. }
  752. skb = prep_skb(vp, &hdr);
  753. if (skb == NULL) {
  754. /* Read a packet into drop_buffer and don't do
  755. * anything with it.
  756. */
  757. iov[iovpos].iov_base = drop_buffer;
  758. iov[iovpos].iov_len = DROP_BUFFER_SIZE;
  759. hdr.msg_iovlen = 1;
  760. vp->dev->stats.rx_dropped++;
  761. }
  762. pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
  763. if (pkt_len < 0) {
  764. vp->in_error = true;
  765. return pkt_len;
  766. }
  767. if (skb != NULL) {
  768. if (pkt_len > vp->header_size) {
  769. if (vp->header_size > 0) {
  770. header_check = vp->verify_header(
  771. vp->header_rxbuffer, skb, vp);
  772. if (header_check < 0) {
  773. dev_kfree_skb_irq(skb);
  774. vp->dev->stats.rx_dropped++;
  775. vp->estats.rx_encaps_errors++;
  776. return 0;
  777. }
  778. if (header_check > 0) {
  779. vp->estats.rx_csum_offload_good++;
  780. skb->ip_summed = CHECKSUM_UNNECESSARY;
  781. }
  782. }
  783. pskb_trim(skb, pkt_len - vp->rx_header_size);
  784. skb->protocol = eth_type_trans(skb, skb->dev);
  785. vp->dev->stats.rx_bytes += skb->len;
  786. vp->dev->stats.rx_packets++;
  787. napi_gro_receive(&vp->napi, skb);
  788. } else {
  789. dev_kfree_skb_irq(skb);
  790. }
  791. }
  792. return pkt_len;
  793. }
  794. /*
  795. * Packet at a time TX which falls back to vector TX if the
  796. * underlying transport is busy.
  797. */
  798. static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
  799. {
  800. struct iovec iov[3 + MAX_IOV_SIZE];
  801. int iov_count, pkt_len = 0;
  802. iov[0].iov_base = vp->header_txbuffer;
  803. iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
  804. if (iov_count < 1)
  805. goto drop;
  806. pkt_len = uml_vector_writev(
  807. vp->fds->tx_fd,
  808. (struct iovec *) &iov,
  809. iov_count
  810. );
  811. if (pkt_len < 0)
  812. goto drop;
  813. netif_trans_update(vp->dev);
  814. netif_wake_queue(vp->dev);
  815. if (pkt_len > 0) {
  816. vp->dev->stats.tx_bytes += skb->len;
  817. vp->dev->stats.tx_packets++;
  818. } else {
  819. vp->dev->stats.tx_dropped++;
  820. }
  821. consume_skb(skb);
  822. return pkt_len;
  823. drop:
  824. vp->dev->stats.tx_dropped++;
  825. consume_skb(skb);
  826. if (pkt_len < 0)
  827. vp->in_error = true;
  828. return pkt_len;
  829. }
  830. /*
  831. * Receive as many messages as we can in one call using the special
  832. * mmsg vector matched to an skb vector which we prepared earlier.
  833. */
  834. static int vector_mmsg_rx(struct vector_private *vp, int budget)
  835. {
  836. int packet_count, i;
  837. struct vector_queue *qi = vp->rx_queue;
  838. struct sk_buff *skb;
  839. struct mmsghdr *mmsg_vector = qi->mmsg_vector;
  840. void **skbuff_vector = qi->skbuff_vector;
  841. int header_check;
  842. /* Refresh the vector and make sure it is populated with new skbs and
  843. * that the iovs are updated to point to them.
  844. */
  845. prep_queue_for_rx(qi);
  846. /* Fire the Lazy Gun - get as many packets as we can in one go. */
  847. if (budget > qi->max_depth)
  848. budget = qi->max_depth;
  849. packet_count = uml_vector_recvmmsg(
  850. vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
  851. if (packet_count < 0)
  852. vp->in_error = true;
  853. if (packet_count <= 0)
  854. return packet_count;
  855. /* We treat packet processing as enqueue and buffer refresh as dequeue.
  856. * The queue_depth tells us how many buffers have been used and how
  857. * many we need to prep the next time prep_queue_for_rx() is called.
  858. */
  859. qi->queue_depth = packet_count;
  860. for (i = 0; i < packet_count; i++) {
  861. skb = (*skbuff_vector);
  862. if (mmsg_vector->msg_len > vp->header_size) {
  863. if (vp->header_size > 0) {
  864. header_check = vp->verify_header(
  865. mmsg_vector->msg_hdr.msg_iov->iov_base,
  866. skb,
  867. vp
  868. );
  869. if (header_check < 0) {
  870. /* Overlay header failed to verify - discard.
  871. * We can actually keep this skb and reuse it,
  872. * but that will make the prep logic too
  873. * complex.
  874. */
  875. dev_kfree_skb_irq(skb);
  876. vp->estats.rx_encaps_errors++;
  877. continue;
  878. }
  879. if (header_check > 0) {
  880. vp->estats.rx_csum_offload_good++;
  881. skb->ip_summed = CHECKSUM_UNNECESSARY;
  882. }
  883. }
  884. pskb_trim(skb,
  885. mmsg_vector->msg_len - vp->rx_header_size);
  886. skb->protocol = eth_type_trans(skb, skb->dev);
  887. /*
  888. * We do not need to lock on updating stats here
  889. * The interrupt loop is non-reentrant.
  890. */
  891. vp->dev->stats.rx_bytes += skb->len;
  892. vp->dev->stats.rx_packets++;
  893. napi_gro_receive(&vp->napi, skb);
  894. } else {
  895. /* Overlay header too short to do anything - discard.
  896. * We can actually keep this skb and reuse it,
  897. * but that will make the prep logic too complex.
  898. */
  899. if (skb != NULL)
  900. dev_kfree_skb_irq(skb);
  901. }
  902. (*skbuff_vector) = NULL;
  903. /* Move to the next buffer element */
  904. mmsg_vector++;
  905. skbuff_vector++;
  906. }
  907. if (packet_count > 0) {
  908. if (vp->estats.rx_queue_max < packet_count)
  909. vp->estats.rx_queue_max = packet_count;
  910. vp->estats.rx_queue_running_average =
  911. (vp->estats.rx_queue_running_average + packet_count) >> 1;
  912. }
  913. return packet_count;
  914. }
  915. static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
  916. {
  917. struct vector_private *vp = netdev_priv(dev);
  918. int queue_depth = 0;
  919. if (vp->in_error) {
  920. deactivate_fd(vp->fds->rx_fd, vp->rx_irq);
  921. if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0))
  922. deactivate_fd(vp->fds->tx_fd, vp->tx_irq);
  923. return NETDEV_TX_BUSY;
  924. }
  925. if ((vp->options & VECTOR_TX) == 0) {
  926. writev_tx(vp, skb);
  927. return NETDEV_TX_OK;
  928. }
  929. /* We do BQL only in the vector path, no point doing it in
  930. * packet at a time mode as there is no device queue
  931. */
  932. netdev_sent_queue(vp->dev, skb->len);
  933. queue_depth = vector_enqueue(vp->tx_queue, skb);
  934. if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) {
  935. mod_timer(&vp->tl, vp->coalesce);
  936. return NETDEV_TX_OK;
  937. } else {
  938. queue_depth = vector_send(vp->tx_queue);
  939. if (queue_depth > 0)
  940. napi_schedule(&vp->napi);
  941. }
  942. return NETDEV_TX_OK;
  943. }
  944. static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
  945. {
  946. struct net_device *dev = dev_id;
  947. struct vector_private *vp = netdev_priv(dev);
  948. if (!netif_running(dev))
  949. return IRQ_NONE;
  950. napi_schedule(&vp->napi);
  951. return IRQ_HANDLED;
  952. }
  953. static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
  954. {
  955. struct net_device *dev = dev_id;
  956. struct vector_private *vp = netdev_priv(dev);
  957. if (!netif_running(dev))
  958. return IRQ_NONE;
  959. /* We need to pay attention to it only if we got
  960. * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
  961. * we ignore it. In the future, it may be worth
  962. * improving the IRQ controller a bit to make
  963. * tweaking the IRQ mask less costly.
  964. */
  965. napi_schedule(&vp->napi);
  966. return IRQ_HANDLED;
  967. }
  968. static int irq_rr;
  969. static int vector_net_close(struct net_device *dev)
  970. {
  971. struct vector_private *vp = netdev_priv(dev);
  972. unsigned long flags;
  973. netif_stop_queue(dev);
  974. del_timer(&vp->tl);
  975. if (vp->fds == NULL)
  976. return 0;
  977. /* Disable and free all IRQS */
  978. if (vp->rx_irq > 0) {
  979. um_free_irq(vp->rx_irq, dev);
  980. vp->rx_irq = 0;
  981. }
  982. if (vp->tx_irq > 0) {
  983. um_free_irq(vp->tx_irq, dev);
  984. vp->tx_irq = 0;
  985. }
  986. napi_disable(&vp->napi);
  987. netif_napi_del(&vp->napi);
  988. if (vp->fds->rx_fd > 0) {
  989. if (vp->bpf)
  990. uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
  991. os_close_file(vp->fds->rx_fd);
  992. vp->fds->rx_fd = -1;
  993. }
  994. if (vp->fds->tx_fd > 0) {
  995. os_close_file(vp->fds->tx_fd);
  996. vp->fds->tx_fd = -1;
  997. }
  998. if (vp->bpf != NULL)
  999. kfree(vp->bpf->filter);
  1000. kfree(vp->bpf);
  1001. vp->bpf = NULL;
  1002. kfree(vp->fds->remote_addr);
  1003. kfree(vp->transport_data);
  1004. kfree(vp->header_rxbuffer);
  1005. kfree(vp->header_txbuffer);
  1006. if (vp->rx_queue != NULL)
  1007. destroy_queue(vp->rx_queue);
  1008. if (vp->tx_queue != NULL)
  1009. destroy_queue(vp->tx_queue);
  1010. kfree(vp->fds);
  1011. vp->fds = NULL;
  1012. spin_lock_irqsave(&vp->lock, flags);
  1013. vp->opened = false;
  1014. vp->in_error = false;
  1015. spin_unlock_irqrestore(&vp->lock, flags);
  1016. return 0;
  1017. }
  1018. static int vector_poll(struct napi_struct *napi, int budget)
  1019. {
  1020. struct vector_private *vp = container_of(napi, struct vector_private, napi);
  1021. int work_done = 0;
  1022. int err;
  1023. bool tx_enqueued = false;
  1024. if ((vp->options & VECTOR_TX) != 0)
  1025. tx_enqueued = (vector_send(vp->tx_queue) > 0);
  1026. if ((vp->options & VECTOR_RX) > 0)
  1027. err = vector_mmsg_rx(vp, budget);
  1028. else {
  1029. err = vector_legacy_rx(vp);
  1030. if (err > 0)
  1031. err = 1;
  1032. }
  1033. if (err > 0)
  1034. work_done += err;
  1035. if (tx_enqueued || err > 0)
  1036. napi_schedule(napi);
  1037. if (work_done < budget)
  1038. napi_complete_done(napi, work_done);
  1039. return work_done;
  1040. }
  1041. static void vector_reset_tx(struct work_struct *work)
  1042. {
  1043. struct vector_private *vp =
  1044. container_of(work, struct vector_private, reset_tx);
  1045. netdev_reset_queue(vp->dev);
  1046. netif_start_queue(vp->dev);
  1047. netif_wake_queue(vp->dev);
  1048. }
  1049. static int vector_net_open(struct net_device *dev)
  1050. {
  1051. struct vector_private *vp = netdev_priv(dev);
  1052. unsigned long flags;
  1053. int err = -EINVAL;
  1054. struct vector_device *vdevice;
  1055. spin_lock_irqsave(&vp->lock, flags);
  1056. if (vp->opened) {
  1057. spin_unlock_irqrestore(&vp->lock, flags);
  1058. return -ENXIO;
  1059. }
  1060. vp->opened = true;
  1061. spin_unlock_irqrestore(&vp->lock, flags);
  1062. vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
  1063. vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
  1064. if (vp->fds == NULL)
  1065. goto out_close;
  1066. if (build_transport_data(vp) < 0)
  1067. goto out_close;
  1068. if ((vp->options & VECTOR_RX) > 0) {
  1069. vp->rx_queue = create_queue(
  1070. vp,
  1071. get_depth(vp->parsed),
  1072. vp->rx_header_size,
  1073. MAX_IOV_SIZE
  1074. );
  1075. vp->rx_queue->queue_depth = get_depth(vp->parsed);
  1076. } else {
  1077. vp->header_rxbuffer = kmalloc(
  1078. vp->rx_header_size,
  1079. GFP_KERNEL
  1080. );
  1081. if (vp->header_rxbuffer == NULL)
  1082. goto out_close;
  1083. }
  1084. if ((vp->options & VECTOR_TX) > 0) {
  1085. vp->tx_queue = create_queue(
  1086. vp,
  1087. get_depth(vp->parsed),
  1088. vp->header_size,
  1089. MAX_IOV_SIZE
  1090. );
  1091. } else {
  1092. vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
  1093. if (vp->header_txbuffer == NULL)
  1094. goto out_close;
  1095. }
  1096. netif_napi_add_weight(vp->dev, &vp->napi, vector_poll,
  1097. get_depth(vp->parsed));
  1098. napi_enable(&vp->napi);
  1099. /* READ IRQ */
  1100. err = um_request_irq(
  1101. irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
  1102. IRQ_READ, vector_rx_interrupt,
  1103. IRQF_SHARED, dev->name, dev);
  1104. if (err < 0) {
  1105. netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
  1106. err = -ENETUNREACH;
  1107. goto out_close;
  1108. }
  1109. vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
  1110. dev->irq = irq_rr + VECTOR_BASE_IRQ;
  1111. irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
  1112. /* WRITE IRQ - we need it only if we have vector TX */
  1113. if ((vp->options & VECTOR_TX) > 0) {
  1114. err = um_request_irq(
  1115. irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
  1116. IRQ_WRITE, vector_tx_interrupt,
  1117. IRQF_SHARED, dev->name, dev);
  1118. if (err < 0) {
  1119. netdev_err(dev,
  1120. "vector_open: failed to get tx irq(%d)\n", err);
  1121. err = -ENETUNREACH;
  1122. goto out_close;
  1123. }
  1124. vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
  1125. irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
  1126. }
  1127. if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
  1128. if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
  1129. vp->options |= VECTOR_BPF;
  1130. }
  1131. if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
  1132. vp->bpf = uml_vector_default_bpf(dev->dev_addr);
  1133. if (vp->bpf != NULL)
  1134. uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
  1135. netif_start_queue(dev);
  1136. vector_reset_stats(vp);
  1137. /* clear buffer - it can happen that the host side of the interface
  1138. * is full when we get here. In this case, new data is never queued,
  1139. * SIGIOs never arrive, and the net never works.
  1140. */
  1141. napi_schedule(&vp->napi);
  1142. vdevice = find_device(vp->unit);
  1143. vdevice->opened = 1;
  1144. if ((vp->options & VECTOR_TX) != 0)
  1145. add_timer(&vp->tl);
  1146. return 0;
  1147. out_close:
  1148. vector_net_close(dev);
  1149. return err;
  1150. }
  1151. static void vector_net_set_multicast_list(struct net_device *dev)
  1152. {
  1153. /* TODO: - we can do some BPF games here */
  1154. return;
  1155. }
  1156. static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
  1157. {
  1158. struct vector_private *vp = netdev_priv(dev);
  1159. vp->estats.tx_timeout_count++;
  1160. netif_trans_update(dev);
  1161. schedule_work(&vp->reset_tx);
  1162. }
  1163. static netdev_features_t vector_fix_features(struct net_device *dev,
  1164. netdev_features_t features)
  1165. {
  1166. features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
  1167. return features;
  1168. }
  1169. static int vector_set_features(struct net_device *dev,
  1170. netdev_features_t features)
  1171. {
  1172. struct vector_private *vp = netdev_priv(dev);
  1173. /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
  1174. * no way to negotiate it on raw sockets, so we can change
  1175. * only our side.
  1176. */
  1177. if (features & NETIF_F_GRO)
  1178. /* All new frame buffers will be GRO-sized */
  1179. vp->req_size = 65536;
  1180. else
  1181. /* All new frame buffers will be normal sized */
  1182. vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
  1183. return 0;
  1184. }
  1185. #ifdef CONFIG_NET_POLL_CONTROLLER
  1186. static void vector_net_poll_controller(struct net_device *dev)
  1187. {
  1188. disable_irq(dev->irq);
  1189. vector_rx_interrupt(dev->irq, dev);
  1190. enable_irq(dev->irq);
  1191. }
  1192. #endif
  1193. static void vector_net_get_drvinfo(struct net_device *dev,
  1194. struct ethtool_drvinfo *info)
  1195. {
  1196. strscpy(info->driver, DRIVER_NAME, sizeof(info->driver));
  1197. }
  1198. static int vector_net_load_bpf_flash(struct net_device *dev,
  1199. struct ethtool_flash *efl)
  1200. {
  1201. struct vector_private *vp = netdev_priv(dev);
  1202. struct vector_device *vdevice;
  1203. const struct firmware *fw;
  1204. int result = 0;
  1205. if (!(vp->options & VECTOR_BPF_FLASH)) {
  1206. netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
  1207. return -1;
  1208. }
  1209. spin_lock(&vp->lock);
  1210. if (vp->bpf != NULL) {
  1211. if (vp->opened)
  1212. uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
  1213. kfree(vp->bpf->filter);
  1214. vp->bpf->filter = NULL;
  1215. } else {
  1216. vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
  1217. if (vp->bpf == NULL) {
  1218. netdev_err(dev, "failed to allocate memory for firmware\n");
  1219. goto flash_fail;
  1220. }
  1221. }
  1222. vdevice = find_device(vp->unit);
  1223. if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
  1224. goto flash_fail;
  1225. vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
  1226. if (!vp->bpf->filter)
  1227. goto free_buffer;
  1228. vp->bpf->len = fw->size / sizeof(struct sock_filter);
  1229. release_firmware(fw);
  1230. if (vp->opened)
  1231. result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
  1232. spin_unlock(&vp->lock);
  1233. return result;
  1234. free_buffer:
  1235. release_firmware(fw);
  1236. flash_fail:
  1237. spin_unlock(&vp->lock);
  1238. if (vp->bpf != NULL)
  1239. kfree(vp->bpf->filter);
  1240. kfree(vp->bpf);
  1241. vp->bpf = NULL;
  1242. return -1;
  1243. }
  1244. static void vector_get_ringparam(struct net_device *netdev,
  1245. struct ethtool_ringparam *ring,
  1246. struct kernel_ethtool_ringparam *kernel_ring,
  1247. struct netlink_ext_ack *extack)
  1248. {
  1249. struct vector_private *vp = netdev_priv(netdev);
  1250. ring->rx_max_pending = vp->rx_queue->max_depth;
  1251. ring->tx_max_pending = vp->tx_queue->max_depth;
  1252. ring->rx_pending = vp->rx_queue->max_depth;
  1253. ring->tx_pending = vp->tx_queue->max_depth;
  1254. }
  1255. static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
  1256. {
  1257. switch (stringset) {
  1258. case ETH_SS_TEST:
  1259. *buf = '\0';
  1260. break;
  1261. case ETH_SS_STATS:
  1262. memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
  1263. break;
  1264. default:
  1265. WARN_ON(1);
  1266. break;
  1267. }
  1268. }
  1269. static int vector_get_sset_count(struct net_device *dev, int sset)
  1270. {
  1271. switch (sset) {
  1272. case ETH_SS_TEST:
  1273. return 0;
  1274. case ETH_SS_STATS:
  1275. return VECTOR_NUM_STATS;
  1276. default:
  1277. return -EOPNOTSUPP;
  1278. }
  1279. }
  1280. static void vector_get_ethtool_stats(struct net_device *dev,
  1281. struct ethtool_stats *estats,
  1282. u64 *tmp_stats)
  1283. {
  1284. struct vector_private *vp = netdev_priv(dev);
  1285. memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
  1286. }
  1287. static int vector_get_coalesce(struct net_device *netdev,
  1288. struct ethtool_coalesce *ec,
  1289. struct kernel_ethtool_coalesce *kernel_coal,
  1290. struct netlink_ext_ack *extack)
  1291. {
  1292. struct vector_private *vp = netdev_priv(netdev);
  1293. ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
  1294. return 0;
  1295. }
  1296. static int vector_set_coalesce(struct net_device *netdev,
  1297. struct ethtool_coalesce *ec,
  1298. struct kernel_ethtool_coalesce *kernel_coal,
  1299. struct netlink_ext_ack *extack)
  1300. {
  1301. struct vector_private *vp = netdev_priv(netdev);
  1302. vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
  1303. if (vp->coalesce == 0)
  1304. vp->coalesce = 1;
  1305. return 0;
  1306. }
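/*
 * Worked example (illustrative): with HZ = 100, a request of
 * tx_coalesce_usecs = 20000 yields vp->coalesce = (20000 * 100) / 1000000
 * = 2 jiffies; any request that rounds down to 0 is clamped to 1 jiffy so
 * the TX kick timer (vp->tl) always fires.
 */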
  1307. static const struct ethtool_ops vector_net_ethtool_ops = {
  1308. .supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS,
  1309. .get_drvinfo = vector_net_get_drvinfo,
  1310. .get_link = ethtool_op_get_link,
  1311. .get_ts_info = ethtool_op_get_ts_info,
  1312. .get_ringparam = vector_get_ringparam,
  1313. .get_strings = vector_get_strings,
  1314. .get_sset_count = vector_get_sset_count,
  1315. .get_ethtool_stats = vector_get_ethtool_stats,
  1316. .get_coalesce = vector_get_coalesce,
  1317. .set_coalesce = vector_set_coalesce,
  1318. .flash_device = vector_net_load_bpf_flash,
  1319. };
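/*
 * Usage sketch (not part of the driver; the device name and file name are
 * examples): because .flash_device is wired to vector_net_load_bpf_flash(),
 * a classic BPF socket filter can be pushed from userspace with
 *
 *     ethtool --flash vec0 my_filter.bpf
 *
 * provided the interface was configured with bpfflash=1; otherwise the
 * request is rejected with "loading firmware not permitted".
 */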
  1320. static const struct net_device_ops vector_netdev_ops = {
  1321. .ndo_open = vector_net_open,
  1322. .ndo_stop = vector_net_close,
  1323. .ndo_start_xmit = vector_net_start_xmit,
  1324. .ndo_set_rx_mode = vector_net_set_multicast_list,
  1325. .ndo_tx_timeout = vector_net_tx_timeout,
  1326. .ndo_set_mac_address = eth_mac_addr,
  1327. .ndo_validate_addr = eth_validate_addr,
  1328. .ndo_fix_features = vector_fix_features,
  1329. .ndo_set_features = vector_set_features,
  1330. #ifdef CONFIG_NET_POLL_CONTROLLER
  1331. .ndo_poll_controller = vector_net_poll_controller,
  1332. #endif
  1333. };
  1334. static void vector_timer_expire(struct timer_list *t)
  1335. {
  1336. struct vector_private *vp = from_timer(vp, t, tl);
  1337. vp->estats.tx_kicks++;
  1338. napi_schedule(&vp->napi);
  1339. }
  1340. static void vector_eth_configure(
  1341. int n,
  1342. struct arglist *def
  1343. )
  1344. {
  1345. struct vector_device *device;
  1346. struct net_device *dev;
  1347. struct vector_private *vp;
  1348. int err;
  1349. device = kzalloc(sizeof(*device), GFP_KERNEL);
  1350. if (device == NULL) {
  1351. printk(KERN_ERR "eth_configure failed to allocate struct "
  1352. "vector_device\n");
  1353. return;
  1354. }
  1355. dev = alloc_etherdev(sizeof(struct vector_private));
  1356. if (dev == NULL) {
  1357. printk(KERN_ERR "eth_configure: failed to allocate struct "
  1358. "net_device for vec%d\n", n);
  1359. goto out_free_device;
  1360. }
  1361. dev->mtu = get_mtu(def);
  1362. INIT_LIST_HEAD(&device->list);
  1363. device->unit = n;
  1364. /* If this name ends up conflicting with an existing registered
  1365. * netdevice, that is OK, register_netdev{,ice}() will notice this
  1366. * and fail.
  1367. */
  1368. snprintf(dev->name, sizeof(dev->name), "vec%d", n);
  1369. uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
  1370. vp = netdev_priv(dev);
  1371. /* sysfs register */
  1372. if (!driver_registered) {
  1373. platform_driver_register(&uml_net_driver);
  1374. driver_registered = 1;
  1375. }
  1376. device->pdev.id = n;
  1377. device->pdev.name = DRIVER_NAME;
  1378. device->pdev.dev.release = vector_device_release;
  1379. dev_set_drvdata(&device->pdev.dev, device);
  1380. if (platform_device_register(&device->pdev))
  1381. goto out_free_netdev;
  1382. SET_NETDEV_DEV(dev, &device->pdev.dev);
  1383. device->dev = dev;
  1384. *vp = ((struct vector_private)
  1385. {
  1386. .list = LIST_HEAD_INIT(vp->list),
  1387. .dev = dev,
  1388. .unit = n,
  1389. .options = get_transport_options(def),
  1390. .rx_irq = 0,
  1391. .tx_irq = 0,
  1392. .parsed = def,
  1393. .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
  1394. /* TODO - we need to calculate headroom so that ip header
  1395. * is 16 byte aligned all the time
  1396. */
  1397. .headroom = get_headroom(def),
  1398. .form_header = NULL,
  1399. .verify_header = NULL,
  1400. .header_rxbuffer = NULL,
  1401. .header_txbuffer = NULL,
  1402. .header_size = 0,
  1403. .rx_header_size = 0,
  1404. .rexmit_scheduled = false,
  1405. .opened = false,
  1406. .transport_data = NULL,
  1407. .in_write_poll = false,
  1408. .coalesce = 2,
  1409. .req_size = get_req_size(def),
  1410. .in_error = false,
  1411. .bpf = NULL
  1412. });
  1413. dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
  1414. INIT_WORK(&vp->reset_tx, vector_reset_tx);
  1415. timer_setup(&vp->tl, vector_timer_expire, 0);
  1416. spin_lock_init(&vp->lock);
  1417. /* FIXME */
  1418. dev->netdev_ops = &vector_netdev_ops;
  1419. dev->ethtool_ops = &vector_net_ethtool_ops;
  1420. dev->watchdog_timeo = (HZ >> 1);
  1421. /* primary IRQ - fixme */
  1422. dev->irq = 0; /* we will adjust this once opened */
  1423. rtnl_lock();
  1424. err = register_netdevice(dev);
  1425. rtnl_unlock();
  1426. if (err)
  1427. goto out_undo_user_init;
  1428. spin_lock(&vector_devices_lock);
  1429. list_add(&device->list, &vector_devices);
  1430. spin_unlock(&vector_devices_lock);
  1431. return;
  1432. out_undo_user_init:
  1433. return;
  1434. out_free_netdev:
  1435. free_netdev(dev);
  1436. out_free_device:
  1437. kfree(device);
  1438. }
  1439. /*
  1440. * Invoked late in the init
  1441. */
  1442. static int __init vector_init(void)
  1443. {
  1444. struct list_head *ele;
  1445. struct vector_cmd_line_arg *def;
  1446. struct arglist *parsed;
  1447. list_for_each(ele, &vec_cmd_line) {
  1448. def = list_entry(ele, struct vector_cmd_line_arg, list);
  1449. parsed = uml_parse_vector_ifspec(def->arguments);
  1450. if (parsed != NULL)
  1451. vector_eth_configure(def->unit, parsed);
  1452. }
  1453. return 0;
  1454. }
  1455. /* Invoked at initial argument parsing, only stores
  1456. * arguments until a proper vector_init is called
  1457. * later
  1458. */
  1459. static int __init vector_setup(char *str)
  1460. {
  1461. char *error;
  1462. int n, err;
  1463. struct vector_cmd_line_arg *new;
  1464. err = vector_parse(str, &n, &str, &error);
  1465. if (err) {
  1466. printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
  1467. str, error);
  1468. return 1;
  1469. }
  1470. new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
  1471. if (!new)
  1472. panic("%s: Failed to allocate %zu bytes\n", __func__,
  1473. sizeof(*new));
  1474. INIT_LIST_HEAD(&new->list);
  1475. new->unit = n;
  1476. new->arguments = str;
  1477. list_add_tail(&new->list, &vec_cmd_line);
  1478. return 1;
  1479. }
  1480. __setup("vec", vector_setup);
  1481. __uml_help(vector_setup,
  1482. "vec[0-9]+:<option>=<value>,<option>=<value>\n"
  1483. " Configure a vector io network device.\n\n"
  1484. );
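/*
 * Example (illustrative, not a complete working configuration): a kernel
 * command line entry such as
 *
 *     vec0:transport=raw,mac=52:54:00:a1:b2:c3,gro=1,depth=128
 *
 * is only stored at parse time; the interface itself is created later by
 * vector_init(). Transport-specific options (interface names, addresses,
 * ports) are handled by the userspace helpers in vector_user, not here.
 */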
  1485. late_initcall(vector_init);
  1486. static struct mc_device vector_mc = {
  1487. .list = LIST_HEAD_INIT(vector_mc.list),
  1488. .name = "vec",
  1489. .config = vector_config,
  1490. .get_config = NULL,
  1491. .id = vector_id,
  1492. .remove = vector_remove,
  1493. };
  1494. #ifdef CONFIG_INET
  1495. static int vector_inetaddr_event(
  1496. struct notifier_block *this,
  1497. unsigned long event,
  1498. void *ptr)
  1499. {
  1500. return NOTIFY_DONE;
  1501. }
  1502. static struct notifier_block vector_inetaddr_notifier = {
  1503. .notifier_call = vector_inetaddr_event,
  1504. };
  1505. static void inet_register(void)
  1506. {
  1507. register_inetaddr_notifier(&vector_inetaddr_notifier);
  1508. }
  1509. #else
  1510. static inline void inet_register(void)
  1511. {
  1512. }
  1513. #endif
  1514. static int vector_net_init(void)
  1515. {
  1516. mconsole_register_dev(&vector_mc);
  1517. inet_register();
  1518. return 0;
  1519. }
  1520. __initcall(vector_net_init);