netvsc.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <[email protected]>
  7. * Hank Janssen <[email protected]>
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/kernel.h>
  11. #include <linux/sched.h>
  12. #include <linux/wait.h>
  13. #include <linux/mm.h>
  14. #include <linux/delay.h>
  15. #include <linux/io.h>
  16. #include <linux/slab.h>
  17. #include <linux/netdevice.h>
  18. #include <linux/if_ether.h>
  19. #include <linux/vmalloc.h>
  20. #include <linux/rtnetlink.h>
  21. #include <linux/prefetch.h>
  22. #include <linux/filter.h>
  23. #include <asm/sync_bitops.h>
  24. #include <asm/mshyperv.h>
  25. #include "hyperv_net.h"
  26. #include "netvsc_trace.h"
  27. /*
  28. * Switch the data path from the synthetic interface to the VF
  29. * interface.
  30. */
  31. int netvsc_switch_datapath(struct net_device *ndev, bool vf)
  32. {
  33. struct net_device_context *net_device_ctx = netdev_priv(ndev);
  34. struct hv_device *dev = net_device_ctx->device_ctx;
  35. struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
  36. struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  37. int ret, retry = 0;
  38. /* Block sending traffic to the VF if it is about to be removed */
  39. if (!vf)
  40. net_device_ctx->data_path_is_vf = vf;
  41. memset(init_pkt, 0, sizeof(struct nvsp_message));
  42. init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
  43. if (vf)
  44. init_pkt->msg.v4_msg.active_dp.active_datapath =
  45. NVSP_DATAPATH_VF;
  46. else
  47. init_pkt->msg.v4_msg.active_dp.active_datapath =
  48. NVSP_DATAPATH_SYNTHETIC;
  49. again:
  50. trace_nvsp_send(ndev, init_pkt);
  51. ret = vmbus_sendpacket(dev->channel, init_pkt,
  52. sizeof(struct nvsp_message),
  53. (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
  54. VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  55. /* If we failed to switch to/from the VF, leave data_path_is_vf false,
  56. * so the synthetic path is used to send data.
  57. */
  58. if (ret) {
  59. if (ret != -EAGAIN) {
  60. netdev_err(ndev,
  61. "Unable to send sw datapath msg, err: %d\n",
  62. ret);
  63. return ret;
  64. }
  65. if (retry++ < RETRY_MAX) {
  66. usleep_range(RETRY_US_LO, RETRY_US_HI);
  67. goto again;
  68. } else {
  69. netdev_err(
  70. ndev,
  71. "Retry failed to send sw datapath msg, err: %d\n",
  72. ret);
  73. return ret;
  74. }
  75. }
  76. wait_for_completion(&nv_dev->channel_init_wait);
  77. net_device_ctx->data_path_is_vf = vf;
  78. return 0;
  79. }
  80. /* Worker to set up subchannels on initial setup.
  81. * The initial hotplug event occurs in softirq context
  82. * and cannot wait for the channels.
  83. */
  84. static void netvsc_subchan_work(struct work_struct *w)
  85. {
  86. struct netvsc_device *nvdev =
  87. container_of(w, struct netvsc_device, subchan_work);
  88. struct rndis_device *rdev;
  89. int i, ret;
  90. /* Avoid deadlock with device removal already under RTNL */
  91. if (!rtnl_trylock()) {
  92. schedule_work(w);
  93. return;
  94. }
  95. rdev = nvdev->extension;
  96. if (rdev) {
  97. ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
  98. if (ret == 0) {
  99. netif_device_attach(rdev->ndev);
  100. } else {
  101. /* fallback to only primary channel */
  102. for (i = 1; i < nvdev->num_chn; i++)
  103. netif_napi_del(&nvdev->chan_table[i].napi);
  104. nvdev->max_chn = 1;
  105. nvdev->num_chn = 1;
  106. }
  107. }
  108. rtnl_unlock();
  109. }
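/* Allocate and initialize a netvsc_device. TX starts disabled and is enabled
 * later, once device initialization completes.
 */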
  110. static struct netvsc_device *alloc_net_device(void)
  111. {
  112. struct netvsc_device *net_device;
  113. net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
  114. if (!net_device)
  115. return NULL;
  116. init_waitqueue_head(&net_device->wait_drain);
  117. net_device->destroy = false;
  118. net_device->tx_disable = true;
  119. net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
  120. net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
  121. init_completion(&net_device->channel_init_wait);
  122. init_waitqueue_head(&net_device->subchan_open);
  123. INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
  124. return net_device;
  125. }
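/* RCU callback that frees a netvsc_device: release the RNDIS extension, the
 * receive/send buffers (the original buffers when they were remapped for an
 * isolation VM), the send-section bitmap, and the per-channel receive state.
 */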
  126. static void free_netvsc_device(struct rcu_head *head)
  127. {
  128. struct netvsc_device *nvdev
  129. = container_of(head, struct netvsc_device, rcu);
  130. int i;
  131. kfree(nvdev->extension);
  132. if (nvdev->recv_original_buf)
  133. vfree(nvdev->recv_original_buf);
  134. else
  135. vfree(nvdev->recv_buf);
  136. if (nvdev->send_original_buf)
  137. vfree(nvdev->send_original_buf);
  138. else
  139. vfree(nvdev->send_buf);
  140. bitmap_free(nvdev->send_section_map);
  141. for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
  142. xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
  143. kfree(nvdev->chan_table[i].recv_buf);
  144. vfree(nvdev->chan_table[i].mrc.slots);
  145. }
  146. kfree(nvdev);
  147. }
  148. static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
  149. {
  150. call_rcu(&nvdev->rcu, free_netvsc_device);
  151. }
  152. static void netvsc_revoke_recv_buf(struct hv_device *device,
  153. struct netvsc_device *net_device,
  154. struct net_device *ndev)
  155. {
  156. struct nvsp_message *revoke_packet;
  157. int ret;
  158. /*
  159. * If we got a section count, it means we received a
  160. * SendReceiveBufferComplete msg (i.e. we sent a
  161. * NvspMessage1TypeSendReceiveBuffer msg); therefore, we need
  162. * to send a revoke msg here.
  163. */
  164. if (net_device->recv_section_cnt) {
  165. /* Send the revoke receive buffer */
  166. revoke_packet = &net_device->revoke_packet;
  167. memset(revoke_packet, 0, sizeof(struct nvsp_message));
  168. revoke_packet->hdr.msg_type =
  169. NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
  170. revoke_packet->msg.v1_msg.
  171. revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
  172. trace_nvsp_send(ndev, revoke_packet);
  173. ret = vmbus_sendpacket(device->channel,
  174. revoke_packet,
  175. sizeof(struct nvsp_message),
  176. VMBUS_RQST_ID_NO_RESPONSE,
  177. VM_PKT_DATA_INBAND, 0);
  178. /* If the failure is because the channel was rescinded,
  179. * ignore it, since we cannot send on a rescinded
  180. * channel. This allows us to clean up properly
  181. * even when the channel is rescinded.
  182. */
  183. if (device->channel->rescind)
  184. ret = 0;
  185. /*
  186. * If we failed here, we might as well return and
  187. * accept the leak rather than continue and risk a bugcheck.
  188. */
  189. if (ret != 0) {
  190. netdev_err(ndev, "unable to send "
  191. "revoke receive buffer to netvsp\n");
  192. return;
  193. }
  194. net_device->recv_section_cnt = 0;
  195. }
  196. }
  197. static void netvsc_revoke_send_buf(struct hv_device *device,
  198. struct netvsc_device *net_device,
  199. struct net_device *ndev)
  200. {
  201. struct nvsp_message *revoke_packet;
  202. int ret;
  203. /* Deal with the send buffer we may have set up.
  204. * If we got a send section size, it means we received a
  205. * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
  206. * NVSP_MSG1_TYPE_SEND_SEND_BUF msg); therefore, we need
  207. * to send a revoke msg here.
  208. */
  209. if (net_device->send_section_cnt) {
  210. /* Send the revoke send buffer */
  211. revoke_packet = &net_device->revoke_packet;
  212. memset(revoke_packet, 0, sizeof(struct nvsp_message));
  213. revoke_packet->hdr.msg_type =
  214. NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
  215. revoke_packet->msg.v1_msg.revoke_send_buf.id =
  216. NETVSC_SEND_BUFFER_ID;
  217. trace_nvsp_send(ndev, revoke_packet);
  218. ret = vmbus_sendpacket(device->channel,
  219. revoke_packet,
  220. sizeof(struct nvsp_message),
  221. VMBUS_RQST_ID_NO_RESPONSE,
  222. VM_PKT_DATA_INBAND, 0);
  223. /* If the failure is because the channel was rescinded,
  224. * ignore it, since we cannot send on a rescinded
  225. * channel. This allows us to clean up properly
  226. * even when the channel is rescinded.
  227. */
  228. if (device->channel->rescind)
  229. ret = 0;
  230. /* If we failed here, we might as well return and
  231. * accept the leak rather than continue and risk a bugcheck.
  232. */
  233. if (ret != 0) {
  234. netdev_err(ndev, "unable to send "
  235. "revoke send buffer to netvsp\n");
  236. return;
  237. }
  238. net_device->send_section_cnt = 0;
  239. }
  240. }
  241. static void netvsc_teardown_recv_gpadl(struct hv_device *device,
  242. struct netvsc_device *net_device,
  243. struct net_device *ndev)
  244. {
  245. int ret;
  246. if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
  247. ret = vmbus_teardown_gpadl(device->channel,
  248. &net_device->recv_buf_gpadl_handle);
  249. /* If we failed here, we might as well return and accept the leak
  250. * rather than continue and risk a bugcheck.
  251. */
  252. if (ret != 0) {
  253. netdev_err(ndev,
  254. "unable to teardown receive buffer's gpadl\n");
  255. return;
  256. }
  257. }
  258. }
  259. static void netvsc_teardown_send_gpadl(struct hv_device *device,
  260. struct netvsc_device *net_device,
  261. struct net_device *ndev)
  262. {
  263. int ret;
  264. if (net_device->send_buf_gpadl_handle.gpadl_handle) {
  265. ret = vmbus_teardown_gpadl(device->channel,
  266. &net_device->send_buf_gpadl_handle);
  267. /* If we failed here, we might as well return and accept the leak
  268. * rather than continue and risk a bugcheck.
  269. */
  270. if (ret != 0) {
  271. netdev_err(ndev,
  272. "unable to teardown send buffer's gpadl\n");
  273. return;
  274. }
  275. }
  276. }
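/* Allocate the receive-completion ring for queue q_idx, preferring memory on
 * the NUMA node of the channel's target CPU and falling back to any node.
 */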
  277. int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
  278. {
  279. struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
  280. int node = cpu_to_node(nvchan->channel->target_cpu);
  281. size_t size;
  282. size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
  283. nvchan->mrc.slots = vzalloc_node(size, node);
  284. if (!nvchan->mrc.slots)
  285. nvchan->mrc.slots = vzalloc(size);
  286. return nvchan->mrc.slots ? 0 : -ENOMEM;
  287. }
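/* Allocate the receive and send buffers, establish their GPADLs on the
 * channel, and exchange the NVSP messages that hand them to the host.
 */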
  288. static int netvsc_init_buf(struct hv_device *device,
  289. struct netvsc_device *net_device,
  290. const struct netvsc_device_info *device_info)
  291. {
  292. struct nvsp_1_message_send_receive_buffer_complete *resp;
  293. struct net_device *ndev = hv_get_drvdata(device);
  294. struct nvsp_message *init_packet;
  295. unsigned int buf_size;
  296. int i, ret = 0;
  297. void *vaddr;
  298. /* Get receive buffer area. */
  299. buf_size = device_info->recv_sections * device_info->recv_section_size;
  300. buf_size = roundup(buf_size, PAGE_SIZE);
  301. /* Legacy hosts only allow a smaller receive buffer */
  302. if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
  303. buf_size = min_t(unsigned int, buf_size,
  304. NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
  305. net_device->recv_buf = vzalloc(buf_size);
  306. if (!net_device->recv_buf) {
  307. netdev_err(ndev,
  308. "unable to allocate receive buffer of size %u\n",
  309. buf_size);
  310. ret = -ENOMEM;
  311. goto cleanup;
  312. }
  313. net_device->recv_buf_size = buf_size;
  314. /*
  315. * Establish the gpadl handle for this buffer on this
  316. * channel. Note: This call uses the vmbus connection rather
  317. * than the channel to establish the gpadl handle.
  318. */
  319. ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
  320. buf_size,
  321. &net_device->recv_buf_gpadl_handle);
  322. if (ret != 0) {
  323. netdev_err(ndev,
  324. "unable to establish receive buffer's gpadl\n");
  325. goto cleanup;
  326. }
  327. if (hv_isolation_type_snp()) {
  328. vaddr = hv_map_memory(net_device->recv_buf, buf_size);
  329. if (!vaddr) {
  330. ret = -ENOMEM;
  331. goto cleanup;
  332. }
  333. net_device->recv_original_buf = net_device->recv_buf;
  334. net_device->recv_buf = vaddr;
  335. }
  336. /* Notify the NetVsp of the gpadl handle */
  337. init_packet = &net_device->channel_init_pkt;
  338. memset(init_packet, 0, sizeof(struct nvsp_message));
  339. init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
  340. init_packet->msg.v1_msg.send_recv_buf.
  341. gpadl_handle = net_device->recv_buf_gpadl_handle.gpadl_handle;
  342. init_packet->msg.v1_msg.
  343. send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
  344. trace_nvsp_send(ndev, init_packet);
  345. /* Send the gpadl notification request */
  346. ret = vmbus_sendpacket(device->channel, init_packet,
  347. sizeof(struct nvsp_message),
  348. (unsigned long)init_packet,
  349. VM_PKT_DATA_INBAND,
  350. VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  351. if (ret != 0) {
  352. netdev_err(ndev,
  353. "unable to send receive buffer's gpadl to netvsp\n");
  354. goto cleanup;
  355. }
  356. wait_for_completion(&net_device->channel_init_wait);
  357. /* Check the response */
  358. resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
  359. if (resp->status != NVSP_STAT_SUCCESS) {
  360. netdev_err(ndev,
  361. "Unable to complete receive buffer initialization with NetVsp - status %d\n",
  362. resp->status);
  363. ret = -EINVAL;
  364. goto cleanup;
  365. }
  366. /* Parse the response */
  367. netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
  368. resp->num_sections, resp->sections[0].sub_alloc_size,
  369. resp->sections[0].num_sub_allocs);
  370. /* There should only be one section for the entire receive buffer */
  371. if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
  372. ret = -EINVAL;
  373. goto cleanup;
  374. }
  375. net_device->recv_section_size = resp->sections[0].sub_alloc_size;
  376. net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
  377. /* Ensure buffer will not overflow */
  378. if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
  379. (u64)net_device->recv_section_cnt > (u64)buf_size) {
  380. netdev_err(ndev, "invalid recv_section_size %u\n",
  381. net_device->recv_section_size);
  382. ret = -EINVAL;
  383. goto cleanup;
  384. }
  385. for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
  386. struct netvsc_channel *nvchan = &net_device->chan_table[i];
  387. nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
  388. if (nvchan->recv_buf == NULL) {
  389. ret = -ENOMEM;
  390. goto cleanup;
  391. }
  392. }
  393. /* Set up the receive completion ring.
  394. * Add 1 to the recv_section_cnt because at least one entry in a
  395. * ring buffer has to be empty.
  396. */
  397. net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
  398. ret = netvsc_alloc_recv_comp_ring(net_device, 0);
  399. if (ret)
  400. goto cleanup;
  401. /* Now set up the send buffer. */
  402. buf_size = device_info->send_sections * device_info->send_section_size;
  403. buf_size = round_up(buf_size, PAGE_SIZE);
  404. net_device->send_buf = vzalloc(buf_size);
  405. if (!net_device->send_buf) {
  406. netdev_err(ndev, "unable to allocate send buffer of size %u\n",
  407. buf_size);
  408. ret = -ENOMEM;
  409. goto cleanup;
  410. }
  411. net_device->send_buf_size = buf_size;
  412. /* Establish the gpadl handle for this buffer on this
  413. * channel. Note: This call uses the vmbus connection rather
  414. * than the channel to establish the gpadl handle.
  415. */
  416. ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
  417. buf_size,
  418. &net_device->send_buf_gpadl_handle);
  419. if (ret != 0) {
  420. netdev_err(ndev,
  421. "unable to establish send buffer's gpadl\n");
  422. goto cleanup;
  423. }
  424. if (hv_isolation_type_snp()) {
  425. vaddr = hv_map_memory(net_device->send_buf, buf_size);
  426. if (!vaddr) {
  427. ret = -ENOMEM;
  428. goto cleanup;
  429. }
  430. net_device->send_original_buf = net_device->send_buf;
  431. net_device->send_buf = vaddr;
  432. }
  433. /* Notify the NetVsp of the gpadl handle */
  434. init_packet = &net_device->channel_init_pkt;
  435. memset(init_packet, 0, sizeof(struct nvsp_message));
  436. init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
  437. init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
  438. net_device->send_buf_gpadl_handle.gpadl_handle;
  439. init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
  440. trace_nvsp_send(ndev, init_packet);
  441. /* Send the gpadl notification request */
  442. ret = vmbus_sendpacket(device->channel, init_packet,
  443. sizeof(struct nvsp_message),
  444. (unsigned long)init_packet,
  445. VM_PKT_DATA_INBAND,
  446. VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  447. if (ret != 0) {
  448. netdev_err(ndev,
  449. "unable to send send buffer's gpadl to netvsp\n");
  450. goto cleanup;
  451. }
  452. wait_for_completion(&net_device->channel_init_wait);
  453. /* Check the response */
  454. if (init_packet->msg.v1_msg.
  455. send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
  456. netdev_err(ndev, "Unable to complete send buffer "
  457. "initialization with NetVsp - status %d\n",
  458. init_packet->msg.v1_msg.
  459. send_send_buf_complete.status);
  460. ret = -EINVAL;
  461. goto cleanup;
  462. }
  463. /* Parse the response */
  464. net_device->send_section_size = init_packet->msg.
  465. v1_msg.send_send_buf_complete.section_size;
  466. if (net_device->send_section_size < NETVSC_MTU_MIN) {
  467. netdev_err(ndev, "invalid send_section_size %u\n",
  468. net_device->send_section_size);
  469. ret = -EINVAL;
  470. goto cleanup;
  471. }
  472. /* Section count is simply the size divided by the section size. */
  473. net_device->send_section_cnt = buf_size / net_device->send_section_size;
  474. netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
  475. net_device->send_section_size, net_device->send_section_cnt);
  476. /* Set up state for managing the send buffer. */
  477. net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
  478. GFP_KERNEL);
  479. if (!net_device->send_section_map) {
  480. ret = -ENOMEM;
  481. goto cleanup;
  482. }
  483. goto exit;
  484. cleanup:
  485. netvsc_revoke_recv_buf(device, net_device, ndev);
  486. netvsc_revoke_send_buf(device, net_device, ndev);
  487. netvsc_teardown_recv_gpadl(device, net_device, ndev);
  488. netvsc_teardown_send_gpadl(device, net_device, ndev);
  489. exit:
  490. return ret;
  491. }
  492. /* Negotiate NVSP protocol version */
  493. static int negotiate_nvsp_ver(struct hv_device *device,
  494. struct netvsc_device *net_device,
  495. struct nvsp_message *init_packet,
  496. u32 nvsp_ver)
  497. {
  498. struct net_device *ndev = hv_get_drvdata(device);
  499. int ret;
  500. memset(init_packet, 0, sizeof(struct nvsp_message));
  501. init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
  502. init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
  503. init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
  504. trace_nvsp_send(ndev, init_packet);
  505. /* Send the init request */
  506. ret = vmbus_sendpacket(device->channel, init_packet,
  507. sizeof(struct nvsp_message),
  508. (unsigned long)init_packet,
  509. VM_PKT_DATA_INBAND,
  510. VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  511. if (ret != 0)
  512. return ret;
  513. wait_for_completion(&net_device->channel_init_wait);
  514. if (init_packet->msg.init_msg.init_complete.status !=
  515. NVSP_STAT_SUCCESS)
  516. return -EINVAL;
  517. if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
  518. return 0;
  519. /* NVSPv2 or later: Send NDIS config */
  520. memset(init_packet, 0, sizeof(struct nvsp_message));
  521. init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
  522. init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
  523. init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
  524. if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
  525. if (hv_is_isolation_supported())
  526. netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n");
  527. else
  528. init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
  529. /* Teaming bit is needed to receive link speed updates */
  530. init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
  531. }
  532. if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
  533. init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
  534. trace_nvsp_send(ndev, init_packet);
  535. ret = vmbus_sendpacket(device->channel, init_packet,
  536. sizeof(struct nvsp_message),
  537. VMBUS_RQST_ID_NO_RESPONSE,
  538. VM_PKT_DATA_INBAND, 0);
  539. return ret;
  540. }
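/* Negotiate the highest NVSP protocol version supported by both guest and
 * host, send the NDIS version, and then set up the receive and send buffers.
 */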
  541. static int netvsc_connect_vsp(struct hv_device *device,
  542. struct netvsc_device *net_device,
  543. const struct netvsc_device_info *device_info)
  544. {
  545. struct net_device *ndev = hv_get_drvdata(device);
  546. static const u32 ver_list[] = {
  547. NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
  548. NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
  549. NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
  550. };
  551. struct nvsp_message *init_packet;
  552. int ndis_version, i, ret;
  553. init_packet = &net_device->channel_init_pkt;
  554. /* Negotiate the latest NVSP protocol supported */
  555. for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
  556. if (negotiate_nvsp_ver(device, net_device, init_packet,
  557. ver_list[i]) == 0) {
  558. net_device->nvsp_version = ver_list[i];
  559. break;
  560. }
  561. if (i < 0) {
  562. ret = -EPROTO;
  563. goto cleanup;
  564. }
  565. if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) {
  566. netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n",
  567. net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61);
  568. ret = -EPROTO;
  569. goto cleanup;
  570. }
  571. pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
  572. /* Send the ndis version */
  573. memset(init_packet, 0, sizeof(struct nvsp_message));
  574. if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
  575. ndis_version = 0x00060001;
  576. else
  577. ndis_version = 0x0006001e;
  578. init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
  579. init_packet->msg.v1_msg.
  580. send_ndis_ver.ndis_major_ver =
  581. (ndis_version & 0xFFFF0000) >> 16;
  582. init_packet->msg.v1_msg.
  583. send_ndis_ver.ndis_minor_ver =
  584. ndis_version & 0xFFFF;
  585. trace_nvsp_send(ndev, init_packet);
  586. /* Send the init request */
  587. ret = vmbus_sendpacket(device->channel, init_packet,
  588. sizeof(struct nvsp_message),
  589. VMBUS_RQST_ID_NO_RESPONSE,
  590. VM_PKT_DATA_INBAND, 0);
  591. if (ret != 0)
  592. goto cleanup;
  593. ret = netvsc_init_buf(device, net_device, device_info);
  594. cleanup:
  595. return ret;
  596. }
  597. /*
  598. * netvsc_device_remove - Callback when the root bus device is removed
  599. */
  600. void netvsc_device_remove(struct hv_device *device)
  601. {
  602. struct net_device *ndev = hv_get_drvdata(device);
  603. struct net_device_context *net_device_ctx = netdev_priv(ndev);
  604. struct netvsc_device *net_device
  605. = rtnl_dereference(net_device_ctx->nvdev);
  606. int i;
  607. /*
  608. * Revoke receive buffer. If host is pre-Win2016 then tear down
  609. * receive buffer GPADL. Do the same for send buffer.
  610. */
  611. netvsc_revoke_recv_buf(device, net_device, ndev);
  612. if (vmbus_proto_version < VERSION_WIN10)
  613. netvsc_teardown_recv_gpadl(device, net_device, ndev);
  614. netvsc_revoke_send_buf(device, net_device, ndev);
  615. if (vmbus_proto_version < VERSION_WIN10)
  616. netvsc_teardown_send_gpadl(device, net_device, ndev);
  617. RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
  618. /* Disable NAPI and disassociate its context from the device. */
  619. for (i = 0; i < net_device->num_chn; i++) {
  620. /* See also vmbus_reset_channel_cb(). */
  621. napi_disable(&net_device->chan_table[i].napi);
  622. netif_napi_del(&net_device->chan_table[i].napi);
  623. }
  624. /*
  625. * At this point, no one should be accessing net_device
  626. * except in this function.
  627. */
  628. netdev_dbg(ndev, "net device safe to remove\n");
  629. /* Now, we can close the channel safely */
  630. vmbus_close(device->channel);
  631. /*
  632. * If host is Win2016 or higher then we do the GPADL tear down
  633. * here after VMBus is closed.
  634. */
  635. if (vmbus_proto_version >= VERSION_WIN10) {
  636. netvsc_teardown_recv_gpadl(device, net_device, ndev);
  637. netvsc_teardown_send_gpadl(device, net_device, ndev);
  638. }
  639. if (net_device->recv_original_buf)
  640. hv_unmap_memory(net_device->recv_buf);
  641. if (net_device->send_original_buf)
  642. hv_unmap_memory(net_device->send_buf);
  643. /* Release all resources */
  644. free_netvsc_device_rcu(net_device);
  645. }
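/* Transmit flow-control thresholds, as a percentage of free space in the
 * outbound VMBus ring: the queue is stopped when free space falls below the
 * low watermark and woken once it rises above the high watermark (or the
 * queue has drained). For instance, with a hypothetical 16 KB of outbound
 * ring space, the queue would stop below roughly 1.6 KB free and wake above
 * roughly 3.2 KB free.
 */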
  646. #define RING_AVAIL_PERCENT_HIWATER 20
  647. #define RING_AVAIL_PERCENT_LOWATER 10
  648. static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
  649. u32 index)
  650. {
  651. sync_change_bit(index, net_device->send_section_map);
  652. }
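/* Handle the host's completion of an RNDIS data packet: free the send-buffer
 * section, update per-queue TX statistics, unmap any DMA ranges, release the
 * skb, and wake the transmit queue (or a waiter draining the device) when
 * appropriate.
 */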
  653. static void netvsc_send_tx_complete(struct net_device *ndev,
  654. struct netvsc_device *net_device,
  655. struct vmbus_channel *channel,
  656. const struct vmpacket_descriptor *desc,
  657. int budget)
  658. {
  659. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  660. struct sk_buff *skb;
  661. u16 q_idx = 0;
  662. int queue_sends;
  663. u64 cmd_rqst;
  664. cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
  665. if (cmd_rqst == VMBUS_RQST_ERROR) {
  666. netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
  667. return;
  668. }
  669. skb = (struct sk_buff *)(unsigned long)cmd_rqst;
  670. /* Notify the layer above us */
  671. if (likely(skb)) {
  672. struct hv_netvsc_packet *packet
  673. = (struct hv_netvsc_packet *)skb->cb;
  674. u32 send_index = packet->send_buf_index;
  675. struct netvsc_stats_tx *tx_stats;
  676. if (send_index != NETVSC_INVALID_INDEX)
  677. netvsc_free_send_slot(net_device, send_index);
  678. q_idx = packet->q_idx;
  679. tx_stats = &net_device->chan_table[q_idx].tx_stats;
  680. u64_stats_update_begin(&tx_stats->syncp);
  681. tx_stats->packets += packet->total_packets;
  682. tx_stats->bytes += packet->total_bytes;
  683. u64_stats_update_end(&tx_stats->syncp);
  684. netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
  685. napi_consume_skb(skb, budget);
  686. }
  687. queue_sends =
  688. atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
  689. if (unlikely(net_device->destroy)) {
  690. if (queue_sends == 0)
  691. wake_up(&net_device->wait_drain);
  692. } else {
  693. struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
  694. if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
  695. (hv_get_avail_to_write_percent(&channel->outbound) >
  696. RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
  697. netif_tx_wake_queue(txq);
  698. ndev_ctx->eth_stats.wake_queue++;
  699. }
  700. }
  701. }
  702. static void netvsc_send_completion(struct net_device *ndev,
  703. struct netvsc_device *net_device,
  704. struct vmbus_channel *incoming_channel,
  705. const struct vmpacket_descriptor *desc,
  706. int budget)
  707. {
  708. const struct nvsp_message *nvsp_packet;
  709. u32 msglen = hv_pkt_datalen(desc);
  710. struct nvsp_message *pkt_rqst;
  711. u64 cmd_rqst;
  712. u32 status;
  713. /* First check if this is a VMBUS completion without data payload */
  714. if (!msglen) {
  715. cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
  716. desc->trans_id);
  717. if (cmd_rqst == VMBUS_RQST_ERROR) {
  718. netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
  719. return;
  720. }
  721. pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
  722. switch (pkt_rqst->hdr.msg_type) {
  723. case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
  724. complete(&net_device->channel_init_wait);
  725. break;
  726. default:
  727. netdev_err(ndev, "Unexpected VMBUS completion!!\n");
  728. }
  729. return;
  730. }
  731. /* Ensure packet is big enough to read header fields */
  732. if (msglen < sizeof(struct nvsp_message_header)) {
  733. netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
  734. return;
  735. }
  736. nvsp_packet = hv_pkt_data(desc);
  737. switch (nvsp_packet->hdr.msg_type) {
  738. case NVSP_MSG_TYPE_INIT_COMPLETE:
  739. if (msglen < sizeof(struct nvsp_message_header) +
  740. sizeof(struct nvsp_message_init_complete)) {
  741. netdev_err(ndev, "nvsp_msg length too small: %u\n",
  742. msglen);
  743. return;
  744. }
  745. fallthrough;
  746. case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
  747. if (msglen < sizeof(struct nvsp_message_header) +
  748. sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
  749. netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
  750. msglen);
  751. return;
  752. }
  753. fallthrough;
  754. case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
  755. if (msglen < sizeof(struct nvsp_message_header) +
  756. sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
  757. netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
  758. msglen);
  759. return;
  760. }
  761. fallthrough;
  762. case NVSP_MSG5_TYPE_SUBCHANNEL:
  763. if (msglen < sizeof(struct nvsp_message_header) +
  764. sizeof(struct nvsp_5_subchannel_complete)) {
  765. netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
  766. msglen);
  767. return;
  768. }
  769. /* Copy the response back */
  770. memcpy(&net_device->channel_init_pkt, nvsp_packet,
  771. sizeof(struct nvsp_message));
  772. complete(&net_device->channel_init_wait);
  773. break;
  774. case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
  775. if (msglen < sizeof(struct nvsp_message_header) +
  776. sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
  777. if (net_ratelimit())
  778. netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
  779. msglen);
  780. return;
  781. }
  782. /* If status indicates an error, output a message so we know
  783. * there's a problem. But process the completion anyway so the
  784. * resources are released.
  785. */
  786. status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
  787. if (status != NVSP_STAT_SUCCESS && net_ratelimit())
  788. netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
  789. status);
  790. netvsc_send_tx_complete(ndev, net_device, incoming_channel,
  791. desc, budget);
  792. break;
  793. default:
  794. netdev_err(ndev,
  795. "Unknown send completion type %d received!!\n",
  796. nvsp_packet->hdr.msg_type);
  797. }
  798. }
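/* Claim a free send-buffer section, or return NETVSC_INVALID_INDEX if all
 * sections are currently in use.
 */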
  799. static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
  800. {
  801. unsigned long *map_addr = net_device->send_section_map;
  802. unsigned int i;
  803. for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
  804. if (sync_test_and_set_bit(i, map_addr) == 0)
  805. return i;
  806. }
  807. return NETVSC_INVALID_INDEX;
  808. }
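/* Copy the packet's page-buffer data into the selected send-buffer section
 * at offset pend_size, padding the tail to the packet alignment when more
 * packets are expected to be batched behind it (xmit_more).
 */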
  809. static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
  810. unsigned int section_index,
  811. u32 pend_size,
  812. struct hv_netvsc_packet *packet,
  813. struct rndis_message *rndis_msg,
  814. struct hv_page_buffer *pb,
  815. bool xmit_more)
  816. {
  817. char *start = net_device->send_buf;
  818. char *dest = start + (section_index * net_device->send_section_size)
  819. + pend_size;
  820. int i;
  821. u32 padding = 0;
  822. u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
  823. packet->page_buf_cnt;
  824. u32 remain;
  825. /* Add padding */
  826. remain = packet->total_data_buflen & (net_device->pkt_align - 1);
  827. if (xmit_more && remain) {
  828. padding = net_device->pkt_align - remain;
  829. rndis_msg->msg_len += padding;
  830. packet->total_data_buflen += padding;
  831. }
  832. for (i = 0; i < page_count; i++) {
  833. char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
  834. u32 offset = pb[i].offset;
  835. u32 len = pb[i].len;
  836. memcpy(dest, (src + offset), len);
  837. dest += len;
  838. }
  839. if (padding)
  840. memset(dest, 0, padding);
  841. }
  842. void netvsc_dma_unmap(struct hv_device *hv_dev,
  843. struct hv_netvsc_packet *packet)
  844. {
  845. int i;
  846. if (!hv_is_isolation_supported())
  847. return;
  848. if (!packet->dma_range)
  849. return;
  850. for (i = 0; i < packet->page_buf_cnt; i++)
  851. dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
  852. packet->dma_range[i].mapping_size,
  853. DMA_TO_DEVICE);
  854. kfree(packet->dma_range);
  855. }
  856. /* netvsc_dma_map - Map swiotlb bounce buffer with data page of
  857. * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
  858. * VM.
  859. *
  860. * In an isolation VM, the netvsc send buffer has been marked visible to
  861. * the host, so data copied to the send buffer does not need to use the
  862. * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
  863. * may not be copied to the send buffer, so these pages need to be
  864. * mapped with the swiotlb bounce buffer; netvsc_dma_map() does
  865. * that. The pfns in the struct hv_page_buffer need to be converted
  866. * to the bounce buffer's pfn. The loop here is necessary because the
  867. * entries in the page buffer array are not necessarily full
  868. * pages of data. Each entry in the array has a separate offset and
  869. * len that may be non-zero, even for entries in the middle of the
  870. * array. And the entries are not physically contiguous. So each
  871. * entry must be individually mapped rather than as a contiguous unit,
  872. * which is why dma_map_sg() is not used here.
  873. */
  874. static int netvsc_dma_map(struct hv_device *hv_dev,
  875. struct hv_netvsc_packet *packet,
  876. struct hv_page_buffer *pb)
  877. {
  878. u32 page_count = packet->page_buf_cnt;
  879. dma_addr_t dma;
  880. int i;
  881. if (!hv_is_isolation_supported())
  882. return 0;
  883. packet->dma_range = kcalloc(page_count,
  884. sizeof(*packet->dma_range),
  885. GFP_ATOMIC);
  886. if (!packet->dma_range)
  887. return -ENOMEM;
  888. for (i = 0; i < page_count; i++) {
  889. char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
  890. + pb[i].offset);
  891. u32 len = pb[i].len;
  892. dma = dma_map_single(&hv_dev->device, src, len,
  893. DMA_TO_DEVICE);
  894. if (dma_mapping_error(&hv_dev->device, dma)) {
  895. kfree(packet->dma_range);
  896. return -ENOMEM;
  897. }
  898. /* pb[].offset and pb[].len are not changed during DMA mapping,
  899. * so they are not reassigned.
  900. */
  901. packet->dma_range[i].dma = dma;
  902. packet->dma_range[i].mapping_size = len;
  903. pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
  904. }
  905. return 0;
  906. }
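/* Build an NVSP_MSG1_TYPE_SEND_RNDIS_PKT message for the packet and post it
 * on the channel for its queue, either as a page-buffer packet (DMA-mapping
 * the pages in an isolation VM) or as an inband packet, applying the
 * ring-space based queue flow control described above.
 */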
  907. static inline int netvsc_send_pkt(
  908. struct hv_device *device,
  909. struct hv_netvsc_packet *packet,
  910. struct netvsc_device *net_device,
  911. struct hv_page_buffer *pb,
  912. struct sk_buff *skb)
  913. {
  914. struct nvsp_message nvmsg;
  915. struct nvsp_1_message_send_rndis_packet *rpkt =
  916. &nvmsg.msg.v1_msg.send_rndis_pkt;
  917. struct netvsc_channel * const nvchan =
  918. &net_device->chan_table[packet->q_idx];
  919. struct vmbus_channel *out_channel = nvchan->channel;
  920. struct net_device *ndev = hv_get_drvdata(device);
  921. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  922. struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
  923. u64 req_id;
  924. int ret;
  925. u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
  926. memset(&nvmsg, 0, sizeof(struct nvsp_message));
  927. nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
  928. if (skb)
  929. rpkt->channel_type = 0; /* 0 is RMC_DATA */
  930. else
  931. rpkt->channel_type = 1; /* 1 is RMC_CONTROL */
  932. rpkt->send_buf_section_index = packet->send_buf_index;
  933. if (packet->send_buf_index == NETVSC_INVALID_INDEX)
  934. rpkt->send_buf_section_size = 0;
  935. else
  936. rpkt->send_buf_section_size = packet->total_data_buflen;
  937. req_id = (ulong)skb;
  938. if (out_channel->rescind)
  939. return -ENODEV;
  940. trace_nvsp_send_pkt(ndev, out_channel, rpkt);
  941. packet->dma_range = NULL;
  942. if (packet->page_buf_cnt) {
  943. if (packet->cp_partial)
  944. pb += packet->rmsg_pgcnt;
  945. ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
  946. if (ret) {
  947. ret = -EAGAIN;
  948. goto exit;
  949. }
  950. ret = vmbus_sendpacket_pagebuffer(out_channel,
  951. pb, packet->page_buf_cnt,
  952. &nvmsg, sizeof(nvmsg),
  953. req_id);
  954. if (ret)
  955. netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
  956. } else {
  957. ret = vmbus_sendpacket(out_channel,
  958. &nvmsg, sizeof(nvmsg),
  959. req_id, VM_PKT_DATA_INBAND,
  960. VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  961. }
  962. exit:
  963. if (ret == 0) {
  964. atomic_inc_return(&nvchan->queue_sends);
  965. if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
  966. netif_tx_stop_queue(txq);
  967. ndev_ctx->eth_stats.stop_queue++;
  968. }
  969. } else if (ret == -EAGAIN) {
  970. netif_tx_stop_queue(txq);
  971. ndev_ctx->eth_stats.stop_queue++;
  972. } else {
  973. netdev_err(ndev,
  974. "Unable to send packet pages %u len %u, ret %d\n",
  975. packet->page_buf_cnt, packet->total_data_buflen,
  976. ret);
  977. }
  978. if (netif_tx_queue_stopped(txq) &&
  979. atomic_read(&nvchan->queue_sends) < 1 &&
  980. !net_device->tx_disable) {
  981. netif_tx_wake_queue(txq);
  982. ndev_ctx->eth_stats.wake_queue++;
  983. if (ret == -EAGAIN)
  984. ret = -ENOSPC;
  985. }
  986. return ret;
  987. }
  988. /* Move packet out of multi send data (msd), and clear msd */
  989. static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
  990. struct sk_buff **msd_skb,
  991. struct multi_send_data *msdp)
  992. {
  993. *msd_skb = msdp->skb;
  994. *msd_send = msdp->pkt;
  995. msdp->skb = NULL;
  996. msdp->pkt = NULL;
  997. msdp->count = 0;
  998. }
  999. /* RCU already held by caller */
  1000. /* The batching/bouncing logic attempts to optimize
  1001. * performance.
  1002. *
  1003. * For small, non-LSO packets we copy the packet to a send buffer
  1004. * which is pre-registered with the Hyper-V side. This enables the
  1005. * hypervisor to avoid remapping the aperture to access the packet
  1006. * descriptor and data.
  1007. *
  1008. * If we already started using a buffer and the netdev is transmitting
  1009. * a burst of packets, keep on copying into the buffer until it is
  1010. * full or we are done collecting a burst. If there is an existing
  1011. * buffer with space for the RNDIS descriptor but not the packet, copy
  1012. * the RNDIS descriptor to the buffer, keeping the packet in place.
  1013. *
  1014. * If we do batching and send more than one packet using a single
  1015. * NetVSC message, free the SKBs of the packets copied, except for the
  1016. * last packet. This is done to streamline the handling of the case
  1017. * where the last packet only had the RNDIS descriptor copied to the
  1018. * send buffer, with the data pointers included in the NetVSC message.
  1019. */
  1020. int netvsc_send(struct net_device *ndev,
  1021. struct hv_netvsc_packet *packet,
  1022. struct rndis_message *rndis_msg,
  1023. struct hv_page_buffer *pb,
  1024. struct sk_buff *skb,
  1025. bool xdp_tx)
  1026. {
  1027. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1028. struct netvsc_device *net_device
  1029. = rcu_dereference_bh(ndev_ctx->nvdev);
  1030. struct hv_device *device = ndev_ctx->device_ctx;
  1031. int ret = 0;
  1032. struct netvsc_channel *nvchan;
  1033. u32 pktlen = packet->total_data_buflen, msd_len = 0;
  1034. unsigned int section_index = NETVSC_INVALID_INDEX;
  1035. struct multi_send_data *msdp;
  1036. struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
  1037. struct sk_buff *msd_skb = NULL;
  1038. bool try_batch, xmit_more;
  1039. /* If device is rescinded, return error and packet will get dropped. */
  1040. if (unlikely(!net_device || net_device->destroy))
  1041. return -ENODEV;
  1042. nvchan = &net_device->chan_table[packet->q_idx];
  1043. packet->send_buf_index = NETVSC_INVALID_INDEX;
  1044. packet->cp_partial = false;
  1045. /* Send a control message or an XDP packet directly, without accessing
  1046. * the msd (Multi-Send Data) field, which may be changed during data
  1047. * packet processing.
  1048. */
  1049. if (!skb || xdp_tx)
  1050. return netvsc_send_pkt(device, packet, net_device, pb, skb);
  1051. /* batch packets in send buffer if possible */
  1052. msdp = &nvchan->msd;
  1053. if (msdp->pkt)
  1054. msd_len = msdp->pkt->total_data_buflen;
  1055. try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
  1056. if (try_batch && msd_len + pktlen + net_device->pkt_align <
  1057. net_device->send_section_size) {
  1058. section_index = msdp->pkt->send_buf_index;
  1059. } else if (try_batch && msd_len + packet->rmsg_size <
  1060. net_device->send_section_size) {
  1061. section_index = msdp->pkt->send_buf_index;
  1062. packet->cp_partial = true;
  1063. } else if (pktlen + net_device->pkt_align <
  1064. net_device->send_section_size) {
  1065. section_index = netvsc_get_next_send_section(net_device);
  1066. if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
  1067. ++ndev_ctx->eth_stats.tx_send_full;
  1068. } else {
  1069. move_pkt_msd(&msd_send, &msd_skb, msdp);
  1070. msd_len = 0;
  1071. }
  1072. }
  1073. /* Keep aggregating only if the stack says more data is coming,
  1074. * we are not doing a mixed-mode send, and the queue is not flow blocked.
  1075. */
  1076. xmit_more = netdev_xmit_more() &&
  1077. !packet->cp_partial &&
  1078. !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
  1079. if (section_index != NETVSC_INVALID_INDEX) {
  1080. netvsc_copy_to_send_buf(net_device,
  1081. section_index, msd_len,
  1082. packet, rndis_msg, pb, xmit_more);
  1083. packet->send_buf_index = section_index;
  1084. if (packet->cp_partial) {
  1085. packet->page_buf_cnt -= packet->rmsg_pgcnt;
  1086. packet->total_data_buflen = msd_len + packet->rmsg_size;
  1087. } else {
  1088. packet->page_buf_cnt = 0;
  1089. packet->total_data_buflen += msd_len;
  1090. }
  1091. if (msdp->pkt) {
  1092. packet->total_packets += msdp->pkt->total_packets;
  1093. packet->total_bytes += msdp->pkt->total_bytes;
  1094. }
  1095. if (msdp->skb)
  1096. dev_consume_skb_any(msdp->skb);
  1097. if (xmit_more) {
  1098. msdp->skb = skb;
  1099. msdp->pkt = packet;
  1100. msdp->count++;
  1101. } else {
  1102. cur_send = packet;
  1103. msdp->skb = NULL;
  1104. msdp->pkt = NULL;
  1105. msdp->count = 0;
  1106. }
  1107. } else {
  1108. move_pkt_msd(&msd_send, &msd_skb, msdp);
  1109. cur_send = packet;
  1110. }
  1111. if (msd_send) {
  1112. int m_ret = netvsc_send_pkt(device, msd_send, net_device,
  1113. NULL, msd_skb);
  1114. if (m_ret != 0) {
  1115. netvsc_free_send_slot(net_device,
  1116. msd_send->send_buf_index);
  1117. dev_kfree_skb_any(msd_skb);
  1118. }
  1119. }
  1120. if (cur_send)
  1121. ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
  1122. if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
  1123. netvsc_free_send_slot(net_device, section_index);
  1124. return ret;
  1125. }
  1126. /* Send pending recv completions */
  1127. static int send_recv_completions(struct net_device *ndev,
  1128. struct netvsc_device *nvdev,
  1129. struct netvsc_channel *nvchan)
  1130. {
  1131. struct multi_recv_comp *mrc = &nvchan->mrc;
  1132. struct recv_comp_msg {
  1133. struct nvsp_message_header hdr;
  1134. u32 status;
  1135. } __packed;
  1136. struct recv_comp_msg msg = {
  1137. .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
  1138. };
  1139. int ret;
  1140. while (mrc->first != mrc->next) {
  1141. const struct recv_comp_data *rcd
  1142. = mrc->slots + mrc->first;
  1143. msg.status = rcd->status;
  1144. ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
  1145. rcd->tid, VM_PKT_COMP, 0);
  1146. if (unlikely(ret)) {
  1147. struct net_device_context *ndev_ctx = netdev_priv(ndev);
  1148. ++ndev_ctx->eth_stats.rx_comp_busy;
  1149. return ret;
  1150. }
  1151. if (++mrc->first == nvdev->recv_completion_cnt)
  1152. mrc->first = 0;
  1153. }
  1154. /* receive completion ring has been emptied */
  1155. if (unlikely(nvdev->destroy))
  1156. wake_up(&nvdev->wait_drain);
  1157. return 0;
  1158. }
  1159. /* Count how many receive completions are outstanding */
  1160. static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
  1161. const struct multi_recv_comp *mrc,
  1162. u32 *filled, u32 *avail)
  1163. {
  1164. u32 count = nvdev->recv_completion_cnt;
  1165. if (mrc->next >= mrc->first)
  1166. *filled = mrc->next - mrc->first;
  1167. else
  1168. *filled = (count - mrc->first) + mrc->next;
  1169. *avail = count - *filled - 1;
  1170. }
  1171. /* Add receive complete to ring to send to host. */
  1172. static void enq_receive_complete(struct net_device *ndev,
  1173. struct netvsc_device *nvdev, u16 q_idx,
  1174. u64 tid, u32 status)
  1175. {
  1176. struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
  1177. struct multi_recv_comp *mrc = &nvchan->mrc;
  1178. struct recv_comp_data *rcd;
  1179. u32 filled, avail;
  1180. recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
  1181. if (unlikely(filled > NAPI_POLL_WEIGHT)) {
  1182. send_recv_completions(ndev, nvdev, nvchan);
  1183. recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
  1184. }
  1185. if (unlikely(!avail)) {
  1186. netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
  1187. q_idx, tid);
  1188. return;
  1189. }
  1190. rcd = mrc->slots + mrc->next;
  1191. rcd->tid = tid;
  1192. rcd->status = status;
  1193. if (++mrc->next == nvdev->recv_completion_cnt)
  1194. mrc->next = 0;
  1195. }
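/* Process a VM_PKT_DATA_USING_XFER_PAGES packet: validate the transfer-page
 * header and each range, pass every contained RNDIS packet to the RNDIS
 * filter, then queue a receive completion for the host. Returns the number
 * of ranges processed.
 */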
  1196. static int netvsc_receive(struct net_device *ndev,
  1197. struct netvsc_device *net_device,
  1198. struct netvsc_channel *nvchan,
  1199. const struct vmpacket_descriptor *desc)
  1200. {
  1201. struct net_device_context *net_device_ctx = netdev_priv(ndev);
  1202. struct vmbus_channel *channel = nvchan->channel;
  1203. const struct vmtransfer_page_packet_header *vmxferpage_packet
  1204. = container_of(desc, const struct vmtransfer_page_packet_header, d);
  1205. const struct nvsp_message *nvsp = hv_pkt_data(desc);
  1206. u32 msglen = hv_pkt_datalen(desc);
  1207. u16 q_idx = channel->offermsg.offer.sub_channel_index;
  1208. char *recv_buf = net_device->recv_buf;
  1209. u32 status = NVSP_STAT_SUCCESS;
  1210. int i;
  1211. int count = 0;
  1212. /* Ensure packet is big enough to read header fields */
  1213. if (msglen < sizeof(struct nvsp_message_header)) {
  1214. netif_err(net_device_ctx, rx_err, ndev,
  1215. "invalid nvsp header, length too small: %u\n",
  1216. msglen);
  1217. return 0;
  1218. }
  1219. /* Make sure this is a valid nvsp packet */
  1220. if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
  1221. netif_err(net_device_ctx, rx_err, ndev,
  1222. "Unknown nvsp packet type received %u\n",
  1223. nvsp->hdr.msg_type);
  1224. return 0;
  1225. }
  1226. /* Validate xfer page pkt header */
  1227. if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
  1228. netif_err(net_device_ctx, rx_err, ndev,
  1229. "Invalid xfer page pkt, offset too small: %u\n",
  1230. desc->offset8 << 3);
  1231. return 0;
  1232. }
  1233. if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
  1234. netif_err(net_device_ctx, rx_err, ndev,
  1235. "Invalid xfer page set id - expecting %x got %x\n",
  1236. NETVSC_RECEIVE_BUFFER_ID,
  1237. vmxferpage_packet->xfer_pageset_id);
  1238. return 0;
  1239. }
  1240. count = vmxferpage_packet->range_cnt;
  1241. /* Check count for a valid value */
  1242. if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
  1243. netif_err(net_device_ctx, rx_err, ndev,
  1244. "Range count is not valid: %d\n",
  1245. count);
  1246. return 0;
  1247. }
  1248. /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
  1249. for (i = 0; i < count; i++) {
  1250. u32 offset = vmxferpage_packet->ranges[i].byte_offset;
  1251. u32 buflen = vmxferpage_packet->ranges[i].byte_count;
  1252. void *data;
  1253. int ret;
  1254. if (unlikely(offset > net_device->recv_buf_size ||
  1255. buflen > net_device->recv_buf_size - offset)) {
  1256. nvchan->rsc.cnt = 0;
  1257. status = NVSP_STAT_FAIL;
  1258. netif_err(net_device_ctx, rx_err, ndev,
  1259. "Packet offset:%u + len:%u too big\n",
  1260. offset, buflen);
  1261. continue;
  1262. }
  1263. /* We're going to copy (sections of) the packet into nvchan->recv_buf;
  1264. * make sure that nvchan->recv_buf is large enough to hold the packet.
  1265. */
  1266. if (unlikely(buflen > net_device->recv_section_size)) {
  1267. nvchan->rsc.cnt = 0;
  1268. status = NVSP_STAT_FAIL;
  1269. netif_err(net_device_ctx, rx_err, ndev,
  1270. "Packet too big: buflen=%u recv_section_size=%u\n",
  1271. buflen, net_device->recv_section_size);
  1272. continue;
  1273. }
  1274. data = recv_buf + offset;
  1275. nvchan->rsc.is_last = (i == count - 1);
  1276. trace_rndis_recv(ndev, q_idx, data);
  1277. /* Pass it to the upper layer */
  1278. ret = rndis_filter_receive(ndev, net_device,
  1279. nvchan, data, buflen);
  1280. if (unlikely(ret != NVSP_STAT_SUCCESS)) {
  1281. /* Drop incomplete packet */
  1282. nvchan->rsc.cnt = 0;
  1283. status = NVSP_STAT_FAIL;
  1284. }
  1285. }
  1286. enq_receive_complete(ndev, net_device, q_idx,
  1287. vmxferpage_packet->d.trans_id, status);
  1288. return count;
  1289. }
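/* Handle NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: validate the message length
 * and offset, then copy the send indirection table into tx_table so that
 * transmit queue selection can use it.
 */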
static void netvsc_send_table(struct net_device *ndev,
			      struct netvsc_device *nvscdev,
			      const struct nvsp_message *nvmsg,
			      u32 msglen)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	u32 count, offset, *tab;
	int i;

	/* Ensure packet is big enough to read send_table fields */
	if (msglen < sizeof(struct nvsp_message_header) +
		     sizeof(struct nvsp_5_send_indirect_table)) {
		netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
		return;
	}

	count = nvmsg->msg.v5_msg.send_table.count;
	offset = nvmsg->msg.v5_msg.send_table.offset;

	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	/* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be
	 * wrong due to a host bug. So fix the offset here.
	 */
	if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
	    msglen >= sizeof(struct nvsp_message_header) +
	    sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
		offset = sizeof(struct nvsp_message_header) +
			 sizeof(union nvsp_6_message_uber);

	/* Boundary check for all versions */
	if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
		netdev_err(ndev, "Received send-table offset too big:%u\n",
			   offset);
		return;
	}

	tab = (void *)nvmsg + offset;

	for (i = 0; i < count; i++)
		net_device_ctx->tx_table[i] = tab[i];
}
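
/* Descriptive summary (added): handle NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION.
 * Record whether an SR-IOV VF has been allocated for this device and its
 * serial number, and signal the vf_add completion when a VF is added.
 */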
static void netvsc_send_vf(struct net_device *ndev,
			   const struct nvsp_message *nvmsg,
			   u32 msglen)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	/* Ensure packet is big enough to read its fields */
	if (msglen < sizeof(struct nvsp_message_header) +
		     sizeof(struct nvsp_4_send_vf_association)) {
		netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
		return;
	}

	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;

	if (net_device_ctx->vf_alloc)
		complete(&net_device_ctx->vf_add);

	netdev_info(ndev, "VF slot %u %s\n",
		    net_device_ctx->vf_serial,
		    net_device_ctx->vf_alloc ? "added" : "removed");
}
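
/* Descriptive summary (added): dispatch an inband (VM_PKT_DATA_INBAND) NVSP
 * message after checking that the payload is at least large enough to carry
 * an nvsp_message_header. VF association messages are rejected on isolated
 * guests.
 */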
static void netvsc_receive_inband(struct net_device *ndev,
				  struct netvsc_device *nvscdev,
				  const struct vmpacket_descriptor *desc)
{
	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
	u32 msglen = hv_pkt_datalen(desc);

	/* Ensure packet is big enough to read header fields */
	if (msglen < sizeof(struct nvsp_message_header)) {
		netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
		return;
	}

	switch (nvmsg->hdr.msg_type) {
	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
		netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
		break;

	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
		if (hv_is_isolation_supported())
			netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n");
		else
			netvsc_send_vf(ndev, nvmsg, msglen);
		break;
	}
}
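
/* Descriptive summary (added): demultiplex one VMBus packet descriptor by
 * type: send completions, transfer-page data packets (RNDIS receive path)
 * and inband control messages. Returns the count reported by netvsc_receive()
 * for transfer-page packets and 0 otherwise; the return value feeds NAPI work
 * accounting in netvsc_poll().
 */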
static int netvsc_process_raw_pkt(struct hv_device *device,
				  struct netvsc_channel *nvchan,
				  struct netvsc_device *net_device,
				  struct net_device *ndev,
				  const struct vmpacket_descriptor *desc,
				  int budget)
{
	struct vmbus_channel *channel = nvchan->channel;
	const struct nvsp_message *nvmsg = hv_pkt_data(desc);

	trace_nvsp_recv(ndev, channel, nvmsg);

	switch (desc->type) {
	case VM_PKT_COMP:
		netvsc_send_completion(ndev, net_device, channel, desc, budget);
		break;

	case VM_PKT_DATA_USING_XFER_PAGES:
		return netvsc_receive(ndev, net_device, nvchan, desc);

	case VM_PKT_DATA_INBAND:
		netvsc_receive_inband(ndev, net_device, desc);
		break;

	default:
		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
			   desc->type, desc->trans_id);
		break;
	}

	return 0;
}
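
/* Descriptive summary (added): a subchannel reaches its hv_device through the
 * primary channel; the primary channel owns device_obj directly.
 */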
static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
	struct vmbus_channel *primary = channel->primary_channel;

	return primary ? primary->device_obj : channel->device_obj;
}
/* Network processing softirq
 * Process data in incoming ring buffer from host
 * Stops when ring is empty or budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
	struct netvsc_channel *nvchan
		= container_of(napi, struct netvsc_channel, napi);
	struct netvsc_device *net_device = nvchan->net_device;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_device *device = netvsc_channel_to_device(channel);
	struct net_device *ndev = hv_get_drvdata(device);
	int work_done = 0;
	int ret;

	/* If starting a new interval */
	if (!nvchan->desc)
		nvchan->desc = hv_pkt_iter_first(channel);

	nvchan->xdp_flush = false;

	while (nvchan->desc && work_done < budget) {
		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
						    ndev, nvchan->desc, budget);
		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
	}

	if (nvchan->xdp_flush)
		xdp_do_flush();

	/* Send any pending receive completions */
	ret = send_recv_completions(ndev, net_device, nvchan);

	/* If it did not exhaust NAPI budget this time
	 *  and not doing busy poll
	 * then re-enable host interrupts
	 *  and reschedule if ring is not empty
	 *   or sending receive completion failed.
	 */
	if (work_done < budget &&
	    napi_complete_done(napi, work_done) &&
	    (ret || hv_end_read(&channel->inbound)) &&
	    napi_schedule_prep(napi)) {
		hv_begin_read(&channel->inbound);
		__napi_schedule(napi);
	}

	/* Driver may overshoot since multiple packets per descriptor */
	return min(work_done, budget);
}
/* Call back when data is available in host ring buffer.
 * Processing is deferred until network softirq (NAPI)
 */
void netvsc_channel_cb(void *context)
{
	struct netvsc_channel *nvchan = context;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/* preload first vmpacket descriptor */
	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	if (napi_schedule_prep(&nvchan->napi)) {
		/* disable interrupts from host */
		hv_begin_read(rbi);

		__napi_schedule_irqoff(&nvchan->napi);
	}
}
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
				const struct netvsc_device_info *device_info)
{
	int i, ret = 0;
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	net_device = alloc_net_device();
	if (!net_device)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
		net_device_ctx->tx_table[i] = 0;

	/* Because the device uses NAPI, all the interrupt batching and
	 * control is done via Net softirq, not the channel handling
	 */
	set_channel_read_mode(device->channel, HV_CALL_ISR);

	/* If we're reopening the device we may have multiple queues, fill the
	 * chn_table with the default channel to use it before subchannels are
	 * opened.
	 * Initialize the channel state before we open;
	 * we can be interrupted as soon as we open the channel.
	 */
	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->channel = device->channel;
		nvchan->net_device = net_device;
		u64_stats_init(&nvchan->tx_stats.syncp);
		u64_stats_init(&nvchan->rx_stats.syncp);

		ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);

		if (ret) {
			netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
			goto cleanup2;
		}

		ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);

		if (ret) {
			netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
			goto cleanup2;
		}
	}

	/* Enable NAPI handler before init callbacks */
	netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);

	/* Open the channel */
	device->channel->next_request_id_callback = vmbus_next_request_id;
	device->channel->request_addr_callback = vmbus_request_addr;
	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;

	ret = vmbus_open(device->channel, netvsc_ring_bytes,
			 netvsc_ring_bytes, NULL, 0,
			 netvsc_channel_cb, net_device->chan_table);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

	napi_enable(&net_device->chan_table[0].napi);

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device, net_device, device_info);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
	 * populated.
	 */
	rcu_assign_pointer(net_device_ctx->nvdev, net_device);

	return net_device;

close:
	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
	napi_disable(&net_device->chan_table[0].napi);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
	netif_napi_del(&net_device->chan_table[0].napi);

cleanup2:
	if (net_device->recv_original_buf)
		hv_unmap_memory(net_device->recv_buf);

	if (net_device->send_original_buf)
		hv_unmap_memory(net_device->send_buf);

	free_netvsc_device(&net_device->rcu);

	return ERR_PTR(ret);
}