channel_mgmt.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <[email protected]>
  7. * Hank Janssen <[email protected]>
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/kernel.h>
  11. #include <linux/interrupt.h>
  12. #include <linux/sched.h>
  13. #include <linux/wait.h>
  14. #include <linux/mm.h>
  15. #include <linux/slab.h>
  16. #include <linux/list.h>
  17. #include <linux/module.h>
  18. #include <linux/completion.h>
  19. #include <linux/delay.h>
  20. #include <linux/cpu.h>
  21. #include <linux/hyperv.h>
  22. #include <asm/mshyperv.h>
  23. #include <linux/sched/isolation.h>
  24. #include "hyperv_vmbus.h"
  25. static void init_vp_index(struct vmbus_channel *channel);
  26. const struct vmbus_device vmbus_devs[] = {
  27. /* IDE */
  28. { .dev_type = HV_IDE,
  29. HV_IDE_GUID,
  30. .perf_device = true,
  31. .allowed_in_isolated = false,
  32. },
  33. /* SCSI */
  34. { .dev_type = HV_SCSI,
  35. HV_SCSI_GUID,
  36. .perf_device = true,
  37. .allowed_in_isolated = true,
  38. },
  39. /* Fibre Channel */
  40. { .dev_type = HV_FC,
  41. HV_SYNTHFC_GUID,
  42. .perf_device = true,
  43. .allowed_in_isolated = false,
  44. },
  45. /* Synthetic NIC */
  46. { .dev_type = HV_NIC,
  47. HV_NIC_GUID,
  48. .perf_device = true,
  49. .allowed_in_isolated = true,
  50. },
  51. /* Network Direct */
  52. { .dev_type = HV_ND,
  53. HV_ND_GUID,
  54. .perf_device = true,
  55. .allowed_in_isolated = false,
  56. },
  57. /* PCIE */
  58. { .dev_type = HV_PCIE,
  59. HV_PCIE_GUID,
  60. .perf_device = false,
  61. .allowed_in_isolated = false,
  62. },
  63. /* Synthetic Frame Buffer */
  64. { .dev_type = HV_FB,
  65. HV_SYNTHVID_GUID,
  66. .perf_device = false,
  67. .allowed_in_isolated = false,
  68. },
  69. /* Synthetic Keyboard */
  70. { .dev_type = HV_KBD,
  71. HV_KBD_GUID,
  72. .perf_device = false,
  73. .allowed_in_isolated = false,
  74. },
  75. /* Synthetic MOUSE */
  76. { .dev_type = HV_MOUSE,
  77. HV_MOUSE_GUID,
  78. .perf_device = false,
  79. .allowed_in_isolated = false,
  80. },
  81. /* KVP */
  82. { .dev_type = HV_KVP,
  83. HV_KVP_GUID,
  84. .perf_device = false,
  85. .allowed_in_isolated = false,
  86. },
  87. /* Time Synch */
  88. { .dev_type = HV_TS,
  89. HV_TS_GUID,
  90. .perf_device = false,
  91. .allowed_in_isolated = true,
  92. },
  93. /* Heartbeat */
  94. { .dev_type = HV_HB,
  95. HV_HEART_BEAT_GUID,
  96. .perf_device = false,
  97. .allowed_in_isolated = true,
  98. },
  99. /* Shutdown */
  100. { .dev_type = HV_SHUTDOWN,
  101. HV_SHUTDOWN_GUID,
  102. .perf_device = false,
  103. .allowed_in_isolated = true,
  104. },
  105. /* File copy */
  106. { .dev_type = HV_FCOPY,
  107. HV_FCOPY_GUID,
  108. .perf_device = false,
  109. .allowed_in_isolated = false,
  110. },
  111. /* Backup */
  112. { .dev_type = HV_BACKUP,
  113. HV_VSS_GUID,
  114. .perf_device = false,
  115. .allowed_in_isolated = false,
  116. },
  117. /* Dynamic Memory */
  118. { .dev_type = HV_DM,
  119. HV_DM_GUID,
  120. .perf_device = false,
  121. .allowed_in_isolated = false,
  122. },
  123. /* Unknown GUID */
  124. { .dev_type = HV_UNKNOWN,
  125. .perf_device = false,
  126. .allowed_in_isolated = false,
  127. },
  128. };
  129. static const struct {
  130. guid_t guid;
  131. } vmbus_unsupported_devs[] = {
  132. { HV_AVMA1_GUID },
  133. { HV_AVMA2_GUID },
  134. { HV_RDV_GUID },
  135. { HV_IMC_GUID },
  136. };
  137. /*
  138. * The rescinded channel may be blocked waiting for a response from the host;
  139. * take care of that.
  140. */
  141. static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
  142. {
  143. struct vmbus_channel_msginfo *msginfo;
  144. unsigned long flags;
  145. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  146. channel->rescind = true;
  147. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  148. msglistentry) {
  149. if (msginfo->waiting_channel == channel) {
  150. complete(&msginfo->waitevent);
  151. break;
  152. }
  153. }
  154. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  155. }
  156. static bool is_unsupported_vmbus_devs(const guid_t *guid)
  157. {
  158. int i;
  159. for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
  160. if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
  161. return true;
  162. return false;
  163. }
  164. static u16 hv_get_dev_type(const struct vmbus_channel *channel)
  165. {
  166. const guid_t *guid = &channel->offermsg.offer.if_type;
  167. u16 i;
  168. if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
  169. return HV_UNKNOWN;
  170. for (i = HV_IDE; i < HV_UNKNOWN; i++) {
  171. if (guid_equal(guid, &vmbus_devs[i].guid))
  172. return i;
  173. }
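  /*
   * The loop above fell through without a match, so i is now HV_UNKNOWN
   * and the GUID maps to the catch-all "Unknown GUID" entry of vmbus_devs[].
   */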
  174. pr_info("Unknown GUID: %pUl\n", guid);
  175. return i;
  176. }
  177. /**
  178. * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
  179. * @icmsghdrp: Pointer to msg header structure
  180. * @buf: Raw buffer channel data
  181. * @buflen: Length of the raw buffer channel data.
  182. * @fw_version: The framework versions we can support.
  183. * @fw_vercnt: The size of @fw_version.
  184. * @srv_version: The service versions we can support.
  185. * @srv_vercnt: The size of @srv_version.
  186. * @nego_fw_version: The selected framework version.
  187. * @nego_srv_version: The selected service version.
  188. *
  189. * Note: Versions are given in decreasing order.
  190. *
  191. * Set up and fill in default negotiate response message.
  192. * Mainly used by Hyper-V drivers.
  193. */
  194. bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
  195. u32 buflen, const int *fw_version, int fw_vercnt,
  196. const int *srv_version, int srv_vercnt,
  197. int *nego_fw_version, int *nego_srv_version)
  198. {
  199. int icframe_major, icframe_minor;
  200. int icmsg_major, icmsg_minor;
  201. int fw_major, fw_minor;
  202. int srv_major, srv_minor;
  203. int i, j;
  204. bool found_match = false;
  205. struct icmsg_negotiate *negop;
  206. /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
  207. if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
  208. pr_err_ratelimited("Invalid icmsg negotiate\n");
  209. return false;
  210. }
  211. icmsghdrp->icmsgsize = 0x10;
  212. negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];
  213. icframe_major = negop->icframe_vercnt;
  214. icframe_minor = 0;
  215. icmsg_major = negop->icmsg_vercnt;
  216. icmsg_minor = 0;
  217. /* Validate negop packet */
  218. if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
  219. icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
  220. ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
  221. pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
  222. icframe_major, icmsg_major);
  223. goto fw_error;
  224. }
  225. /*
  226. * Select the framework version number we will
  227. * support.
  228. */
  229. for (i = 0; i < fw_vercnt; i++) {
  230. fw_major = (fw_version[i] >> 16);
  231. fw_minor = (fw_version[i] & 0xFFFF);
  232. for (j = 0; j < negop->icframe_vercnt; j++) {
  233. if ((negop->icversion_data[j].major == fw_major) &&
  234. (negop->icversion_data[j].minor == fw_minor)) {
  235. icframe_major = negop->icversion_data[j].major;
  236. icframe_minor = negop->icversion_data[j].minor;
  237. found_match = true;
  238. break;
  239. }
  240. }
  241. if (found_match)
  242. break;
  243. }
  244. if (!found_match)
  245. goto fw_error;
  246. found_match = false;
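  /*
   * negop->icversion_data[] carries the framework versions first, followed
   * by the service versions; hence the inner loop below starts its index
   * at icframe_vercnt.
   */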
  247. for (i = 0; i < srv_vercnt; i++) {
  248. srv_major = (srv_version[i] >> 16);
  249. srv_minor = (srv_version[i] & 0xFFFF);
  250. for (j = negop->icframe_vercnt;
  251. (j < negop->icframe_vercnt + negop->icmsg_vercnt);
  252. j++) {
  253. if ((negop->icversion_data[j].major == srv_major) &&
  254. (negop->icversion_data[j].minor == srv_minor)) {
  255. icmsg_major = negop->icversion_data[j].major;
  256. icmsg_minor = negop->icversion_data[j].minor;
  257. found_match = true;
  258. break;
  259. }
  260. }
  261. if (found_match)
  262. break;
  263. }
  264. /*
  265. * Respond with the framework and service
  266. * version numbers we can support.
  267. */
  268. fw_error:
  269. if (!found_match) {
  270. negop->icframe_vercnt = 0;
  271. negop->icmsg_vercnt = 0;
  272. } else {
  273. negop->icframe_vercnt = 1;
  274. negop->icmsg_vercnt = 1;
  275. }
  276. if (nego_fw_version)
  277. *nego_fw_version = (icframe_major << 16) | icframe_minor;
  278. if (nego_srv_version)
  279. *nego_srv_version = (icmsg_major << 16) | icmsg_minor;
  280. negop->icversion_data[0].major = icframe_major;
  281. negop->icversion_data[0].minor = icframe_minor;
  282. negop->icversion_data[1].major = icmsg_major;
  283. negop->icversion_data[1].minor = icmsg_minor;
  284. return found_match;
  285. }
  286. EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
  287. /*
  288. * alloc_channel - Allocate and initialize a vmbus channel object
  289. */
  290. static struct vmbus_channel *alloc_channel(void)
  291. {
  292. struct vmbus_channel *channel;
  293. channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
  294. if (!channel)
  295. return NULL;
  296. spin_lock_init(&channel->sched_lock);
  297. init_completion(&channel->rescind_event);
  298. INIT_LIST_HEAD(&channel->sc_list);
  299. tasklet_init(&channel->callback_event,
  300. vmbus_on_event, (unsigned long)channel);
  301. hv_ringbuffer_pre_init(channel);
  302. return channel;
  303. }
  304. /*
  305. * free_channel - Release the resources used by the vmbus channel object
  306. */
  307. static void free_channel(struct vmbus_channel *channel)
  308. {
  309. tasklet_kill(&channel->callback_event);
  310. vmbus_remove_channel_attr_group(channel);
  311. kobject_put(&channel->kobj);
  312. }
  313. void vmbus_channel_map_relid(struct vmbus_channel *channel)
  314. {
  315. if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
  316. return;
  317. /*
  318. * The mapping of the channel's relid is visible from the CPUs that
  319. * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
  320. * execute:
  321. *
  322. * (a) In the "normal (i.e., not resuming from hibernation)" path,
  323. * the full barrier in virt_store_mb() guarantees that the store
  324. * is propagated to all CPUs before the add_channel_work work
  325. * is queued. In turn, add_channel_work is queued before the
  326. * channel's ring buffer is allocated/initialized and the
  327. * OPENCHANNEL message for the channel is sent in vmbus_open().
  328. * Hyper-V won't start sending the interrupts for the channel
  329. * before the OPENCHANNEL message is acked. The memory barrier
  330. * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
  331. * that vmbus_chan_sched() must find the channel's relid in
  332. * recv_int_page before retrieving the channel pointer from the
  333. * array of channels.
  334. *
  335. * (b) In the "resuming from hibernation" path, the virt_store_mb()
  336. * guarantees that the store is propagated to all CPUs before
  337. * the VMBus connection is marked as ready for the resume event
  338. * (cf. check_ready_for_resume_event()). The interrupt handler
  339. * of the VMBus driver and vmbus_chan_sched() can not run before
  340. * vmbus_bus_resume() has completed execution (cf. resume_noirq).
  341. */
  342. virt_store_mb(
  343. vmbus_connection.channels[channel->offermsg.child_relid],
  344. channel);
  345. }
  346. void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
  347. {
  348. if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
  349. return;
  350. WRITE_ONCE(
  351. vmbus_connection.channels[channel->offermsg.child_relid],
  352. NULL);
  353. }
  354. static void vmbus_release_relid(u32 relid)
  355. {
  356. struct vmbus_channel_relid_released msg;
  357. int ret;
  358. memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
  359. msg.child_relid = relid;
  360. msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
  361. ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
  362. true);
  363. trace_vmbus_release_relid(&msg, ret);
  364. }
  365. void hv_process_channel_removal(struct vmbus_channel *channel)
  366. {
  367. lockdep_assert_held(&vmbus_connection.channel_mutex);
  368. BUG_ON(!channel->rescind);
  369. /*
  370. * hv_process_channel_removal() could find INVALID_RELID only for
  371. * hv_sock channels. See the inline comments in vmbus_onoffer().
  372. */
  373. WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
  374. !is_hvsock_channel(channel));
  375. /*
  376. * Upon suspend, an in-use hv_sock channel is removed from the array of
  377. * channels and the relid is invalidated. After hibernation, when the
  378. * user-space application destroys the channel, it's unnecessary and
  379. * unsafe to remove the channel from the array of channels. See also
  380. * the inline comments before the call of vmbus_release_relid() below.
  381. */
  382. if (channel->offermsg.child_relid != INVALID_RELID)
  383. vmbus_channel_unmap_relid(channel);
  384. if (channel->primary_channel == NULL)
  385. list_del(&channel->listentry);
  386. else
  387. list_del(&channel->sc_list);
  388. /*
  389. * If this is a "perf" channel, update the hv_numa_map[] masks so that
  390. * init_vp_index() can (re-)use the CPU.
  391. */
  392. if (hv_is_perf_channel(channel))
  393. hv_clear_allocated_cpu(channel->target_cpu);
  394. /*
  395. * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
  396. * the relid is invalidated; after hibernation, when the user-space app
  397. * destroys the channel, the relid is INVALID_RELID, and in this case
  398. * it's unnecessary and unsafe to release the old relid, since the same
  399. * relid can refer to a completely different channel now.
  400. */
  401. if (channel->offermsg.child_relid != INVALID_RELID)
  402. vmbus_release_relid(channel->offermsg.child_relid);
  403. free_channel(channel);
  404. }
  405. void vmbus_free_channels(void)
  406. {
  407. struct vmbus_channel *channel, *tmp;
  408. list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
  409. listentry) {
  410. /* hv_process_channel_removal() needs this */
  411. channel->rescind = true;
  412. vmbus_device_unregister(channel->device_obj);
  413. }
  414. }
  415. /* Note: the function can run concurrently for primary/sub channels. */
  416. static void vmbus_add_channel_work(struct work_struct *work)
  417. {
  418. struct vmbus_channel *newchannel =
  419. container_of(work, struct vmbus_channel, add_channel_work);
  420. struct vmbus_channel *primary_channel = newchannel->primary_channel;
  421. int ret;
  422. /*
  423. * This state is used to indicate a successful open
  424. * so that when we do close the channel normally, we
  425. * can cleanup properly.
  426. */
  427. newchannel->state = CHANNEL_OPEN_STATE;
  428. if (primary_channel != NULL) {
  429. /* newchannel is a sub-channel. */
  430. struct hv_device *dev = primary_channel->device_obj;
  431. if (vmbus_add_channel_kobj(dev, newchannel))
  432. goto err_deq_chan;
  433. if (primary_channel->sc_creation_callback != NULL)
  434. primary_channel->sc_creation_callback(newchannel);
  435. newchannel->probe_done = true;
  436. return;
  437. }
  438. /*
  439. * Start the process of binding the primary channel to the driver
  440. */
  441. newchannel->device_obj = vmbus_device_create(
  442. &newchannel->offermsg.offer.if_type,
  443. &newchannel->offermsg.offer.if_instance,
  444. newchannel);
  445. if (!newchannel->device_obj)
  446. goto err_deq_chan;
  447. newchannel->device_obj->device_id = newchannel->device_id;
  448. /*
  449. * Add the new device to the bus. This will kick off device-driver
  450. * binding which eventually invokes the device driver's AddDevice()
  451. * method.
  452. *
  453. * If vmbus_device_register() fails, the 'device_obj' is freed in
  454. * vmbus_device_release() as called by device_unregister() in the
  455. * error path of vmbus_device_register(). In the outside error
  456. * path, there's no need to free it.
  457. */
  458. ret = vmbus_device_register(newchannel->device_obj);
  459. if (ret != 0) {
  460. pr_err("unable to add child device object (relid %d)\n",
  461. newchannel->offermsg.child_relid);
  462. goto err_deq_chan;
  463. }
  464. newchannel->probe_done = true;
  465. return;
  466. err_deq_chan:
  467. mutex_lock(&vmbus_connection.channel_mutex);
  468. /*
  469. * We need to set the flag, otherwise
  470. * vmbus_onoffer_rescind() can be blocked.
  471. */
  472. newchannel->probe_done = true;
  473. if (primary_channel == NULL)
  474. list_del(&newchannel->listentry);
  475. else
  476. list_del(&newchannel->sc_list);
  477. /* vmbus_process_offer() has mapped the channel. */
  478. vmbus_channel_unmap_relid(newchannel);
  479. mutex_unlock(&vmbus_connection.channel_mutex);
  480. vmbus_release_relid(newchannel->offermsg.child_relid);
  481. free_channel(newchannel);
  482. }
  483. /*
  484. * vmbus_process_offer - Process the offer by creating a channel/device
  485. * associated with this offer
  486. */
  487. static void vmbus_process_offer(struct vmbus_channel *newchannel)
  488. {
  489. struct vmbus_channel *channel;
  490. struct workqueue_struct *wq;
  491. bool fnew = true;
  492. /*
  493. * Synchronize vmbus_process_offer() and CPU hotplugging:
  494. *
  495. * CPU1 CPU2
  496. *
  497. * [vmbus_process_offer()] [Hot removal of the CPU]
  498. *
  499. * CPU_READ_LOCK CPUS_WRITE_LOCK
  500. * LOAD cpu_online_mask SEARCH chn_list
  501. * STORE target_cpu LOAD target_cpu
  502. * INSERT chn_list STORE cpu_online_mask
  503. * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
  504. *
  505. * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
  506. * CPU2's SEARCH from *not* seeing CPU1's INSERT
  507. *
  508. * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
  509. * CPU2's LOAD from *not* seeing CPU1's STORE
  510. */
  511. cpus_read_lock();
  512. /*
  513. * Serializes the modifications of the chn_list list as well as
  514. * the accesses to next_numa_node_id in init_vp_index().
  515. */
  516. mutex_lock(&vmbus_connection.channel_mutex);
  517. list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
  518. if (guid_equal(&channel->offermsg.offer.if_type,
  519. &newchannel->offermsg.offer.if_type) &&
  520. guid_equal(&channel->offermsg.offer.if_instance,
  521. &newchannel->offermsg.offer.if_instance)) {
  522. fnew = false;
  523. newchannel->primary_channel = channel;
  524. break;
  525. }
  526. }
  527. init_vp_index(newchannel);
  528. /* Remember the channels that should be cleaned up upon suspend. */
  529. if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
  530. atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
  531. /*
  532. * Now that we have acquired the channel_mutex,
  533. * we can release the potentially racing rescind thread.
  534. */
  535. atomic_dec(&vmbus_connection.offer_in_progress);
  536. if (fnew) {
  537. list_add_tail(&newchannel->listentry,
  538. &vmbus_connection.chn_list);
  539. } else {
  540. /*
  541. * Check to see if this is a valid sub-channel.
  542. */
  543. if (newchannel->offermsg.offer.sub_channel_index == 0) {
  544. mutex_unlock(&vmbus_connection.channel_mutex);
  545. cpus_read_unlock();
  546. /*
  547. * Don't call free_channel(), because newchannel->kobj
  548. * is not initialized yet.
  549. */
  550. kfree(newchannel);
  551. WARN_ON_ONCE(1);
  552. return;
  553. }
  554. /*
  555. * Process the sub-channel.
  556. */
  557. list_add_tail(&newchannel->sc_list, &channel->sc_list);
  558. }
  559. vmbus_channel_map_relid(newchannel);
  560. mutex_unlock(&vmbus_connection.channel_mutex);
  561. cpus_read_unlock();
  562. /*
  563. * vmbus_process_offer() mustn't call channel->sc_creation_callback()
  564. * directly for sub-channels, because sc_creation_callback() ->
  565. * vmbus_open() may never get the host's response to the
  566. * OPEN_CHANNEL message (the host may rescind a channel at any time,
  567. * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
  568. * may not wake up the vmbus_open() as it's blocked due to a non-zero
  569. * vmbus_connection.offer_in_progress, and finally we have a deadlock.
  570. *
  571. * The above is also true for primary channels, if the related device
  572. * drivers use sync probing mode by default.
  573. *
  574. * And, usually the handling of primary channels and sub-channels can
  575. * depend on each other, so we should offload them to different
  576. * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
  577. * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
  578. * rtnl_lock(), causing a deadlock: the former gets the rtnl_lock
  579. * and waits for all the sub-channels to appear, but the latter
  580. * can't get the rtnl_lock and this blocks the handling of
  581. * sub-channels.
  582. */
  583. INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
  584. wq = fnew ? vmbus_connection.handle_primary_chan_wq :
  585. vmbus_connection.handle_sub_chan_wq;
  586. queue_work(wq, &newchannel->add_channel_work);
  587. }
  588. /*
  589. * Check if the CPU is used by other channels of the same device.
  590. * It should only be called by init_vp_index().
  591. */
  592. static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
  593. {
  594. struct vmbus_channel *primary = chn->primary_channel;
  595. struct vmbus_channel *sc;
  596. lockdep_assert_held(&vmbus_connection.channel_mutex);
  597. if (!primary)
  598. return false;
  599. if (primary->target_cpu == cpu)
  600. return true;
  601. list_for_each_entry(sc, &primary->sc_list, sc_list)
  602. if (sc != chn && sc->target_cpu == cpu)
  603. return true;
  604. return false;
  605. }
  606. /*
  607. * We use this state to statically distribute the channel interrupt load.
  608. */
  609. static int next_numa_node_id;
  610. /*
  611. * We can statically distribute the incoming channel interrupt load
  612. * by binding a channel to a VCPU.
  613. *
  614. * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
  615. * Performance critical channels will be distributed evenly among all
  616. * the available NUMA nodes. Once the node is assigned, we will assign
  617. * the CPU based on a simple round robin scheme.
  618. */
  619. static void init_vp_index(struct vmbus_channel *channel)
  620. {
  621. bool perf_chn = hv_is_perf_channel(channel);
  622. u32 i, ncpu = num_online_cpus();
  623. cpumask_var_t available_mask;
  624. struct cpumask *allocated_mask;
  625. const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
  626. u32 target_cpu;
  627. int numa_node;
  628. if (!perf_chn ||
  629. !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
  630. cpumask_empty(hk_mask)) {
  631. /*
  632. * If the channel is not a performance critical
  633. * channel, bind it to VMBUS_CONNECT_CPU.
  634. * In case alloc_cpumask_var() fails, bind it to
  635. * VMBUS_CONNECT_CPU.
  636. * If all the cpus are isolated, bind it to
  637. * VMBUS_CONNECT_CPU.
  638. */
  639. channel->target_cpu = VMBUS_CONNECT_CPU;
  640. if (perf_chn)
  641. hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
  642. return;
  643. }
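  /*
   * Pick a target CPU for a perf channel: round-robin over the NUMA nodes
   * (via next_numa_node_id), then take the first housekeeping CPU of that
   * node that is not yet in the node's allocated mask. Retry up to
   * ncpu + 1 times so that, where possible, the chosen CPU is not already
   * used by another channel of the same device (see hv_cpuself_used()).
   */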
  644. for (i = 1; i <= ncpu + 1; i++) {
  645. while (true) {
  646. numa_node = next_numa_node_id++;
  647. if (numa_node == nr_node_ids) {
  648. next_numa_node_id = 0;
  649. continue;
  650. }
  651. if (cpumask_empty(cpumask_of_node(numa_node)))
  652. continue;
  653. break;
  654. }
  655. allocated_mask = &hv_context.hv_numa_map[numa_node];
  656. retry:
  657. cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
  658. cpumask_and(available_mask, available_mask, hk_mask);
  659. if (cpumask_empty(available_mask)) {
  660. /*
  661. * We have cycled through all the CPUs in the node;
  662. * reset the allocated map.
  663. */
  664. cpumask_clear(allocated_mask);
  665. goto retry;
  666. }
  667. target_cpu = cpumask_first(available_mask);
  668. cpumask_set_cpu(target_cpu, allocated_mask);
  669. if (channel->offermsg.offer.sub_channel_index >= ncpu ||
  670. i > ncpu || !hv_cpuself_used(target_cpu, channel))
  671. break;
  672. }
  673. channel->target_cpu = target_cpu;
  674. free_cpumask_var(available_mask);
  675. }
  676. #define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */
  677. #define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */
  678. #define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
  679. #define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */
  680. #define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
  681. static void vmbus_wait_for_unload(void)
  682. {
  683. int cpu;
  684. void *page_addr;
  685. struct hv_message *msg;
  686. struct vmbus_channel_message_header *hdr;
  687. u32 message_type, i;
  688. /*
  689. * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
  690. * used for initial contact or to CPU0 depending on host version. When
  691. * we're crashing on a different CPU let's hope that IRQ handler on
  692. * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
  693. * functional and vmbus_unload_response() will complete
  694. * vmbus_connection.unload_event. If not, the last thing we can do is
  695. * read message pages for all CPUs directly.
  696. *
  697. * Wait up to 100 seconds since an Azure host must write back any dirty
  698. * data in its disk cache before the VMbus UNLOAD request will
  699. * complete. This flushing has been empirically observed to take up
  700. * to 50 seconds in cases with a lot of dirty data, so allow additional
  701. * leeway and for inaccuracies in mdelay(). But eventually time out so
  702. * that the panic path can't get hung forever in case the response
  703. * message isn't seen.
  704. */
  705. for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
  706. if (completion_done(&vmbus_connection.unload_event))
  707. goto completed;
  708. for_each_present_cpu(cpu) {
  709. struct hv_per_cpu_context *hv_cpu
  710. = per_cpu_ptr(hv_context.cpu_context, cpu);
  711. /*
  712. * In a CoCo VM the synic_message_page is not allocated
  713. * in hv_synic_alloc(). Instead it is set/cleared in
  714. * hv_synic_enable_regs() and hv_synic_disable_regs()
  715. * such that it is set only when the CPU is online. If
  716. * not all present CPUs are online, the message page
  717. * might be NULL, so skip such CPUs.
  718. */
  719. page_addr = hv_cpu->synic_message_page;
  720. if (!page_addr)
  721. continue;
  722. msg = (struct hv_message *)page_addr
  723. + VMBUS_MESSAGE_SINT;
  724. message_type = READ_ONCE(msg->header.message_type);
  725. if (message_type == HVMSG_NONE)
  726. continue;
  727. hdr = (struct vmbus_channel_message_header *)
  728. msg->u.payload;
  729. if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
  730. complete(&vmbus_connection.unload_event);
  731. vmbus_signal_eom(msg, message_type);
  732. }
  733. /*
  734. * Give a notice periodically so someone watching the
  735. * serial output won't think it is completely hung.
  736. */
  737. if (!(i % UNLOAD_MSG_LOOPS))
  738. pr_notice("Waiting for VMBus UNLOAD to complete\n");
  739. mdelay(UNLOAD_DELAY_UNIT_MS);
  740. }
  741. pr_err("Continuing even though VMBus UNLOAD did not complete\n");
  742. completed:
  743. /*
  744. * We're crashing and have already received the UNLOAD_RESPONSE; clean up
  745. * any pending messages on all CPUs so that new messages can be received
  746. * after we reconnect.
  747. */
  748. for_each_present_cpu(cpu) {
  749. struct hv_per_cpu_context *hv_cpu
  750. = per_cpu_ptr(hv_context.cpu_context, cpu);
  751. page_addr = hv_cpu->synic_message_page;
  752. if (!page_addr)
  753. continue;
  754. msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
  755. msg->header.message_type = HVMSG_NONE;
  756. }
  757. }
  758. /*
  759. * vmbus_unload_response - Handler for the unload response.
  760. */
  761. static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
  762. {
  763. /*
  764. * This is a global event; just wakeup the waiting thread.
  765. * Once we successfully unload, we can cleanup the monitor state.
  766. *
  767. * NB. A malicious or compromised Hyper-V could send a spurious
  768. * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
  769. * of the complete() below. Make sure that unload_event has been
  770. * initialized by the time this complete() is executed.
  771. */
  772. complete(&vmbus_connection.unload_event);
  773. }
  774. void vmbus_initiate_unload(bool crash)
  775. {
  776. struct vmbus_channel_message_header hdr;
  777. if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
  778. return;
  779. /* Pre-Win2012R2 hosts don't support reconnect */
  780. if (vmbus_proto_version < VERSION_WIN8_1)
  781. return;
  782. reinit_completion(&vmbus_connection.unload_event);
  783. memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
  784. hdr.msgtype = CHANNELMSG_UNLOAD;
  785. vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
  786. !crash);
  787. /*
  788. * vmbus_initiate_unload() is also called on crash, and the crash may
  789. * happen in an interrupt context, where scheduling is impossible.
  790. */
  791. if (!crash)
  792. wait_for_completion(&vmbus_connection.unload_event);
  793. else
  794. vmbus_wait_for_unload();
  795. }
  796. static void check_ready_for_resume_event(void)
  797. {
  798. /*
  799. * If all the old primary channels have been fixed up, then it's safe
  800. * to resume.
  801. */
  802. if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
  803. complete(&vmbus_connection.ready_for_resume_event);
  804. }
  805. static void vmbus_setup_channel_state(struct vmbus_channel *channel,
  806. struct vmbus_channel_offer_channel *offer)
  807. {
  808. /*
  809. * Set up state for signalling the host.
  810. */
  811. channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
  812. channel->is_dedicated_interrupt =
  813. (offer->is_dedicated_interrupt != 0);
  814. channel->sig_event = offer->connection_id;
  815. memcpy(&channel->offermsg, offer,
  816. sizeof(struct vmbus_channel_offer_channel));
  817. channel->monitor_grp = (u8)offer->monitorid / 32;
  818. channel->monitor_bit = (u8)offer->monitorid % 32;
  819. channel->device_id = hv_get_dev_type(channel);
  820. }
  821. /*
  822. * find_primary_channel_by_offer - Get the channel object given the new offer.
  823. * This is only used in the resume path of hibernation.
  824. */
  825. static struct vmbus_channel *
  826. find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
  827. {
  828. struct vmbus_channel *channel = NULL, *iter;
  829. const guid_t *inst1, *inst2;
  830. /* Ignore sub-channel offers. */
  831. if (offer->offer.sub_channel_index != 0)
  832. return NULL;
  833. mutex_lock(&vmbus_connection.channel_mutex);
  834. list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
  835. inst1 = &iter->offermsg.offer.if_instance;
  836. inst2 = &offer->offer.if_instance;
  837. if (guid_equal(inst1, inst2)) {
  838. channel = iter;
  839. break;
  840. }
  841. }
  842. mutex_unlock(&vmbus_connection.channel_mutex);
  843. return channel;
  844. }
  845. static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
  846. {
  847. const guid_t *guid = &offer->offer.if_type;
  848. u16 i;
  849. if (!hv_is_isolation_supported())
  850. return true;
  851. if (is_hvsock_offer(offer))
  852. return true;
  853. for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
  854. if (guid_equal(guid, &vmbus_devs[i].guid))
  855. return vmbus_devs[i].allowed_in_isolated;
  856. }
  857. return false;
  858. }
  859. /*
  860. * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  861. *
  862. */
  863. static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
  864. {
  865. struct vmbus_channel_offer_channel *offer;
  866. struct vmbus_channel *oldchannel, *newchannel;
  867. size_t offer_sz;
  868. offer = (struct vmbus_channel_offer_channel *)hdr;
  869. trace_vmbus_onoffer(offer);
  870. if (!vmbus_is_valid_offer(offer)) {
  871. pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
  872. offer->child_relid);
  873. atomic_dec(&vmbus_connection.offer_in_progress);
  874. return;
  875. }
  876. oldchannel = find_primary_channel_by_offer(offer);
  877. if (oldchannel != NULL) {
  878. /*
  879. * We're resuming from hibernation: all the sub-channel and
  880. * hv_sock channels we had before the hibernation should have
  881. * been cleaned up, and now we must be seeing a re-offered
  882. * primary channel that we had before the hibernation.
  883. */
  884. /*
  885. * { Initially: channel relid = INVALID_RELID,
  886. * channels[valid_relid] = NULL }
  887. *
  888. * CPU1 CPU2
  889. *
  890. * [vmbus_onoffer()] [vmbus_device_release()]
  891. *
  892. * LOCK channel_mutex LOCK channel_mutex
  893. * STORE channel relid = valid_relid LOAD r1 = channel relid
  894. * MAP_RELID channel if (r1 != INVALID_RELID)
  895. * UNLOCK channel_mutex UNMAP_RELID channel
  896. * UNLOCK channel_mutex
  897. *
  898. * Forbids: r1 == valid_relid &&
  899. * channels[valid_relid] == channel
  900. *
  901. * Note. r1 can be INVALID_RELID only for an hv_sock channel.
  902. * None of the hv_sock channels which were present before the
  903. * suspend are re-offered upon the resume. See the WARN_ON()
  904. * in hv_process_channel_removal().
  905. */
  906. mutex_lock(&vmbus_connection.channel_mutex);
  907. atomic_dec(&vmbus_connection.offer_in_progress);
  908. WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
  909. /* Fix up the relid. */
  910. oldchannel->offermsg.child_relid = offer->child_relid;
  911. offer_sz = sizeof(*offer);
  912. if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
  913. /*
  914. * This is not an error, since the host can also change
  915. * the other field(s) of the offer, e.g. on WS RS5
  916. * (Build 17763), the offer->connection_id of the
  917. * Mellanox VF vmbus device can change when the host
  918. * reoffers the device upon resume.
  919. */
  920. pr_debug("vmbus offer changed: relid=%d\n",
  921. offer->child_relid);
  922. print_hex_dump_debug("Old vmbus offer: ",
  923. DUMP_PREFIX_OFFSET, 16, 4,
  924. &oldchannel->offermsg, offer_sz,
  925. false);
  926. print_hex_dump_debug("New vmbus offer: ",
  927. DUMP_PREFIX_OFFSET, 16, 4,
  928. offer, offer_sz, false);
  929. /* Fix up the old channel. */
  930. vmbus_setup_channel_state(oldchannel, offer);
  931. }
  932. /* Add the channel back to the array of channels. */
  933. vmbus_channel_map_relid(oldchannel);
  934. check_ready_for_resume_event();
  935. mutex_unlock(&vmbus_connection.channel_mutex);
  936. return;
  937. }
  938. /* Allocate the channel object and save this offer. */
  939. newchannel = alloc_channel();
  940. if (!newchannel) {
  941. vmbus_release_relid(offer->child_relid);
  942. atomic_dec(&vmbus_connection.offer_in_progress);
  943. pr_err("Unable to allocate channel object\n");
  944. return;
  945. }
  946. vmbus_setup_channel_state(newchannel, offer);
  947. vmbus_process_offer(newchannel);
  948. }
  949. static void check_ready_for_suspend_event(void)
  950. {
  951. /*
  952. * If all the sub-channels or hv_sock channels have been cleaned up,
  953. * then it's safe to suspend.
  954. */
  955. if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
  956. complete(&vmbus_connection.ready_for_suspend_event);
  957. }
  958. /*
  959. * vmbus_onoffer_rescind - Rescind offer handler.
  960. *
  961. * We queue a work item to process this offer synchronously
  962. */
  963. static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
  964. {
  965. struct vmbus_channel_rescind_offer *rescind;
  966. struct vmbus_channel *channel;
  967. struct device *dev;
  968. bool clean_up_chan_for_suspend;
  969. rescind = (struct vmbus_channel_rescind_offer *)hdr;
  970. trace_vmbus_onoffer_rescind(rescind);
  971. /*
  972. * The offer msg and the corresponding rescind msg
  973. * from the host are guaranteed to be ordered -
  974. * offer comes in first and then the rescind.
  975. * Since we process these events in work elements,
  976. * and with preemption, we may end up processing
  977. * the events out of order. We rely on the synchronization
  978. * provided by offer_in_progress and by channel_mutex for
  979. * ordering these events:
  980. *
  981. * { Initially: offer_in_progress = 1 }
  982. *
  983. * CPU1 CPU2
  984. *
  985. * [vmbus_onoffer()] [vmbus_onoffer_rescind()]
  986. *
  987. * LOCK channel_mutex WAIT_ON offer_in_progress == 0
  988. * DECREMENT offer_in_progress LOCK channel_mutex
  989. * STORE channels[] LOAD channels[]
  990. * UNLOCK channel_mutex UNLOCK channel_mutex
  991. *
  992. * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
  993. */
  994. while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
  995. /*
  996. * Wait here until no channel offer is currently
  997. * being processed.
  998. */
  999. msleep(1);
  1000. }
  1001. mutex_lock(&vmbus_connection.channel_mutex);
  1002. channel = relid2channel(rescind->child_relid);
  1003. if (channel != NULL) {
  1004. /*
  1005. * Guarantee that no other instance of vmbus_onoffer_rescind()
  1006. * has got a reference to the channel object. Synchronize on
  1007. * &vmbus_connection.channel_mutex.
  1008. */
  1009. if (channel->rescind_ref) {
  1010. mutex_unlock(&vmbus_connection.channel_mutex);
  1011. return;
  1012. }
  1013. channel->rescind_ref = true;
  1014. }
  1015. mutex_unlock(&vmbus_connection.channel_mutex);
  1016. if (channel == NULL) {
  1017. /*
  1018. * We failed in processing the offer message;
  1019. * we would have cleaned up the relid in that
  1020. * failure path.
  1021. */
  1022. return;
  1023. }
  1024. clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
  1025. is_sub_channel(channel);
  1026. /*
  1027. * Before setting channel->rescind in vmbus_rescind_cleanup(), we
  1028. * should make sure the channel callback is not running any more.
  1029. */
  1030. vmbus_reset_channel_cb(channel);
  1031. /*
  1032. * Now wait for offer handling to complete.
  1033. */
  1034. vmbus_rescind_cleanup(channel);
  1035. while (READ_ONCE(channel->probe_done) == false) {
  1036. /*
  1037. * Wait here until the offer handling (probe) for this
  1038. * channel has completed.
  1039. */
  1040. msleep(1);
  1041. }
  1042. /*
  1043. * At this point, the rescind handling can proceed safely.
  1044. */
  1045. if (channel->device_obj) {
  1046. if (channel->chn_rescind_callback) {
  1047. channel->chn_rescind_callback(channel);
  1048. if (clean_up_chan_for_suspend)
  1049. check_ready_for_suspend_event();
  1050. return;
  1051. }
  1052. /*
  1053. * We will have to unregister this device from the
  1054. * driver core.
  1055. */
  1056. dev = get_device(&channel->device_obj->device);
  1057. if (dev) {
  1058. vmbus_device_unregister(channel->device_obj);
  1059. put_device(dev);
  1060. }
  1061. } else if (channel->primary_channel != NULL) {
  1062. /*
  1063. * Sub-channel is being rescinded. Following is the channel
  1064. * close sequence when initiated from the driver (refer to
  1065. * vmbus_close() for details):
  1066. * 1. Close all sub-channels first
  1067. * 2. Then close the primary channel.
  1068. */
  1069. mutex_lock(&vmbus_connection.channel_mutex);
  1070. if (channel->state == CHANNEL_OPEN_STATE) {
  1071. /*
  1072. * The channel is currently not open;
  1073. * it is safe for us to cleanup the channel.
  1074. */
  1075. hv_process_channel_removal(channel);
  1076. } else {
  1077. complete(&channel->rescind_event);
  1078. }
  1079. mutex_unlock(&vmbus_connection.channel_mutex);
  1080. }
  1081. /* The "channel" may have been freed. Do not access it any longer. */
  1082. if (clean_up_chan_for_suspend)
  1083. check_ready_for_suspend_event();
  1084. }
  1085. void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
  1086. {
  1087. BUG_ON(!is_hvsock_channel(channel));
  1088. /* We always get a rescind msg when a connection is closed. */
  1089. while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
  1090. msleep(1);
  1091. vmbus_device_unregister(channel->device_obj);
  1092. }
  1093. EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
  1094. /*
  1095. * vmbus_onoffers_delivered -
  1096. * This is invoked when all offers have been delivered.
  1097. *
  1098. * Nothing to do here.
  1099. */
  1100. static void vmbus_onoffers_delivered(
  1101. struct vmbus_channel_message_header *hdr)
  1102. {
  1103. }
  1104. /*
  1105. * vmbus_onopen_result - Open result handler.
  1106. *
  1107. * This is invoked when we receive a response to our channel open request.
  1108. * Find the matching request, copy the response and signal the requesting
  1109. * thread.
  1110. */
  1111. static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
  1112. {
  1113. struct vmbus_channel_open_result *result;
  1114. struct vmbus_channel_msginfo *msginfo;
  1115. struct vmbus_channel_message_header *requestheader;
  1116. struct vmbus_channel_open_channel *openmsg;
  1117. unsigned long flags;
  1118. result = (struct vmbus_channel_open_result *)hdr;
  1119. trace_vmbus_onopen_result(result);
  1120. /*
  1121. * Find the open msg, copy the result and signal/unblock the wait event
  1122. */
  1123. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1124. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1125. msglistentry) {
  1126. requestheader =
  1127. (struct vmbus_channel_message_header *)msginfo->msg;
  1128. if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
  1129. openmsg =
  1130. (struct vmbus_channel_open_channel *)msginfo->msg;
  1131. if (openmsg->child_relid == result->child_relid &&
  1132. openmsg->openid == result->openid) {
  1133. memcpy(&msginfo->response.open_result,
  1134. result,
  1135. sizeof(
  1136. struct vmbus_channel_open_result));
  1137. complete(&msginfo->waitevent);
  1138. break;
  1139. }
  1140. }
  1141. }
  1142. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1143. }
  1144. /*
  1145. * vmbus_ongpadl_created - GPADL created handler.
  1146. *
  1147. * This is invoked when we receive a response to our gpadl create request.
  1148. * Find the matching request, copy the response and signal the requesting
  1149. * thread.
  1150. */
  1151. static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
  1152. {
  1153. struct vmbus_channel_gpadl_created *gpadlcreated;
  1154. struct vmbus_channel_msginfo *msginfo;
  1155. struct vmbus_channel_message_header *requestheader;
  1156. struct vmbus_channel_gpadl_header *gpadlheader;
  1157. unsigned long flags;
  1158. gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
  1159. trace_vmbus_ongpadl_created(gpadlcreated);
  1160. /*
  1161. * Find the establish msg, copy the result and signal/unblock the wait
  1162. * event
  1163. */
  1164. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1165. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1166. msglistentry) {
  1167. requestheader =
  1168. (struct vmbus_channel_message_header *)msginfo->msg;
  1169. if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
  1170. gpadlheader =
  1171. (struct vmbus_channel_gpadl_header *)requestheader;
  1172. if ((gpadlcreated->child_relid ==
  1173. gpadlheader->child_relid) &&
  1174. (gpadlcreated->gpadl == gpadlheader->gpadl)) {
  1175. memcpy(&msginfo->response.gpadl_created,
  1176. gpadlcreated,
  1177. sizeof(
  1178. struct vmbus_channel_gpadl_created));
  1179. complete(&msginfo->waitevent);
  1180. break;
  1181. }
  1182. }
  1183. }
  1184. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1185. }
  1186. /*
  1187. * vmbus_onmodifychannel_response - Modify Channel response handler.
  1188. *
  1189. * This is invoked when we receive a response to our channel modify request.
  1190. * Find the matching request, copy the response and signal the requesting thread.
  1191. */
  1192. static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
  1193. {
  1194. struct vmbus_channel_modifychannel_response *response;
  1195. struct vmbus_channel_msginfo *msginfo;
  1196. unsigned long flags;
  1197. response = (struct vmbus_channel_modifychannel_response *)hdr;
  1198. trace_vmbus_onmodifychannel_response(response);
  1199. /*
  1200. * Find the modify msg, copy the response and signal/unblock the wait event.
  1201. */
  1202. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1203. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
  1204. struct vmbus_channel_message_header *responseheader =
  1205. (struct vmbus_channel_message_header *)msginfo->msg;
  1206. if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
  1207. struct vmbus_channel_modifychannel *modifymsg;
  1208. modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
  1209. if (modifymsg->child_relid == response->child_relid) {
  1210. memcpy(&msginfo->response.modify_response, response,
  1211. sizeof(*response));
  1212. complete(&msginfo->waitevent);
  1213. break;
  1214. }
  1215. }
  1216. }
  1217. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1218. }
  1219. /*
  1220. * vmbus_ongpadl_torndown - GPADL torndown handler.
  1221. *
  1222. * This is invoked when we receive a response to our gpadl teardown request.
  1223. * Find the matching request, copy the response and signal the requesting
  1224. * thread.
  1225. */
  1226. static void vmbus_ongpadl_torndown(
  1227. struct vmbus_channel_message_header *hdr)
  1228. {
  1229. struct vmbus_channel_gpadl_torndown *gpadl_torndown;
  1230. struct vmbus_channel_msginfo *msginfo;
  1231. struct vmbus_channel_message_header *requestheader;
  1232. struct vmbus_channel_gpadl_teardown *gpadl_teardown;
  1233. unsigned long flags;
  1234. gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
  1235. trace_vmbus_ongpadl_torndown(gpadl_torndown);
  1236. /*
  1237. * Find the teardown msg, copy the result and signal/unblock the wait event
  1238. */
  1239. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1240. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1241. msglistentry) {
  1242. requestheader =
  1243. (struct vmbus_channel_message_header *)msginfo->msg;
  1244. if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
  1245. gpadl_teardown =
  1246. (struct vmbus_channel_gpadl_teardown *)requestheader;
  1247. if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
  1248. memcpy(&msginfo->response.gpadl_torndown,
  1249. gpadl_torndown,
  1250. sizeof(
  1251. struct vmbus_channel_gpadl_torndown));
  1252. complete(&msginfo->waitevent);
  1253. break;
  1254. }
  1255. }
  1256. }
  1257. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1258. }
  1259. /*
  1260. * vmbus_onversion_response - Version response handler
  1261. *
  1262. * This is invoked when we receive a response to our initiate contact request.
  1263. * Find the matching request, copy the response and signal the requesting
  1264. * thread.
  1265. */
  1266. static void vmbus_onversion_response(
  1267. struct vmbus_channel_message_header *hdr)
  1268. {
  1269. struct vmbus_channel_msginfo *msginfo;
  1270. struct vmbus_channel_message_header *requestheader;
  1271. struct vmbus_channel_version_response *version_response;
  1272. unsigned long flags;
  1273. version_response = (struct vmbus_channel_version_response *)hdr;
  1274. trace_vmbus_onversion_response(version_response);
  1275. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1276. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1277. msglistentry) {
  1278. requestheader =
  1279. (struct vmbus_channel_message_header *)msginfo->msg;
  1280. if (requestheader->msgtype ==
  1281. CHANNELMSG_INITIATE_CONTACT) {
  1282. memcpy(&msginfo->response.version_response,
  1283. version_response,
  1284. sizeof(struct vmbus_channel_version_response));
  1285. complete(&msginfo->waitevent);
  1286. }
  1287. }
  1288. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1289. }
  1290. /* Channel message dispatch table */
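  /*
   * Each entry lists the message type, the handler type (a non-zero value
   * marks a handler that is safe to call directly from the message DPC;
   * zero-type handlers may block and are deferred to a work queue by
   * vmbus_on_msg_dpc()), the handler itself, and the minimum payload
   * length expected for that message.
   */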
  1291. const struct vmbus_channel_message_table_entry
  1292. channel_message_table[CHANNELMSG_COUNT] = {
  1293. { CHANNELMSG_INVALID, 0, NULL, 0},
  1294. { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer,
  1295. sizeof(struct vmbus_channel_offer_channel)},
  1296. { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind,
  1297. sizeof(struct vmbus_channel_rescind_offer) },
  1298. { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0},
  1299. { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0},
  1300. { CHANNELMSG_OPENCHANNEL, 0, NULL, 0},
  1301. { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result,
  1302. sizeof(struct vmbus_channel_open_result)},
  1303. { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0},
  1304. { CHANNELMSG_GPADL_HEADER, 0, NULL, 0},
  1305. { CHANNELMSG_GPADL_BODY, 0, NULL, 0},
  1306. { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created,
  1307. sizeof(struct vmbus_channel_gpadl_created)},
  1308. { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0},
  1309. { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown,
  1310. sizeof(struct vmbus_channel_gpadl_torndown) },
  1311. { CHANNELMSG_RELID_RELEASED, 0, NULL, 0},
  1312. { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0},
  1313. { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response,
  1314. sizeof(struct vmbus_channel_version_response)},
  1315. { CHANNELMSG_UNLOAD, 0, NULL, 0},
  1316. { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0},
  1317. { CHANNELMSG_18, 0, NULL, 0},
  1318. { CHANNELMSG_19, 0, NULL, 0},
  1319. { CHANNELMSG_20, 0, NULL, 0},
  1320. { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
  1321. { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
  1322. { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
  1323. { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response,
  1324. sizeof(struct vmbus_channel_modifychannel_response)},
  1325. };
  1326. /*
  1327. * vmbus_onmessage - Handler for channel protocol messages.
  1328. *
  1329. * This is invoked in the vmbus worker thread context.
  1330. */
  1331. void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
  1332. {
  1333. trace_vmbus_on_message(hdr);
  1334. /*
  1335. * vmbus_on_msg_dpc() makes sure the hdr->msgtype here cannot go
  1336. * out of bounds and the message_handler pointer cannot be NULL.
  1337. */
  1338. channel_message_table[hdr->msgtype].message_handler(hdr);
  1339. }
  1340. /*
  1341. * vmbus_request_offers - Send a request to get all our pending offers.
  1342. */
  1343. int vmbus_request_offers(void)
  1344. {
  1345. struct vmbus_channel_message_header *msg;
  1346. struct vmbus_channel_msginfo *msginfo;
  1347. int ret;
  1348. msginfo = kzalloc(sizeof(*msginfo) +
  1349. sizeof(struct vmbus_channel_message_header),
  1350. GFP_KERNEL);
  1351. if (!msginfo)
  1352. return -ENOMEM;
  1353. msg = (struct vmbus_channel_message_header *)msginfo->msg;
  1354. msg->msgtype = CHANNELMSG_REQUESTOFFERS;
  1355. ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
  1356. true);
  1357. trace_vmbus_request_offers(ret);
  1358. if (ret != 0) {
  1359. pr_err("Unable to request offers - %d\n", ret);
  1360. goto cleanup;
  1361. }
  1362. cleanup:
  1363. kfree(msginfo);
  1364. return ret;
  1365. }
  1366. void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
  1367. void (*sc_cr_cb)(struct vmbus_channel *new_sc))
  1368. {
  1369. primary_channel->sc_creation_callback = sc_cr_cb;
  1370. }
  1371. EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
  1372. void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
  1373. void (*chn_rescind_cb)(struct vmbus_channel *))
  1374. {
  1375. channel->chn_rescind_callback = chn_rescind_cb;
  1376. }
  1377. EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);