hv.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <[email protected]>
  7. * Hank Janssen <[email protected]>
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/io.h>
  11. #include <linux/kernel.h>
  12. #include <linux/mm.h>
  13. #include <linux/slab.h>
  14. #include <linux/vmalloc.h>
  15. #include <linux/hyperv.h>
  16. #include <linux/random.h>
  17. #include <linux/clockchips.h>
  18. #include <linux/delay.h>
  19. #include <linux/interrupt.h>
  20. #include <clocksource/hyperv_timer.h>
  21. #include <asm/mshyperv.h>
  22. #include "hyperv_vmbus.h"
  23. /* The one and only */
  24. struct hv_context hv_context;
  25. /*
  26. * hv_init - Main initialization routine.
  27. *
  28. * This routine must be called before any other routines in here are called
  29. */
  30. int hv_init(void)
  31. {
  32. hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
  33. if (!hv_context.cpu_context)
  34. return -ENOMEM;
  35. return 0;
  36. }
  37. /*
  38. * Functions for allocating and freeing memory with size and
  39. * alignment HV_HYP_PAGE_SIZE. These functions are needed because
  40. * the guest page size may not be the same as the Hyper-V page
  41. * size. We depend upon kmalloc() aligning power-of-two size
  42. * allocations to the allocation size boundary, so that the
  43. * allocated memory appears to Hyper-V as a page of the size
  44. * it expects.
  45. */
  46. void *hv_alloc_hyperv_page(void)
  47. {
  48. BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
  49. if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  50. return (void *)__get_free_page(GFP_KERNEL);
  51. else
  52. return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
  53. }
  54. void *hv_alloc_hyperv_zeroed_page(void)
  55. {
  56. if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  57. return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  58. else
  59. return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
  60. }
  61. void hv_free_hyperv_page(unsigned long addr)
  62. {
  63. if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  64. free_page(addr);
  65. else
  66. kfree((void *)addr);
  67. }
  68. /*
  69. * hv_post_message - Post a message using the hypervisor message IPC.
  70. *
  71. * This involves a hypercall.
  72. */
  73. int hv_post_message(union hv_connection_id connection_id,
  74. enum hv_message_type message_type,
  75. void *payload, size_t payload_size)
  76. {
  77. struct hv_input_post_message *aligned_msg;
  78. struct hv_per_cpu_context *hv_cpu;
  79. u64 status;
  80. if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
  81. return -EMSGSIZE;
  82. hv_cpu = get_cpu_ptr(hv_context.cpu_context);
  83. aligned_msg = hv_cpu->post_msg_page;
  84. aligned_msg->connectionid = connection_id;
  85. aligned_msg->reserved = 0;
  86. aligned_msg->message_type = message_type;
  87. aligned_msg->payload_size = payload_size;
  88. memcpy((void *)aligned_msg->payload, payload, payload_size);
  89. if (hv_isolation_type_snp())
  90. status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
  91. (void *)aligned_msg, NULL,
  92. sizeof(*aligned_msg));
  93. else
  94. status = hv_do_hypercall(HVCALL_POST_MESSAGE,
  95. aligned_msg, NULL);
  96. /* Preemption must remain disabled until after the hypercall
  97. * so some other thread can't get scheduled onto this cpu and
  98. * corrupt the per-cpu post_msg_page
  99. */
  100. put_cpu_ptr(hv_cpu);
  101. return hv_result(status);
  102. }
  103. int hv_synic_alloc(void)
  104. {
  105. int cpu;
  106. struct hv_per_cpu_context *hv_cpu;
  107. /*
  108. * First, zero all per-cpu memory areas so hv_synic_free() can
  109. * detect what memory has been allocated and cleanup properly
  110. * after any failures.
  111. */
  112. for_each_present_cpu(cpu) {
  113. hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
  114. memset(hv_cpu, 0, sizeof(*hv_cpu));
  115. }
  116. hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
  117. GFP_KERNEL);
  118. if (hv_context.hv_numa_map == NULL) {
  119. pr_err("Unable to allocate NUMA map\n");
  120. goto err;
  121. }
  122. for_each_present_cpu(cpu) {
  123. hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
  124. tasklet_init(&hv_cpu->msg_dpc,
  125. vmbus_on_msg_dpc, (unsigned long) hv_cpu);
  126. /*
  127. * Synic message and event pages are allocated by paravisor.
  128. * Skip these pages allocation here.
  129. */
  130. if (!hv_isolation_type_snp()) {
  131. hv_cpu->synic_message_page =
  132. (void *)get_zeroed_page(GFP_ATOMIC);
  133. if (hv_cpu->synic_message_page == NULL) {
  134. pr_err("Unable to allocate SYNIC message page\n");
  135. goto err;
  136. }
  137. hv_cpu->synic_event_page =
  138. (void *)get_zeroed_page(GFP_ATOMIC);
  139. if (hv_cpu->synic_event_page == NULL) {
  140. pr_err("Unable to allocate SYNIC event page\n");
  141. goto err;
  142. }
  143. }
  144. hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
  145. if (hv_cpu->post_msg_page == NULL) {
  146. pr_err("Unable to allocate post msg page\n");
  147. goto err;
  148. }
  149. }
  150. return 0;
  151. err:
  152. /*
  153. * Any memory allocations that succeeded will be freed when
  154. * the caller cleans up by calling hv_synic_free()
  155. */
  156. return -ENOMEM;
  157. }
  158. void hv_synic_free(void)
  159. {
  160. int cpu;
  161. for_each_present_cpu(cpu) {
  162. struct hv_per_cpu_context *hv_cpu
  163. = per_cpu_ptr(hv_context.cpu_context, cpu);
  164. free_page((unsigned long)hv_cpu->synic_event_page);
  165. free_page((unsigned long)hv_cpu->synic_message_page);
  166. free_page((unsigned long)hv_cpu->post_msg_page);
  167. }
  168. kfree(hv_context.hv_numa_map);
  169. }
  170. /*
  171. * hv_synic_init - Initialize the Synthetic Interrupt Controller.
  172. *
  173. * If it is already initialized by another entity (ie x2v shim), we need to
  174. * retrieve the initialized message and event pages. Otherwise, we create and
  175. * initialize the message and event pages.
  176. */
  177. void hv_synic_enable_regs(unsigned int cpu)
  178. {
  179. struct hv_per_cpu_context *hv_cpu
  180. = per_cpu_ptr(hv_context.cpu_context, cpu);
  181. union hv_synic_simp simp;
  182. union hv_synic_siefp siefp;
  183. union hv_synic_sint shared_sint;
  184. union hv_synic_scontrol sctrl;
  185. /* Setup the Synic's message page */
  186. simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
  187. simp.simp_enabled = 1;
  188. if (hv_isolation_type_snp()) {
  189. hv_cpu->synic_message_page
  190. = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
  191. HV_HYP_PAGE_SIZE, MEMREMAP_WB);
  192. if (!hv_cpu->synic_message_page)
  193. pr_err("Fail to map syinc message page.\n");
  194. } else {
  195. simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
  196. >> HV_HYP_PAGE_SHIFT;
  197. }
  198. hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
  199. /* Setup the Synic's event page */
  200. siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
  201. siefp.siefp_enabled = 1;
  202. if (hv_isolation_type_snp()) {
  203. hv_cpu->synic_event_page =
  204. memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
  205. HV_HYP_PAGE_SIZE, MEMREMAP_WB);
  206. if (!hv_cpu->synic_event_page)
  207. pr_err("Fail to map syinc event page.\n");
  208. } else {
  209. siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
  210. >> HV_HYP_PAGE_SHIFT;
  211. }
  212. hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
  213. /* Setup the shared SINT. */
  214. if (vmbus_irq != -1)
  215. enable_percpu_irq(vmbus_irq, 0);
  216. shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
  217. VMBUS_MESSAGE_SINT);
  218. shared_sint.vector = vmbus_interrupt;
  219. shared_sint.masked = false;
  220. /*
  221. * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
  222. * it doesn't provide a recommendation flag and AEOI must be disabled.
  223. */
  224. #ifdef HV_DEPRECATING_AEOI_RECOMMENDED
  225. shared_sint.auto_eoi =
  226. !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
  227. #else
  228. shared_sint.auto_eoi = 0;
  229. #endif
  230. hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
  231. shared_sint.as_uint64);
  232. /* Enable the global synic bit */
  233. sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
  234. sctrl.enable = 1;
  235. hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
  236. }
  237. int hv_synic_init(unsigned int cpu)
  238. {
  239. hv_synic_enable_regs(cpu);
  240. hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
  241. return 0;
  242. }
  243. /*
  244. * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  245. */
  246. void hv_synic_disable_regs(unsigned int cpu)
  247. {
  248. struct hv_per_cpu_context *hv_cpu
  249. = per_cpu_ptr(hv_context.cpu_context, cpu);
  250. union hv_synic_sint shared_sint;
  251. union hv_synic_simp simp;
  252. union hv_synic_siefp siefp;
  253. union hv_synic_scontrol sctrl;
  254. shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
  255. VMBUS_MESSAGE_SINT);
  256. shared_sint.masked = 1;
  257. /* Need to correctly cleanup in the case of SMP!!! */
  258. /* Disable the interrupt */
  259. hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
  260. shared_sint.as_uint64);
  261. simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
  262. /*
  263. * In Isolation VM, sim and sief pages are allocated by
  264. * paravisor. These pages also will be used by kdump
  265. * kernel. So just reset enable bit here and keep page
  266. * addresses.
  267. */
  268. simp.simp_enabled = 0;
  269. if (hv_isolation_type_snp())
  270. memunmap(hv_cpu->synic_message_page);
  271. else
  272. simp.base_simp_gpa = 0;
  273. hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
  274. siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
  275. siefp.siefp_enabled = 0;
  276. if (hv_isolation_type_snp())
  277. memunmap(hv_cpu->synic_event_page);
  278. else
  279. siefp.base_siefp_gpa = 0;
  280. hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
  281. /* Disable the global synic bit */
  282. sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
  283. sctrl.enable = 0;
  284. hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
  285. if (vmbus_irq != -1)
  286. disable_percpu_irq(vmbus_irq);
  287. }
  288. #define HV_MAX_TRIES 3
  289. /*
  290. * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
  291. * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
  292. * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
  293. *
  294. * If a bit is set, that means there is a pending channel interrupt. The expectation is
  295. * that the normal interrupt handling mechanism will find and process the channel interrupt
  296. * "very soon", and in the process clear the bit.
  297. */
  298. static bool hv_synic_event_pending(void)
  299. {
  300. struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
  301. union hv_synic_event_flags *event =
  302. (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
  303. unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
  304. bool pending;
  305. u32 relid;
  306. int tries = 0;
  307. retry:
  308. pending = false;
  309. for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
  310. /* Special case - VMBus channel protocol messages */
  311. if (relid == 0)
  312. continue;
  313. pending = true;
  314. break;
  315. }
  316. if (pending && tries++ < HV_MAX_TRIES) {
  317. usleep_range(10000, 20000);
  318. goto retry;
  319. }
  320. return pending;
  321. }
  322. int hv_synic_cleanup(unsigned int cpu)
  323. {
  324. struct vmbus_channel *channel, *sc;
  325. bool channel_found = false;
  326. if (vmbus_connection.conn_state != CONNECTED)
  327. goto always_cleanup;
  328. /*
  329. * Hyper-V does not provide a way to change the connect CPU once
  330. * it is set; we must prevent the connect CPU from going offline
  331. * while the VM is running normally. But in the panic or kexec()
  332. * path where the vmbus is already disconnected, the CPU must be
  333. * allowed to shut down.
  334. */
  335. if (cpu == VMBUS_CONNECT_CPU)
  336. return -EBUSY;
  337. /*
  338. * Search for channels which are bound to the CPU we're about to
  339. * cleanup. In case we find one and vmbus is still connected, we
  340. * fail; this will effectively prevent CPU offlining.
  341. *
  342. * TODO: Re-bind the channels to different CPUs.
  343. */
  344. mutex_lock(&vmbus_connection.channel_mutex);
  345. list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
  346. if (channel->target_cpu == cpu) {
  347. channel_found = true;
  348. break;
  349. }
  350. list_for_each_entry(sc, &channel->sc_list, sc_list) {
  351. if (sc->target_cpu == cpu) {
  352. channel_found = true;
  353. break;
  354. }
  355. }
  356. if (channel_found)
  357. break;
  358. }
  359. mutex_unlock(&vmbus_connection.channel_mutex);
  360. if (channel_found)
  361. return -EBUSY;
  362. /*
  363. * channel_found == false means that any channels that were previously
  364. * assigned to the CPU have been reassigned elsewhere with a call of
  365. * vmbus_send_modifychannel(). Scan the event flags page looking for
  366. * bits that are set and waiting with a timeout for vmbus_chan_sched()
  367. * to process such bits. If bits are still set after this operation
  368. * and VMBus is connected, fail the CPU offlining operation.
  369. */
  370. if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
  371. return -EBUSY;
  372. always_cleanup:
  373. hv_stimer_legacy_cleanup(cpu);
  374. hv_synic_disable_regs(cpu);
  375. return 0;
  376. }