mshyperv.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * HyperV Detection code.
  4. *
  5. * Copyright (C) 2010, Novell, Inc.
  6. * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
  7. */
  8. #include <linux/types.h>
  9. #include <linux/time.h>
  10. #include <linux/clocksource.h>
  11. #include <linux/init.h>
  12. #include <linux/export.h>
  13. #include <linux/hardirq.h>
  14. #include <linux/efi.h>
  15. #include <linux/interrupt.h>
  16. #include <linux/irq.h>
  17. #include <linux/kexec.h>
  18. #include <linux/i8253.h>
  19. #include <linux/random.h>
  20. #include <linux/swiotlb.h>
  21. #include <asm/processor.h>
  22. #include <asm/hypervisor.h>
  23. #include <asm/hyperv-tlfs.h>
  24. #include <asm/mshyperv.h>
  25. #include <asm/desc.h>
  26. #include <asm/idtentry.h>
  27. #include <asm/irq_regs.h>
  28. #include <asm/i8259.h>
  29. #include <asm/apic.h>
  30. #include <asm/timer.h>
  31. #include <asm/reboot.h>
  32. #include <asm/nmi.h>
  33. #include <clocksource/hyperv_timer.h>
  34. #include <asm/numa.h>
  35. #include <asm/coco.h>
  36. /* Is Linux running as the root partition? */
  37. bool hv_root_partition;
  38. struct ms_hyperv_info ms_hyperv;
  39. #if IS_ENABLED(CONFIG_HYPERV)
  40. static void (*vmbus_handler)(void);
  41. static void (*hv_stimer0_handler)(void);
  42. static void (*hv_kexec_handler)(void);
  43. static void (*hv_crash_handler)(struct pt_regs *regs);
  44. DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
  45. {
  46. struct pt_regs *old_regs = set_irq_regs(regs);
  47. inc_irq_stat(irq_hv_callback_count);
  48. if (vmbus_handler)
  49. vmbus_handler();
  50. if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
  51. ack_APIC_irq();
  52. set_irq_regs(old_regs);
  53. }
  54. void hv_setup_vmbus_handler(void (*handler)(void))
  55. {
  56. vmbus_handler = handler;
  57. }
  58. void hv_remove_vmbus_handler(void)
  59. {
  60. /* We have no way to deallocate the interrupt gate */
  61. vmbus_handler = NULL;
  62. }
  63. /*
  64. * Routines to do per-architecture handling of stimer0
  65. * interrupts when in Direct Mode
  66. */
  67. DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
  68. {
  69. struct pt_regs *old_regs = set_irq_regs(regs);
  70. inc_irq_stat(hyperv_stimer0_count);
  71. if (hv_stimer0_handler)
  72. hv_stimer0_handler();
  73. add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
  74. ack_APIC_irq();
  75. set_irq_regs(old_regs);
  76. }
  77. /* For x86/x64, override weak placeholders in hyperv_timer.c */
  78. void hv_setup_stimer0_handler(void (*handler)(void))
  79. {
  80. hv_stimer0_handler = handler;
  81. }
  82. void hv_remove_stimer0_handler(void)
  83. {
  84. /* We have no way to deallocate the interrupt gate */
  85. hv_stimer0_handler = NULL;
  86. }
  87. void hv_setup_kexec_handler(void (*handler)(void))
  88. {
  89. hv_kexec_handler = handler;
  90. }
  91. void hv_remove_kexec_handler(void)
  92. {
  93. hv_kexec_handler = NULL;
  94. }
  95. void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
  96. {
  97. hv_crash_handler = handler;
  98. }
  99. void hv_remove_crash_handler(void)
  100. {
  101. hv_crash_handler = NULL;
  102. }
  103. #ifdef CONFIG_KEXEC_CORE
  104. static void hv_machine_shutdown(void)
  105. {
  106. if (kexec_in_progress && hv_kexec_handler)
  107. hv_kexec_handler();
  108. /*
  109. * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
  110. * corrupts the old VP Assist Pages and can crash the kexec kernel.
  111. */
  112. if (kexec_in_progress && hyperv_init_cpuhp > 0)
  113. cpuhp_remove_state(hyperv_init_cpuhp);
  114. /* The function calls stop_other_cpus(). */
  115. native_machine_shutdown();
  116. /* Disable the hypercall page when there is only 1 active CPU. */
  117. if (kexec_in_progress)
  118. hyperv_cleanup();
  119. }
  120. static void hv_machine_crash_shutdown(struct pt_regs *regs)
  121. {
  122. if (hv_crash_handler)
  123. hv_crash_handler(regs);
  124. /* The function calls crash_smp_send_stop(). */
  125. native_machine_crash_shutdown(regs);
  126. /* Disable the hypercall page when there is only 1 active CPU. */
  127. hyperv_cleanup();
  128. }
  129. #endif /* CONFIG_KEXEC_CORE */
  130. #endif /* CONFIG_HYPERV */
  131. static uint32_t __init ms_hyperv_platform(void)
  132. {
  133. u32 eax;
  134. u32 hyp_signature[3];
  135. if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
  136. return 0;
  137. cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
  138. &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
  139. if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
  140. memcmp("Microsoft Hv", hyp_signature, 12))
  141. return 0;
  142. /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
  143. eax = cpuid_eax(HYPERV_CPUID_FEATURES);
  144. if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
  145. pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
  146. return 0;
  147. }
  148. if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
  149. pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
  150. return 0;
  151. }
  152. return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
  153. }
  154. static unsigned char hv_get_nmi_reason(void)
  155. {
  156. return 0;
  157. }
  158. #ifdef CONFIG_X86_LOCAL_APIC
  159. /*
  160. * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
  161. * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
  162. * unknown NMI on the first CPU which gets it.
  163. */
  164. static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
  165. {
  166. static atomic_t nmi_cpu = ATOMIC_INIT(-1);
  167. if (!unknown_nmi_panic)
  168. return NMI_DONE;
  169. if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
  170. return NMI_HANDLED;
  171. return NMI_DONE;
  172. }
  173. #endif
  174. static unsigned long hv_get_tsc_khz(void)
  175. {
  176. unsigned long freq;
  177. rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
  178. return freq / 1000;
  179. }
  180. #if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV)
  181. static void __init hv_smp_prepare_boot_cpu(void)
  182. {
  183. native_smp_prepare_boot_cpu();
  184. #if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS)
  185. hv_init_spinlocks();
  186. #endif
  187. }
  188. static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
  189. {
  190. #ifdef CONFIG_X86_64
  191. int i;
  192. int ret;
  193. #endif
  194. native_smp_prepare_cpus(max_cpus);
  195. #ifdef CONFIG_X86_64
  196. for_each_present_cpu(i) {
  197. if (i == 0)
  198. continue;
  199. ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
  200. BUG_ON(ret);
  201. }
  202. for_each_present_cpu(i) {
  203. if (i == 0)
  204. continue;
  205. ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i);
  206. BUG_ON(ret);
  207. }
  208. #endif
  209. }
  210. #endif
  211. static void __init ms_hyperv_init_platform(void)
  212. {
  213. int hv_max_functions_eax;
  214. int hv_host_info_eax;
  215. int hv_host_info_ebx;
  216. int hv_host_info_ecx;
  217. int hv_host_info_edx;
  218. #ifdef CONFIG_PARAVIRT
  219. pv_info.name = "Hyper-V";
  220. #endif
  221. /*
  222. * Extract the features and hints
  223. */
  224. ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
  225. ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
  226. ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
  227. ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
  228. hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
  229. pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
  230. ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
  231. ms_hyperv.misc_features);
  232. ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
  233. ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
  234. pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
  235. ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
  236. /*
  237. * Check CPU management privilege.
  238. *
  239. * To mirror what Windows does we should extract CPU management
  240. * features and use the ReservedIdentityBit to detect if Linux is the
  241. * root partition. But that requires negotiating CPU management
  242. * interface (a process to be finalized). For now, use the privilege
  243. * flag as the indicator for running as root.
  244. *
  245. * Hyper-V should never specify running as root and as a Confidential
  246. * VM. But to protect against a compromised/malicious Hyper-V trying
  247. * to exploit root behavior to expose Confidential VM memory, ignore
  248. * the root partition setting if also a Confidential VM.
  249. */
  250. if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
  251. !(ms_hyperv.priv_high & HV_ISOLATION)) {
  252. hv_root_partition = true;
  253. pr_info("Hyper-V: running as root partition\n");
  254. }
  255. /*
  256. * Extract host information.
  257. */
  258. if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
  259. hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
  260. hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
  261. hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
  262. hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
  263. pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
  264. hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
  265. hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
  266. hv_host_info_ecx, hv_host_info_edx >> 24);
  267. }
  268. if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
  269. ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
  270. x86_platform.calibrate_tsc = hv_get_tsc_khz;
  271. x86_platform.calibrate_cpu = hv_get_tsc_khz;
  272. }
  273. if (ms_hyperv.priv_high & HV_ISOLATION) {
  274. ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
  275. ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
  276. ms_hyperv.shared_gpa_boundary =
  277. BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
  278. pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
  279. ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
  280. if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
  281. static_branch_enable(&isolation_type_snp);
  282. #ifdef CONFIG_SWIOTLB
  283. swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
  284. #endif
  285. }
  286. /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
  287. if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
  288. if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
  289. cc_set_vendor(CC_VENDOR_HYPERV);
  290. }
  291. }
  292. if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
  293. ms_hyperv.nested_features =
  294. cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
  295. pr_info("Hyper-V: Nested features: 0x%x\n",
  296. ms_hyperv.nested_features);
  297. }
  298. #ifdef CONFIG_X86_LOCAL_APIC
  299. if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
  300. ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
  301. /*
  302. * Get the APIC frequency.
  303. */
  304. u64 hv_lapic_frequency;
  305. rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
  306. hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
  307. lapic_timer_period = hv_lapic_frequency;
  308. pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
  309. lapic_timer_period);
  310. }
  311. register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
  312. "hv_nmi_unknown");
  313. #endif
  314. #ifdef CONFIG_X86_IO_APIC
  315. no_timer_check = 1;
  316. #endif
  317. #if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
  318. machine_ops.shutdown = hv_machine_shutdown;
  319. machine_ops.crash_shutdown = hv_machine_crash_shutdown;
  320. #endif
  321. if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
  322. /*
  323. * Writing to synthetic MSR 0x40000118 updates/changes the
  324. * guest visible CPUIDs. Setting bit 0 of this MSR enables
  325. * guests to report invariant TSC feature through CPUID
  326. * instruction, CPUID 0x800000007/EDX, bit 8. See code in
  327. * early_init_intel() where this bit is examined. The
  328. * setting of this MSR bit should happen before init_intel()
  329. * is called.
  330. */
  331. wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
  332. setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
  333. }
  334. /*
  335. * Generation 2 instances don't support reading the NMI status from
  336. * 0x61 port.
  337. */
  338. if (efi_enabled(EFI_BOOT))
  339. x86_platform.get_nmi_reason = hv_get_nmi_reason;
  340. /*
  341. * Hyper-V VMs have a PIT emulation quirk such that zeroing the
  342. * counter register during PIT shutdown restarts the PIT. So it
  343. * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
  344. * to false tells pit_shutdown() not to zero the counter so that
  345. * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
  346. * and setting this value has no effect.
  347. */
  348. i8253_clear_counter_on_shutdown = false;
  349. #if IS_ENABLED(CONFIG_HYPERV)
  350. /*
  351. * Setup the hook to get control post apic initialization.
  352. */
  353. x86_platform.apic_post_init = hyperv_init;
  354. hyperv_setup_mmu_ops();
  355. /* Setup the IDT for hypervisor callback */
  356. alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
  357. /* Setup the IDT for reenlightenment notifications */
  358. if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) {
  359. alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
  360. asm_sysvec_hyperv_reenlightenment);
  361. }
  362. /* Setup the IDT for stimer0 */
  363. if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
  364. alloc_intr_gate(HYPERV_STIMER0_VECTOR,
  365. asm_sysvec_hyperv_stimer0);
  366. }
  367. # ifdef CONFIG_SMP
  368. smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
  369. if (hv_root_partition)
  370. smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
  371. # endif
  372. /*
  373. * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
  374. * set x2apic destination mode to physical mode when x2apic is available
  375. * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
  376. * have 8-bit APIC id.
  377. */
  378. # ifdef CONFIG_X86_X2APIC
  379. if (x2apic_supported())
  380. x2apic_phys = 1;
  381. # endif
  382. /* Register Hyper-V specific clocksource */
  383. hv_init_clocksource();
  384. #endif
  385. /*
  386. * TSC should be marked as unstable only after Hyper-V
  387. * clocksource has been initialized. This ensures that the
  388. * stability of the sched_clock is not altered.
  389. */
  390. if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
  391. mark_tsc_unstable("running on Hyper-V");
  392. hardlockup_detector_disable();
  393. }
  394. static bool __init ms_hyperv_x2apic_available(void)
  395. {
  396. return x2apic_supported();
  397. }
  398. /*
  399. * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping()
  400. * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the
  401. * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg().
  402. *
  403. * Note: for a VM on Hyper-V, the I/O-APIC is the only device which
  404. * (logically) generates MSIs directly to the system APIC irq domain.
  405. * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
  406. * pci-hyperv host bridge.
  407. */
  408. static bool __init ms_hyperv_msi_ext_dest_id(void)
  409. {
  410. u32 eax;
  411. eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE);
  412. if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE)
  413. return false;
  414. eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES);
  415. return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
  416. }
  417. const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
  418. .name = "Microsoft Hyper-V",
  419. .detect = ms_hyperv_platform,
  420. .type = X86_HYPER_MS_HYPERV,
  421. .init.x2apic_available = ms_hyperv_x2apic_available,
  422. .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
  423. .init.init_platform = ms_hyperv_init_platform,
  424. };