vmware.c

/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt)     "vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF              0x40000000
#define CPUID_VMWARE_FEATURES_LEAF          0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL   BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL    BIT(1)

#define VMWARE_HYPERVISOR_MAGIC             0x564D5868

#define VMWARE_CMD_GETVERSION               10
#define VMWARE_CMD_GETHZ                    45
#define VMWARE_CMD_GETVCPU_INFO             68
#define VMWARE_CMD_LEGACY_X2APIC            3
#define VMWARE_CMD_VCPU_RESERVED            31
#define VMWARE_CMD_STEALCLOCK               91

#define STEALCLOCK_NOT_AVAILABLE            (-1)
#define STEALCLOCK_DISABLED                 0
#define STEALCLOCK_ENABLED                  1

#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)                            \
        __asm__("inl (%%dx), %%eax" :                                   \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :            \
                "memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)                          \
        __asm__("vmcall" :                                              \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)                         \
        __asm__("vmmcall" :                                             \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {                \
        switch (vmware_hypercall_mode) {                        \
        case CPUID_VMWARE_FEATURES_ECX_VMCALL:                  \
                VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);         \
                break;                                          \
        case CPUID_VMWARE_FEATURES_ECX_VMMCALL:                 \
                VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);        \
                break;                                          \
        default:                                                \
                VMWARE_PORT(cmd, eax, ebx, ecx, edx);           \
                break;                                          \
        }                                                       \
} while (0)
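
/*
 * Usage sketch (illustrative, mirroring the call in __vmware_platform()
 * below): a caller names the command and supplies four u32 lvalues, and
 * VMWARE_CMD() dispatches to one of the three flavours above depending on
 * vmware_hypercall_mode:
 *
 *         uint32_t eax, ebx, ecx, edx;
 *
 *         VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
 *
 * All three variants load EAX = VMWARE_HYPERVISOR_MAGIC and
 * ECX = VMWARE_CMD_GETVERSION; the port variant is the legacy I/O-port
 * backdoor, while vmcall/vmmcall are used when CPUID leaf 0x40000010
 * advertises them.
 */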

struct vmware_steal_time {
        union {
                uint64_t clock; /* stolen time counter in units of vtsc */
                struct {
                        /* only for little-endian */
                        uint32_t clock_low;
                        uint32_t clock_high;
                };
        };
        uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
        uint32_t eax, ebx, ecx, edx;

        VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
        return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
        return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
        vmw_sched_clock = false;
        return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
        steal_acc = false;
        return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
        unsigned long long ns;

        ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
                             vmware_cyc2ns.cyc2ns_shift);
        ns -= vmware_cyc2ns.cyc2ns_offset;
        return ns;
}

static void __init vmware_cyc2ns_setup(void)
{
        struct cyc2ns_data *d = &vmware_cyc2ns;
        unsigned long long tsc_now = rdtsc();

        clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
                               vmware_tsc_khz, NSEC_PER_MSEC, 0);
        d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
                                           d->cyc2ns_shift);

        pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
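
/*
 * A sketch of the arithmetic above (for reference, illustrative numbers):
 * clocks_calc_mult_shift() picks cyc2ns_mul/cyc2ns_shift so that
 *
 *         ns ~= (cycles * cyc2ns_mul) >> cyc2ns_shift
 *
 * for a counter running at vmware_tsc_khz (kHz in, NSEC_PER_MSEC out).
 * The offset is the converted TSC value sampled in vmware_cyc2ns_setup(),
 * so vmware_sched_clock() effectively reports nanoseconds elapsed since
 * setup ran. For example, with a 2,000,000 kHz (2 GHz) TSC the resulting
 * scale is roughly 0.5 ns per cycle.
 */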

static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
        uint32_t result, info;

        asm volatile (VMWARE_HYPERCALL :
                "=a"(result),
                "=c"(info) :
                "a"(VMWARE_HYPERVISOR_MAGIC),
                "b"(0),
                "c"(VMWARE_CMD_STEALCLOCK),
                "d"(0),
                "S"(arg1),
                "D"(arg2) :
                "memory");
        return result;
}
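
/*
 * As used by the callers below: the STEALCLOCK command takes the high and
 * low 32 bits of a physical address in ESI/EDI ("S"/"D" above) to enable
 * accounting into that per-cpu page, or (0, 1) to disable it, and returns
 * one of the STEALCLOCK_* status codes in EAX, e.g.:
 *
 *         vmware_cmd_stealclock(upper_32_bits(pa), lower_32_bits(pa));
 *         vmware_cmd_stealclock(0, 1);    // disable / probe availability
 */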

static bool stealclock_enable(phys_addr_t pa)
{
        return vmware_cmd_stealclock(upper_32_bits(pa),
                                     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
        return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
        __stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
        return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu: the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
        struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
        uint64_t clock;

        if (IS_ENABLED(CONFIG_64BIT))
                clock = READ_ONCE(steal->clock);
        else {
                uint32_t initial_high, low, high;

                do {
                        initial_high = READ_ONCE(steal->clock_high);
                        /* Do not reorder initial_high and high readings */
                        virt_rmb();
                        low = READ_ONCE(steal->clock_low);
                        /* Keep low reading in between */
                        virt_rmb();
                        high = READ_ONCE(steal->clock_high);
                } while (initial_high != high);

                clock = ((uint64_t)high << 32) | low;
        }
        return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
                               vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
        int cpu = smp_processor_id();
        struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

        if (!has_steal_clock)
                return;

        if (!stealclock_enable(slow_virt_to_phys(st))) {
                has_steal_clock = false;
                return;
        }

        pr_info("vmware-stealtime: cpu %d, pa %llx\n",
                cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
        if (!has_steal_clock)
                return;

        stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
        if (has_steal_clock)
                vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
        vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
                                   unsigned long code, void *unused)
{
        if (code == SYS_RESTART)
                on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
        return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
        .notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
        vmware_guest_cpu_init();
        native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
        local_irq_disable();
        vmware_guest_cpu_init();
        local_irq_enable();
        return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
        local_irq_disable();
        vmware_disable_steal_time();
        local_irq_enable();
        return 0;
}
#endif

static __init int activate_jump_labels(void)
{
        if (has_steal_clock) {
                static_key_slow_inc(&paravirt_steal_enabled);
                if (steal_acc)
                        static_key_slow_inc(&paravirt_steal_rq_enabled);
        }

        return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
        pv_info.name = "VMware hypervisor";
        pv_ops.cpu.io_delay = paravirt_nop;

        if (vmware_tsc_khz == 0)
                return;

        vmware_cyc2ns_setup();

        if (vmw_sched_clock)
                paravirt_set_sched_clock(vmware_sched_clock);

        if (vmware_is_stealclock_available()) {
                has_steal_clock = true;
                static_call_update(pv_steal_clock, vmware_steal_clock);

                /* We use reboot notifier only to disable steal clock */
                register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
                smp_ops.smp_prepare_boot_cpu =
                        vmware_smp_prepare_boot_cpu;
                if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                              "x86/vmware:online",
                                              vmware_cpu_online,
                                              vmware_cpu_down_prepare) < 0)
                        pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
                vmware_guest_cpu_init();
#endif
        }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing difference when running on virtual cpus, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
 * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel could just trust the hypervisor with providing a
 * reliable virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
        setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
        setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
        if (vmware_tsc_khz)
                setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
        if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMCALL);
        else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

static void __init vmware_platform_setup(void)
{
        uint32_t eax, ebx, ecx, edx;
        uint64_t lpj, tsc_khz;

        VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

        if (ebx != UINT_MAX) {
                lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
                do_div(tsc_khz, 1000);
                WARN_ON(tsc_khz >> 32);
                pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
                        (unsigned long) tsc_khz / 1000,
                        (unsigned long) tsc_khz % 1000);

                if (!preset_lpj) {
                        do_div(lpj, HZ);
                        preset_lpj = lpj;
                }

                vmware_tsc_khz = tsc_khz;
                x86_platform.calibrate_tsc = vmware_get_tsc_khz;
                x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
                /* Skip lapic calibration since we know the bus frequency. */
                lapic_timer_period = ecx / HZ;
                pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
                        ecx);
#endif
        } else {
                pr_warn("Failed to get TSC freq from the hypervisor\n");
        }

        vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
        no_timer_check = 1;
#endif

        vmware_set_capabilities();
}
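
/*
 * Worked example of the GETHZ arithmetic above (illustrative numbers):
 * the TSC frequency comes back in Hz split across EBX:EAX (high:low).
 * For a 2.5 GHz host TSC, EBX:EAX = 2,500,000,000, so
 *
 *         tsc_khz = 2500000000 / 1000 = 2500000      -> "2500.000 MHz"
 *         lpj     = 2500000000 / HZ                  (10000000 at HZ=250)
 *
 * and ECX carries the APIC bus frequency in Hz, which seeds
 * lapic_timer_period as ecx / HZ.
 */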

static u8 __init vmware_select_hypercall(void)
{
        int eax, ebx, ecx, edx;

        cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
        return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
                       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * While checking the dmi string information, just checking the product
 * serial key should be enough, as this will always have a VMware
 * specific string when running under VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
                unsigned int eax;
                unsigned int hyper_vendor_id[3];

                cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
                      &hyper_vendor_id[1], &hyper_vendor_id[2]);
                if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
                        if (eax >= CPUID_VMWARE_FEATURES_LEAF)
                                vmware_hypercall_mode =
                                        vmware_select_hypercall();

                        pr_info("hypercall mode: 0x%02x\n",
                                (unsigned int) vmware_hypercall_mode);
                        return CPUID_VMWARE_INFO_LEAF;
                }
        } else if (dmi_available && dmi_name_in_serial("VMware") &&
                   __vmware_platform())
                return 1;

        return 0;
}

/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
        uint32_t eax, ebx, ecx, edx;

        VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
        return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
                (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}
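
/*
 * For reference on the check above: GETVCPU_INFO returns a bitmask in EAX,
 * of which this file consumes only bit 3 (VMWARE_CMD_LEGACY_X2APIC, legacy
 * x2apic usable) and bit 31 (VMWARE_CMD_VCPU_RESERVED, presumably meaning
 * the returned info is not valid). The function therefore reports true only
 * when the reserved bit is clear and the legacy-x2apic bit is set.
 */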

#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
                                        struct pt_regs *regs)
{
        /* Copy VMWARE specific Hypercall parameters to the GHCB */
        ghcb_set_rip(ghcb, regs->ip);
        ghcb_set_rbx(ghcb, regs->bx);
        ghcb_set_rcx(ghcb, regs->cx);
        ghcb_set_rdx(ghcb, regs->dx);
        ghcb_set_rsi(ghcb, regs->si);
        ghcb_set_rdi(ghcb, regs->di);
        ghcb_set_rbp(ghcb, regs->bp);
}

static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
        if (!(ghcb_rbx_is_valid(ghcb) &&
              ghcb_rcx_is_valid(ghcb) &&
              ghcb_rdx_is_valid(ghcb) &&
              ghcb_rsi_is_valid(ghcb) &&
              ghcb_rdi_is_valid(ghcb) &&
              ghcb_rbp_is_valid(ghcb)))
                return false;

        regs->bx = ghcb_get_rbx(ghcb);
        regs->cx = ghcb_get_rcx(ghcb);
        regs->dx = ghcb_get_rdx(ghcb);
        regs->si = ghcb_get_rsi(ghcb);
        regs->di = ghcb_get_rdi(ghcb);
        regs->bp = ghcb_get_rbp(ghcb);

        return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
        .name                           = "VMware",
        .detect                         = vmware_platform,
        .type                           = X86_HYPER_VMWARE,
        .init.init_platform             = vmware_platform_setup,
        .init.x2apic_available          = vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
        .runtime.sev_es_hcall_prepare   = vmware_sev_es_hcall_prepare,
        .runtime.sev_es_hcall_finish    = vmware_sev_es_hcall_finish,
#endif
};