sev-shared.c

// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <[email protected]>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#ifndef __BOOT_COMPRESSED
#define error(v)	pr_err(v)
#define has_cpuflag(f)	boot_cpu_has(f)
#endif

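/*
 * In the pre-decompression environment (__BOOT_COMPRESSED) the boot code
 * supplies its own error() and has_cpuflag() implementations, since
 * pr_err() and boot_cpu_has() are not available there; the mappings
 * above are only needed for the running kernel.
 */
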
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
	u32 fn;
	u32 subfn;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
};

/*
 * Individual entries of the SNP CPUID table, as defined by the SNP
 * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
 */
struct snp_cpuid_fn {
	u32 eax_in;
	u32 ecx_in;
	u64 xcr0_in;
	u64 xss_in;
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u64 __reserved;
} __packed;

/*
 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
 * of 64 entries per CPUID table.
 */
#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_table {
	u32 count;
	u32 __reserved1;
	u64 __reserved2;
	struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
} __packed;

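/*
 * Each packed snp_cpuid_fn entry is 48 bytes, so the full table is
 * 16 + 64 * 48 = 3088 bytes and fits comfortably within the single
 * 4K CPUID page provided by firmware.
 */
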
/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

static bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
static u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

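/*
 * Register the GHCB page's GPA with the hypervisor via the MSR protocol.
 * For SEV-SNP guests the GHCB GPA must be registered before the page can
 * be used, and the hypervisor is expected to echo the same PFN back in
 * its response; anything else indicates a misbehaving hypervisor, so the
 * guest terminates rather than continue with an unusable GHCB.
 */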
static void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

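/*
 * The SEV information response carries the minimum and maximum GHCB
 * protocol versions the hypervisor supports. Negotiation succeeds only
 * if that range overlaps the [GHCB_PROTOCOL_MIN, GHCB_PROTOCOL_MAX]
 * range supported by the guest, and the highest mutually supported
 * version is selected.
 */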
static bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

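/*
 * Clear the GHCB's exit code and valid bitmap before reuse so that
 * stale register state from a previous exit cannot be mistaken for
 * state written for the current one.
 */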
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
	ghcb->save.sw_exit_code = 0;
	__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static bool vc_decoding_needed(unsigned long exit_code)
{
	/* Exceptions don't require decoding of the instruction */
	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
		 exit_code <= SVM_EXIT_LAST_EXCP);
}

static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
				      struct pt_regs *regs,
				      unsigned long exit_code)
{
	enum es_result ret = ES_OK;

	memset(ctxt, 0, sizeof(*ctxt));
	ctxt->regs = regs;

	if (vc_decoding_needed(exit_code))
		ret = vc_decode_insn(ctxt);

	return ret;
}

static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
	ctxt->regs->ip += ctxt->insn.length;
}

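/*
 * Check the outcome of a GHCB-based hypercall. Per the GHCB protocol,
 * the lower 32 bits of sw_exit_info_1 are 0 on success and 1 when the
 * hypervisor has placed exception-injection information (in the
 * SVM_EVTINJ format) into sw_exit_info_2.
 */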
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

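/*
 * Issue a hypercall via the shared GHCB page. Callers set any input
 * registers via the ghcb_set_*() accessors (which also mark them in the
 * valid bitmap), then on ES_OK read results from ghcb->save.* after
 * checking the corresponding ghcb_*_is_valid() bits, as the CPUID and
 * IOIO handlers below do.
 */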
static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt,
					  u64 exit_code, u64 exit_info_1,
					  u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

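/*
 * Fetch a single CPUID register via the GHCB MSR protocol. The request
 * encodes the function number and which of EAX/EBX/ECX/EDX is wanted;
 * the hypervisor returns that register's value in the upper 32 bits of
 * the response. One full leaf therefore costs four VMGEXITs.
 */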
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
		    : __sev_cpuid_hv_msr(leaf);
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	void *ptr;

	asm ("lea cpuid_table_copy(%%rip), %0"
	     : "=r" (ptr)
	     : "p" (&cpuid_table_copy));

	return ptr;
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only values that differ between these versions/table
 * entries is the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since base/legacy XSAVE area size is documented as 0x240, use that value
 * directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}

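/*
 * Example for snp_cpuid_calc_xsave_size() above, assuming the standard
 * XSAVE layout: with xfeatures_en = 0x7 (x87, SSE, AVX) and
 * compacted == false, the AVX entry (0xD subfunction 2) reports
 * eax = 0x100 (size) and ebx = 0x240 (offset), so the result is
 * max(0x240, 0x240 + 0x100) = 0x340 bytes.
 */
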
static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(ghcb, ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

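/*
 * Fix up leaves whose values depend on runtime state that the static
 * CPUID table cannot know: APIC IDs and the APIC-enabled bit are taken
 * from the hypervisor's values, while bits like OSXSAVE and OSPKE are
 * derived from the guest's own CR4.
 */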
static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		}
		break;
	case 0x8000001E:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leafs. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (__sev_cpuid_hv_msr(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */
	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if (fn == 0x8000001f && !(regs->ax & BIT(1)))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

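/*
 * A user-mode string instruction must not be allowed to touch kernel
 * addresses; emulating it anyway would hand user space a primitive for
 * reading or writing kernel memory. Report such accesses as a page
 * fault against the offending address instead.
 */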
static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
					   unsigned long address,
					   bool write)
{
	if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_USER;
		ctxt->fi.cr2        = address;
		if (write)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return ES_EXCEPTION;
	}

	return ES_OK;
}

static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
					  void *src, char *buf,
					  unsigned int data_size,
					  unsigned int count,
					  bool backwards)
{
	int i, b = backwards ? -1 : 1;
	unsigned long address = (unsigned long)src;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, false);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *s = src + (i * data_size * b);
		char *d = buf + (i * data_size);

		ret = vc_read_mem(ctxt, s, d, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
					   void *dst, char *buf,
					   unsigned int data_size,
					   unsigned int count,
					   bool backwards)
{
	int i, s = backwards ? -1 : 1;
	unsigned long address = (unsigned long)dst;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, true);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *d = dst + (i * data_size * s);
		char *b = buf + (i * data_size);

		ret = vc_write_mem(ctxt, d, b, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

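/*
 * The bits below build SVM IOIO exit information (EXITINFO1) as laid
 * out in the AMD APM: bit 0 selects IN vs. OUT, bit 2 marks string
 * instructions, bit 3 a REP prefix, bits 4-6 the data size, bits 7-9
 * the address size, bits 10-12 the effective segment, and bits 16-31
 * the port number.
 */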
#define IOIO_TYPE_STR	BIT(2)
#define IOIO_TYPE_IN	1
#define IOIO_TYPE_INS	(IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT	0
#define IOIO_TYPE_OUTS	(IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP	BIT(3)

#define IOIO_ADDR_64	BIT(9)
#define IOIO_ADDR_32	BIT(8)
#define IOIO_ADDR_16	BIT(7)

#define IOIO_DATA_32	BIT(6)
#define IOIO_DATA_16	BIT(5)
#define IOIO_DATA_8	BIT(4)

#define IOIO_SEG_ES	(0 << 10)
#define IOIO_SEG_DS	(3 << 10)

static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
	struct insn *insn = &ctxt->insn;
	size_t size;
	u64 port;

	*exitinfo = 0;

	switch (insn->opcode.bytes[0]) {
	/* INS opcodes */
	case 0x6c:
	case 0x6d:
		*exitinfo |= IOIO_TYPE_INS;
		*exitinfo |= IOIO_SEG_ES;
		port	   = ctxt->regs->dx & 0xffff;
		break;

	/* OUTS opcodes */
	case 0x6e:
	case 0x6f:
		*exitinfo |= IOIO_TYPE_OUTS;
		*exitinfo |= IOIO_SEG_DS;
		port	   = ctxt->regs->dx & 0xffff;
		break;

	/* IN immediate opcodes */
	case 0xe4:
	case 0xe5:
		*exitinfo |= IOIO_TYPE_IN;
		port	   = (u8)insn->immediate.value & 0xffff;
		break;

	/* OUT immediate opcodes */
	case 0xe6:
	case 0xe7:
		*exitinfo |= IOIO_TYPE_OUT;
		port	   = (u8)insn->immediate.value & 0xffff;
		break;

	/* IN register opcodes */
	case 0xec:
	case 0xed:
		*exitinfo |= IOIO_TYPE_IN;
		port	   = ctxt->regs->dx & 0xffff;
		break;

	/* OUT register opcodes */
	case 0xee:
	case 0xef:
		*exitinfo |= IOIO_TYPE_OUT;
		port	   = ctxt->regs->dx & 0xffff;
		break;

	default:
		return ES_DECODE_FAILED;
	}

	*exitinfo |= port << 16;

	switch (insn->opcode.bytes[0]) {
	case 0x6c:
	case 0x6e:
	case 0xe4:
	case 0xe6:
	case 0xec:
	case 0xee:
		/* Single byte opcodes */
		*exitinfo |= IOIO_DATA_8;
		size       = 1;
		break;
	default:
		/* Length determined by instruction parsing */
		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
						     : IOIO_DATA_32;
		size       = (insn->opnd_bytes == 2) ? 2 : 4;
	}

	switch (insn->addr_bytes) {
	case 2:
		*exitinfo |= IOIO_ADDR_16;
		break;
	case 4:
		*exitinfo |= IOIO_ADDR_32;
		break;
	case 8:
		*exitinfo |= IOIO_ADDR_64;
		break;
	}

	if (insn_has_rep_prefix(insn))
		*exitinfo |= IOIO_REP;

	return vc_ioio_check(ctxt, (u16)port, size);
}

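/*
 * Handle a #VC for IN/OUT and their string variants. Non-string forms
 * exchange data through the GHCB rAX field; string forms stage data in
 * the GHCB shared buffer a chunk at a time, returning ES_RETRY until a
 * REP-prefixed instruction has fully completed.
 */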
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u64 exit_info_1, exit_info_2;
	enum es_result ret;

	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
	if (ret != ES_OK)
		return ret;

	if (exit_info_1 & IOIO_TYPE_STR) {

		/* (REP) INS/OUTS */

		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
		unsigned int io_bytes, exit_bytes;
		unsigned int ghcb_count, op_count;
		unsigned long es_base;
		u64 sw_scratch;

		/*
		 * For the string variants with rep prefix the amount of in/out
		 * operations per #VC exception is limited so that the kernel
		 * has a chance to take interrupts and re-schedule while the
		 * instruction is emulated.
		 */
		io_bytes   = (exit_info_1 >> 4) & 0x7;
		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

		op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
		exit_info_2 = min(op_count, ghcb_count);
		exit_bytes  = exit_info_2 * io_bytes;

		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

		/* Read bytes of OUTS into the shared buffer */
		if (!(exit_info_1 & IOIO_TYPE_IN)) {
			ret = vc_insn_string_read(ctxt,
					       (void *)(es_base + regs->si),
					       ghcb->shared_buffer, io_bytes,
					       exit_info_2, df);
			if (ret)
				return ret;
		}

		/*
		 * Issue a VMGEXIT to the HV to consume the bytes from the
		 * shared buffer or to have it write them into the shared buffer
		 * depending on the instruction: OUTS or INS.
		 */
		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
		ghcb_set_sw_scratch(ghcb, sw_scratch);
		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
					  exit_info_1, exit_info_2);
		if (ret != ES_OK)
			return ret;

		/* Read bytes from shared buffer into the guest's destination. */
		if (exit_info_1 & IOIO_TYPE_IN) {
			ret = vc_insn_string_write(ctxt,
						   (void *)(es_base + regs->di),
						   ghcb->shared_buffer, io_bytes,
						   exit_info_2, df);
			if (ret)
				return ret;

			if (df)
				regs->di -= exit_bytes;
			else
				regs->di += exit_bytes;
		} else {
			if (df)
				regs->si -= exit_bytes;
			else
				regs->si += exit_bytes;
		}

		if (exit_info_1 & IOIO_REP)
			regs->cx -= exit_info_2;

		ret = regs->cx ? ES_RETRY : ES_OK;

	} else {

		/* IN/OUT into/from rAX */

		int bits = (exit_info_1 & 0x70) >> 1;
		u64 rax = 0;

		if (!(exit_info_1 & IOIO_TYPE_IN))
			rax = lower_bits(regs->ax, bits);

		ghcb_set_rax(ghcb, rax);

		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
		if (ret != ES_OK)
			return ret;

		if (exit_info_1 & IOIO_TYPE_IN) {
			if (!ghcb_rax_is_valid(ghcb))
				return ES_VMM_ERROR;

			regs->ax = lower_bits(ghcb->save.rax, bits);
		}
	}

	return ret;
}

static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	struct cpuid_leaf leaf;
	int ret;

	leaf.fn = regs->ax;
	leaf.subfn = regs->cx;
	ret = snp_cpuid(ghcb, ctxt, &leaf);
	if (!ret) {
		regs->ax = leaf.eax;
		regs->bx = leaf.ebx;
		regs->cx = leaf.ecx;
		regs->dx = leaf.edx;
	}

	return ret;
}

static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u32 cr4 = native_read_cr4();
	enum es_result ret;
	int snp_cpuid_ret;

	snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
	if (!snp_cpuid_ret)
		return ES_OK;
	if (snp_cpuid_ret != -EOPNOTSUPP)
		return ES_VMM_ERROR;

	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	regs->ax = ghcb->save.rax;
	regs->bx = ghcb->save.rbx;
	regs->cx = ghcb->save.rcx;
	regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt,
				      unsigned long exit_code)
{
	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
	enum es_result ret;

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
	     (!rdtscp || ghcb_rcx_is_valid(ghcb))))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;
	if (rdtscp)
		ctxt->regs->cx = ghcb->save.rcx;

	return ES_OK;
}

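/*
 * setup_data entry of type SETUP_CC_BLOB, as defined by the Linux Boot
 * Protocol. It carries only the 32-bit physical address of the
 * Confidential Computing blob (struct cc_blob_sev_info).
 */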
struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}

		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}