head_64.S 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * linux/boot/head.S
  4. *
  5. * Copyright (C) 1991, 1992, 1993 Linus Torvalds
  6. */
  7. /*
  8. * head.S contains the 32-bit startup code.
  9. *
  10. * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
  11. * the page directory will exist. The startup code will be overwritten by
  12. * the page directory. [According to comments etc elsewhere on a compressed
  13. * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
  14. *
  15. * Page 0 is deliberately kept safe, since System Management Mode code in
  16. * laptops may need to access the BIOS data stored there. This is also
  17. * useful for future device drivers that access the BIOS via VM86
  18. * mode.
  19. */
  20. /*
  21. * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  22. */
  23. .code32
  24. .text
  25. #include <linux/init.h>
  26. #include <linux/linkage.h>
  27. #include <asm/segment.h>
  28. #include <asm/boot.h>
  29. #include <asm/msr.h>
  30. #include <asm/processor-flags.h>
  31. #include <asm/asm-offsets.h>
  32. #include <asm/bootparam.h>
  33. #include <asm/desc_defs.h>
  34. #include <asm/trapnr.h>
  35. #include "pgtable.h"
  36. /*
  37. * Locally defined symbols should be marked hidden. _bss, _ebss and _end
  38. * are referenced below through rva() and RIP-relative addressing.
  39. */
  39. .hidden _bss
  40. .hidden _ebss
  41. .hidden _end
  42. __HEAD /* presumably selects the .head.text section that rva() requires - confirm against <linux/init.h> */
  43. /*
  44. * This macro gives the relative virtual address of X, i.e. the offset of X
  45. * from startup_32. This is the same as the link-time virtual address of X,
  46. * since startup_32 is at 0, but defining it this way tells the
  47. * assembler/linker that we do not want the actual run-time address of X. This
  48. * prevents the linker from trying to create unwanted run-time relocation
  49. * entries for the reference when the compressed kernel is linked as PIE.
  50. *
  51. * A reference X(%reg) will result in the link-time VA of X being stored with
  52. * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
  53. * adds the 64-bit base address where the kernel is loaded.
  54. *
  55. * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
  56. * and no run-time relocation.
  57. *
  58. * The macro should be used as a displacement with a base register containing
  59. * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
  60. * [$ rva(X)].
  61. *
  62. * This macro can only be used from within the .head.text section, since the
  63. * expression requires startup_32 to be in the same section as the code being
  64. * assembled.
  65. */
  66. #define rva(X) ((X) - startup_32) /* assemble-time offset of X from startup_32 */
  67. .code32
  68. SYM_FUNC_START(startup_32)
  69. /* startup_32: 32-bit entry point. Loads our GDT, builds an identity map of the low 4G, sets EFER.LME and far-returns into 64-bit code.
  70. * 32bit entry is 0 and it is ABI so immutable!
  71. * If we come here directly from a bootloader,
  72. * kernel(text+data+bss+brk) ramdisk, zero_page, command line
  73. * all need to be under the 4G limit. %esi points at the real-mode data on entry.
  74. */
  75. cld
  76. cli
  77. /*
  78. * Calculate the delta between where we were compiled to run
  79. * at and where we were actually loaded at. This can only be done
  80. * with a short local call on x86. Nothing else will tell us what
  81. * address we are running at. The reserved chunk of the real-mode
  82. * data at 0x1e4 (defined as a scratch field) are used as the stack
  83. * for this calculation. Only 4 bytes are needed.
  84. */
  85. leal (BP_scratch+4)(%esi), %esp /* temporary 4-byte stack inside the scratch field */
  86. call 1f
  87. 1: popl %ebp /* %ebp = run-time address of label 1 */
  88. subl $ rva(1b), %ebp /* %ebp = run-time address of startup_32 */
  89. /* Load new GDT with the 64bit segments using 32bit descriptor */
  90. leal rva(gdt)(%ebp), %eax
  91. movl %eax, 2(%eax) /* the first 8 bytes of gdt double as its descriptor: patch in the run-time base */
  92. lgdt (%eax)
  93. /* Load segment registers with our descriptors */
  94. movl $__BOOT_DS, %eax
  95. movl %eax, %ds
  96. movl %eax, %es
  97. movl %eax, %fs
  98. movl %eax, %gs
  99. movl %eax, %ss
  100. /* Setup a stack and load CS from current GDT */
  101. leal rva(boot_stack_end)(%ebp), %esp
  102. pushl $__KERNEL32_CS
  103. leal rva(1f)(%ebp), %eax
  104. pushl %eax
  105. lretl /* far return: pops the address of 1: and __KERNEL32_CS into %cs */
  106. 1:
  107. /* Setup Exception handling for SEV-ES */
  108. call startup32_load_idt
  109. /* Make sure cpu supports long mode. */
  110. call verify_cpu
  111. testl %eax, %eax
  112. jnz .Lno_longmode /* non-zero result from verify_cpu: hang */
  113. /*
  114. * Compute the delta between where we were compiled to run at
  115. * and where the code will actually run at.
  116. *
  117. * %ebp contains the address we are loaded at by the boot loader and %ebx
  118. * contains the address where we should move the kernel image temporarily
  119. * for safe in-place decompression.
  120. */
  121. #ifdef CONFIG_RELOCATABLE
  122. movl %ebp, %ebx
  123. #ifdef CONFIG_EFI_STUB
  124. /*
  125. * If we were loaded via the EFI LoadImage service, startup_32 will be at an
  126. * offset to the start of the space allocated for the image. efi_pe_entry will
  127. * set up image_offset to tell us where the image actually starts, so that we
  128. * can use the full available buffer.
  129. * image_offset = startup_32 - image_base
  130. * Otherwise image_offset will be zero and has no effect on the calculations.
  131. */
  132. subl rva(image_offset)(%ebp), %ebx
  133. #endif
  134. movl BP_kernel_alignment(%esi), %eax
  135. decl %eax /* the next four instructions round %ebx up to a multiple of kernel_alignment */
  136. addl %eax, %ebx
  137. notl %eax
  138. andl %eax, %ebx
  139. cmpl $LOAD_PHYSICAL_ADDR, %ebx
  140. jae 1f /* unsigned compare: keep %ebx if it is at or above LOAD_PHYSICAL_ADDR */
  141. #endif
  142. movl $LOAD_PHYSICAL_ADDR, %ebx
  143. 1:
  144. /* Target address to relocate to for decompression */
  145. addl BP_init_size(%esi), %ebx
  146. subl $ rva(_end), %ebx /* %ebx = base + init_size - image size, i.e. the image copy ends at base + init_size */
  147. /*
  148. * Prepare for entering 64 bit mode
  149. */
  150. /* Enable PAE mode */
  151. movl %cr4, %eax
  152. orl $X86_CR4_PAE, %eax
  153. movl %eax, %cr4
  154. /*
  155. * Build early 4G boot pagetable
  156. */
  157. /*
  158. * If SEV is active then set the encryption mask in the page tables.
  159. * This will ensure that when the kernel is copied and decompressed
  160. * it will be done so encrypted.
  161. */
  162. call get_sev_encryption_bit
  163. xorl %edx, %edx /* %edx = mask added to the upper dword of every page-table entry (0 = none) */
  164. #ifdef CONFIG_AMD_MEM_ENCRYPT
  165. testl %eax, %eax
  166. jz 1f
  167. subl $32, %eax /* Encryption bit is always above bit 31 */
  168. bts %eax, %edx /* Set encryption mask for page tables */
  169. /*
  170. * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
  171. * startup32_check_sev_cbit() will do a check. sev_enable() will
  172. * initialize sev_status with all the bits reported by
  173. * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
  174. * needs to be set for now.
  175. */
  176. movl $1, rva(sev_status)(%ebp)
  177. 1:
  178. #endif
  179. /* Initialize Page tables to 0 (BOOT_INIT_PGT_SIZE bytes at the relocated pgtable, one dword per iteration) */
  180. leal rva(pgtable)(%ebx), %edi
  181. xorl %eax, %eax
  182. movl $(BOOT_INIT_PGT_SIZE/4), %ecx
  183. rep stosl
  184. /* Build Level 4: one entry pointing at the level 3 table at pgtable + 0x1000 */
  185. leal rva(pgtable + 0)(%ebx), %edi
  186. leal 0x1007 (%edi), %eax /* next table address plus low flag bits 0x007 */
  187. movl %eax, 0(%edi)
  188. addl %edx, 4(%edi)
  189. /* Build Level 3: four entries pointing at consecutive 4K level 2 tables from pgtable + 0x2000 */
  190. leal rva(pgtable + 0x1000)(%ebx), %edi
  191. leal 0x1007(%edi), %eax
  192. movl $4, %ecx
  193. 1: movl %eax, 0x00(%edi)
  194. addl %edx, 0x04(%edi)
  195. addl $0x00001000, %eax /* next level 2 table */
  196. addl $8, %edi /* next 8-byte entry */
  197. decl %ecx
  198. jnz 1b
  199. /* Build Level 2: 2048 entries stepping 2M each from address 0, i.e. 4G in total */
  200. leal rva(pgtable + 0x2000)(%ebx), %edi
  201. movl $0x00000183, %eax /* physical address 0 plus flag bits 0x183 */
  202. movl $2048, %ecx
  203. 1: movl %eax, 0(%edi)
  204. addl %edx, 4(%edi)
  205. addl $0x00200000, %eax /* next 2M region */
  206. addl $8, %edi
  207. decl %ecx
  208. jnz 1b
  209. /* Enable the boot page tables */
  210. leal rva(pgtable)(%ebx), %eax
  211. movl %eax, %cr3
  212. /* Enable Long mode in EFER (Extended Feature Enable Register) */
  213. movl $MSR_EFER, %ecx
  214. rdmsr
  215. btsl $_EFER_LME, %eax
  216. wrmsr
  217. /* After gdt is loaded */
  218. xorl %eax, %eax
  219. lldt %ax /* null LDT selector */
  220. movl $__BOOT_TSS, %eax
  221. ltr %ax
  222. /*
  223. * Setup for the jump to 64bit mode
  224. *
  225. * When the jump is performed we will be in long mode but
  226. * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
  227. * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
  228. * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
  229. * We place all of the values on our mini stack so lret can
  230. * be used to perform that far jump.
  231. */
  232. leal rva(startup_64)(%ebp), %eax /* default far-return target */
  233. #ifdef CONFIG_EFI_MIXED
  234. movl rva(efi32_boot_args)(%ebp), %edi
  235. testl %edi, %edi
  236. jz 1f /* efi32_boot_args[0] == 0: not entered via an EFI 32-bit entry, keep startup_64 */
  237. leal rva(efi64_stub_entry)(%ebp), %eax
  238. movl rva(efi32_boot_args+4)(%ebp), %esi
  239. movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
  240. testl %edx, %edx
  241. jnz 1f
  242. /*
  243. * efi_pe_entry uses MS calling convention, which requires 32 bytes of
  244. * shadow space on the stack even if all arguments are passed in
  245. * registers. We also need an additional 8 bytes for the space that
  246. * would be occupied by the return address, and this also results in
  247. * the correct stack alignment for entry.
  248. */
  249. subl $40, %esp
  250. leal rva(efi_pe_entry)(%ebp), %eax
  251. movl %edi, %ecx // MS calling convention
  252. movl %esi, %edx
  253. 1:
  254. #endif
  255. /* Check if the C-bit position is correct when SEV is active (the callee saves and restores %eax-%edx) */
  256. call startup32_check_sev_cbit
  257. pushl $__KERNEL_CS
  258. pushl %eax /* target chosen above: startup_64, efi64_stub_entry or efi_pe_entry */
  259. /* Enter paged protected Mode, activating Long Mode */
  260. movl $CR0_STATE, %eax
  261. movl %eax, %cr0
  262. /* Jump from 32bit compatibility mode into 64bit mode. */
  263. lret
  264. SYM_FUNC_END(startup_32)
  265. #ifdef CONFIG_EFI_MIXED
  266. .org 0x190 /* entry point pinned at offset 0x190 from the start of this section */
  267. SYM_FUNC_START(efi32_stub_entry) /* 32-bit EFI entry: stash three stack arguments and the firmware GDT/IDT/selectors, disable paging, enter startup_32 */
  268. add $0x4, %esp /* Discard return address */
  269. popl %ecx /* 1st argument, stored to efi32_boot_args[0] below */
  270. popl %edx /* 2nd argument, stored to efi32_boot_args[1] below */
  271. popl %esi /* 3rd argument, stored to efi32_boot_args[2]; startup_32 treats it as the saved bootparams pointer */
  272. call 1f
  273. 1: pop %ebp
  274. subl $ rva(1b), %ebp /* %ebp = run-time address of startup_32 */
  275. movl %esi, rva(efi32_boot_args+8)(%ebp)
  276. SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) /* efi32_pe_entry jumps here with %ecx, %edx and %ebp set up; efi32_boot_args[2] is not written on that path */
  277. movl %ecx, rva(efi32_boot_args)(%ebp)
  278. movl %edx, rva(efi32_boot_args+4)(%ebp)
  279. movb $0, rva(efi_is64)(%ebp) /* efi_is64 is initialised to 1 in .data; clear it on this 32-bit path */
  280. /* Save firmware GDTR and code/data selectors */
  281. sgdtl rva(efi32_boot_gdt)(%ebp)
  282. movw %cs, rva(efi32_boot_cs)(%ebp)
  283. movw %ds, rva(efi32_boot_ds)(%ebp)
  284. /* Store firmware IDT descriptor */
  285. sidtl rva(efi32_boot_idt)(%ebp)
  286. /* Disable paging */
  287. movl %cr0, %eax
  288. btrl $X86_CR0_PG_BIT, %eax
  289. movl %eax, %cr0
  290. jmp startup_32 /* continue with the regular 32-bit boot path; %esi still holds the 3rd argument */
  291. SYM_FUNC_END(efi32_stub_entry)
  292. #endif
  293. .code64
  294. .org 0x200
  295. SYM_CODE_START(startup_64)
  296. /* startup_64: 64-bit entry point. Computes the load and relocation addresses, runs the paging trampoline, copies the image to the relocation target and jumps to .Lrelocated there.
  297. * 64bit entry is 0x200 and it is ABI so immutable!
  298. * We come here either from startup_32 or directly from a
  299. * 64bit bootloader.
  300. * If we come here from a bootloader, kernel(text+data+bss+brk),
  301. * ramdisk, zero_page, command line could be above 4G.
  302. * We depend on an identity mapped page table being provided
  303. * that maps our entire kernel(text+data+bss+brk), zero page
  304. * and command line. %rsi holds the real mode data pointer throughout.
  305. */
  306. cld
  307. cli
  308. /* Setup data segments. */
  309. xorl %eax, %eax
  310. movl %eax, %ds
  311. movl %eax, %es
  312. movl %eax, %ss
  313. movl %eax, %fs
  314. movl %eax, %gs
  315. /*
  316. * Compute the decompressed kernel start address. It is where
  317. * we were loaded at aligned to a 2M boundary. %rbp contains the
  318. * decompressed kernel start address.
  319. *
  320. * If it is a relocatable kernel then decompress and run the kernel
  321. * from load address aligned to 2MB addr, otherwise decompress and
  322. * run the kernel from LOAD_PHYSICAL_ADDR
  323. *
  324. * We cannot rely on the calculation done in 32-bit mode, since we
  325. * may have been invoked via the 64-bit entry point.
  326. */
  327. /* Start with the delta to where the kernel will run at. */
  328. #ifdef CONFIG_RELOCATABLE
  329. leaq startup_32(%rip) /* - $startup_32 */, %rbp
  330. #ifdef CONFIG_EFI_STUB
  331. /*
  332. * If we were loaded via the EFI LoadImage service, startup_32 will be at an
  333. * offset to the start of the space allocated for the image. efi_pe_entry will
  334. * set up image_offset to tell us where the image actually starts, so that we
  335. * can use the full available buffer.
  336. * image_offset = startup_32 - image_base
  337. * Otherwise image_offset will be zero and has no effect on the calculations.
  338. */
  339. movl image_offset(%rip), %eax /* 32-bit load zero-extends into %rax */
  340. subq %rax, %rbp
  341. #endif
  342. movl BP_kernel_alignment(%rsi), %eax
  343. decl %eax /* the next four instructions round %rbp up to a multiple of kernel_alignment */
  344. addq %rax, %rbp
  345. notq %rax
  346. andq %rax, %rbp
  347. cmpq $LOAD_PHYSICAL_ADDR, %rbp
  348. jae 1f /* unsigned compare: keep %rbp if it is at or above LOAD_PHYSICAL_ADDR */
  349. #endif
  350. movq $LOAD_PHYSICAL_ADDR, %rbp
  351. 1:
  352. /* Target address to relocate to for decompression */
  353. movl BP_init_size(%rsi), %ebx
  354. subl $ rva(_end), %ebx
  355. addq %rbp, %rbx /* %rbx = %rbp + init_size - image size */
  356. /* Set up the stack */
  357. leaq rva(boot_stack_end)(%rbx), %rsp
  358. /*
  359. * At this point we are in long mode with 4-level paging enabled,
  360. * but we might want to enable 5-level paging or vice versa.
  361. *
  362. * The problem is that we cannot do it directly. Setting or clearing
  363. * CR4.LA57 in long mode would trigger #GP. So we need to switch off
  364. * long mode and paging first.
  365. *
  366. * We also need a trampoline in lower memory to switch over from
  367. * 4- to 5-level paging for cases when the bootloader puts the kernel
  368. * above 4G, but didn't enable 5-level paging for us.
  369. *
  370. * The same trampoline can be used to switch from 5- to 4-level paging
  371. * mode, like when starting 4-level paging kernel via kexec() when
  372. * original kernel worked in 5-level paging mode.
  373. *
  374. * For the trampoline, we need the top page table to reside in lower
  375. * memory as we don't have a way to load 64-bit values into CR3 in
  376. * 32-bit mode.
  377. *
  378. * We go through the trampoline even if we don't have to: if we're
  379. * already in a desired paging mode. This way the trampoline code gets
  380. * tested on every boot.
  381. */
  382. /* Make sure we have GDT with 32-bit code segment */
  383. leaq gdt64(%rip), %rax
  384. addq %rax, 2(%rax) /* gdt64 holds the offset (gdt - gdt64); adding its own address yields the run-time address of gdt */
  385. lgdt (%rax)
  386. /* Reload CS so IRET returns to a CS actually in the GDT */
  387. pushq $__KERNEL_CS
  388. leaq .Lon_kernel_cs(%rip), %rax
  389. pushq %rax
  390. lretq
  391. .Lon_kernel_cs:
  392. pushq %rsi /* keep the real mode data pointer across the call */
  393. call load_stage1_idt
  394. popq %rsi
  395. #ifdef CONFIG_AMD_MEM_ENCRYPT
  396. /*
  397. * Now that the stage1 interrupt handlers are set up, #VC exceptions from
  398. * CPUID instructions can be properly handled for SEV-ES guests.
  399. *
  400. * For SEV-SNP, the CPUID table also needs to be set up in advance of any
  401. * CPUID instructions being issued, so go ahead and do that now via
  402. * sev_enable(), which will also handle the rest of the SEV-related
  403. * detection/setup to ensure that has been done in advance of any dependent
  404. * code.
  405. */
  406. pushq %rsi
  407. movq %rsi, %rdi /* real mode address */
  408. call sev_enable
  409. popq %rsi
  410. #endif
  411. /*
  412. * paging_prepare() sets up the trampoline and checks if we need to
  413. * enable 5-level paging.
  414. *
  415. * paging_prepare() returns a two-quadword structure which lands
  416. * into RDX:RAX:
  417. * - Address of the trampoline is returned in RAX.
  418. * - Non zero RDX means trampoline needs to enable 5-level
  419. * paging.
  420. *
  421. * RSI holds real mode data and needs to be preserved across
  422. * this function call.
  423. */
  424. pushq %rsi
  425. movq %rsi, %rdi /* real mode address */
  426. call paging_prepare
  427. popq %rsi
  428. /* Save the trampoline address in RCX */
  429. movq %rax, %rcx
  430. /* Set up 32-bit addressable stack */
  431. leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
  432. /*
  433. * Preserve live 64-bit registers on the stack: this is necessary
  434. * because the architecture does not guarantee that GPRs will retain
  435. * their full 64-bit values across a 32-bit mode switch.
  436. */
  437. pushq %rbp
  438. pushq %rbx
  439. pushq %rsi
  440. /*
  441. * Push the 64-bit address of trampoline_return() onto the new stack.
  442. * It will be used by the trampoline to return to the main code. Due to
  443. * the 32-bit mode switch, it cannot be kept in a register either.
  444. */
  445. leaq trampoline_return(%rip), %rdi
  446. pushq %rdi
  447. /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
  448. pushq $__KERNEL32_CS
  449. leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax /* address of the trampoline code inside the trampoline memory */
  450. pushq %rax
  451. lretq
  452. trampoline_return:
  453. /* Restore live 64-bit registers (reverse order of the pushes above) */
  454. popq %rsi
  455. popq %rbx
  456. popq %rbp
  457. /* Restore the stack, the 32-bit trampoline uses its own stack */
  458. leaq rva(boot_stack_end)(%rbx), %rsp
  459. /*
  460. * cleanup_trampoline() would restore trampoline memory.
  461. *
  462. * RDI is address of the page table to use instead of page table
  463. * in trampoline memory (if required).
  464. *
  465. * RSI holds real mode data and needs to be preserved across
  466. * this function call.
  467. */
  468. pushq %rsi
  469. leaq rva(top_pgtable)(%rbx), %rdi
  470. call cleanup_trampoline
  471. popq %rsi
  472. /* Zero EFLAGS */
  473. pushq $0
  474. popfq
  475. /*
  476. * Copy the compressed kernel to the end of our buffer
  477. * where decompression in place becomes safe.
  478. */
  479. pushq %rsi
  480. leaq (_bss-8)(%rip), %rsi /* source: last quadword before _bss at the current location */
  481. leaq rva(_bss-8)(%rbx), %rdi /* destination: the same quadword in the relocated copy */
  482. movl $(_bss - startup_32), %ecx
  483. shrl $3, %ecx /* byte count -> quadword count */
  484. std /* copy downwards, from the highest quadword to the lowest */
  485. rep movsq
  486. cld
  487. popq %rsi
  488. /*
  489. * The GDT may get overwritten either during the copy we just did or
  490. * during extract_kernel below. To avoid any issues, repoint the GDTR
  491. * to the new copy of the GDT.
  492. */
  493. leaq rva(gdt64)(%rbx), %rax
  494. leaq rva(gdt)(%rbx), %rdx
  495. movq %rdx, 2(%rax) /* store the absolute address of the relocated gdt in the relocated gdt64 descriptor */
  496. lgdt (%rax)
  497. /*
  498. * Jump to the relocated address.
  499. */
  500. leaq rva(.Lrelocated)(%rbx), %rax
  501. jmp *%rax
  502. SYM_CODE_END(startup_64)
  503. #ifdef CONFIG_EFI_STUB
  504. .org 0x390 /* entry point pinned at offset 0x390 from the start of this section */
  505. SYM_FUNC_START(efi64_stub_entry) /* 64-bit EFI stub entry: call efi_main, then jump to startup_64 relative to the address it returns */
  506. and $~0xf, %rsp /* realign the stack */
  507. movq %rdx, %rbx /* save boot_params pointer */
  508. call efi_main
  509. movq %rbx,%rsi /* startup_64 expects the boot_params pointer in %rsi */
  510. leaq rva(startup_64)(%rax), %rax /* %rax from efi_main is used as the base address of startup_32 */
  511. jmp *%rax
  512. SYM_FUNC_END(efi64_stub_entry)
  513. SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
  514. #endif
  515. .text
  516. SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* runs in the relocated copy: clear .bss, load the stage2 IDT, set up identity maps, decompress and jump to the kernel */
  517. /*
  518. * Clear BSS (stack is currently empty)
  519. */
  520. xorl %eax, %eax
  521. leaq _bss(%rip), %rdi
  522. leaq _ebss(%rip), %rcx
  523. subq %rdi, %rcx
  524. shrq $3, %rcx /* size of [_bss, _ebss) in quadwords */
  525. rep stosq
  526. pushq %rsi /* real mode data pointer; stays on the stack across both calls below */
  527. call load_stage2_idt
  528. /* Pass boot_params to initialize_identity_maps() */
  529. movq (%rsp), %rdi /* reload from the stack slot instead of relying on %rsi surviving the call */
  530. call initialize_identity_maps
  531. popq %rsi
  532. /*
  533. * Do the extraction, and jump to the new kernel..
  534. */
  535. pushq %rsi /* Save the real mode argument */
  536. movq %rsi, %rdi /* real mode address */
  537. leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
  538. leaq input_data(%rip), %rdx /* input_data */
  539. movl input_len(%rip), %ecx /* input_len */
  540. movq %rbp, %r8 /* output target address */
  541. movl output_len(%rip), %r9d /* decompressed length, end of relocs */
  542. call extract_kernel /* returns kernel location in %rax */
  543. popq %rsi /* hand the real mode data pointer to the decompressed kernel in %rsi */
  544. /*
  545. * Jump to the decompressed kernel.
  546. */
  547. jmp *%rax
  548. SYM_FUNC_END(.Lrelocated)
  549. .code32
  550. /*
  551. * This is the 32-bit trampoline that will be copied over to low memory.
  552. * It turns paging off, optionally switches CR3 and CR4.LA57, turns paging back on and far-returns to .Lpaging_enabled.
  553. * Return address is at the top of the stack (might be above 4G).
  554. * ECX contains the base address of the trampoline memory.
  555. * Non zero RDX (tested here as EDX) means trampoline needs to enable 5-level paging.
  556. */
  557. SYM_CODE_START(trampoline_32bit_src)
  558. /* Set up data and stack segments */
  559. movl $__KERNEL_DS, %eax
  560. movl %eax, %ds
  561. movl %eax, %ss
  562. /* Disable paging */
  563. movl %cr0, %eax
  564. btrl $X86_CR0_PG_BIT, %eax
  565. movl %eax, %cr0
  566. /* Check what paging mode we want to be in after the trampoline */
  567. testl %edx, %edx
  568. jz 1f
  569. /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
  570. movl %cr4, %eax
  571. testl $X86_CR4_LA57, %eax
  572. jnz 3f
  573. jmp 2f
  574. 1:
  575. /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
  576. movl %cr4, %eax
  577. testl $X86_CR4_LA57, %eax
  578. jz 3f
  579. 2:
  580. /* Point CR3 to the trampoline's new top level page table */
  581. leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
  582. movl %eax, %cr3
  583. 3:
  584. /* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
  585. pushl %ecx /* rdmsr takes the MSR index in %ecx and returns in %edx:%eax, so save both live values */
  586. pushl %edx
  587. movl $MSR_EFER, %ecx
  588. rdmsr
  589. btsl $_EFER_LME, %eax /* CF = previous value of the LME bit */
  590. /* Avoid writing EFER if no change was made (for TDX guest) */
  591. jc 1f
  592. wrmsr
  593. 1: popl %edx
  594. popl %ecx
  595. #ifdef CONFIG_X86_MCE
  596. /*
  597. * Preserve CR4.MCE if the kernel will enable #MC support.
  598. * Clearing MCE may fault in some environments (that also force #MC
  599. * support). Any machine check that occurs before #MC support is fully
  600. * configured will crash the system regardless of the CR4.MCE value set
  601. * here.
  602. */
  603. movl %cr4, %eax
  604. andl $X86_CR4_MCE, %eax /* keep only the MCE bit; every other CR4 bit starts from 0 */
  605. #else
  606. movl $0, %eax
  607. #endif
  608. /* Enable PAE and LA57 (if required) paging modes */
  609. orl $X86_CR4_PAE, %eax
  610. testl %edx, %edx
  611. jz 1f
  612. orl $X86_CR4_LA57, %eax
  613. 1:
  614. movl %eax, %cr4
  615. /* Calculate address of paging_enabled() once we are executing in the trampoline */
  616. leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
  617. /* Prepare the stack for far return to Long Mode */
  618. pushl $__KERNEL_CS
  619. pushl %eax
  620. /* Enable paging again. */
  621. movl %cr0, %eax
  622. btsl $X86_CR0_PG_BIT, %eax
  623. movl %eax, %cr0
  624. lret /* far return into the trampoline copy of .Lpaging_enabled with %cs = __KERNEL_CS */
  625. SYM_CODE_END(trampoline_32bit_src)
  626. .code64
  627. SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) /* 64-bit tail of the trampoline, reached by the lret at the end of trampoline_32bit_src */
  628. /* Return from the trampoline: pops the 64-bit return address that startup_64 pushed (trampoline_return) */
  629. retq
  630. SYM_FUNC_END(.Lpaging_enabled)
  631. /*
  632. * The trampoline code has a size limit.
  633. * Make sure we fail to compile if the trampoline code grows
  634. * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. This covers .Lpaging_enabled too.
  635. */
  636. .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
  637. .code32
  638. SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* reached from startup_32 when verify_cpu returns non-zero; never returns */
  639. /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
  640. 1:
  641. hlt
  642. jmp 1b /* halt again if hlt ever resumes */
  643. SYM_FUNC_END(.Lno_longmode)
  644. #include "../../kernel/verify_cpu.S"
  645. .data
  646. SYM_DATA_START_LOCAL(gdt64) /* 10-byte GDT descriptor (16-bit limit, 64-bit base) loaded by startup_64 */
  647. .word gdt_end - gdt - 1 /* limit: size of gdt minus one */
  648. .quad gdt - gdt64 /* offset of gdt from gdt64; startup_64 adds the run-time address of gdt64 before lgdt */
  649. SYM_DATA_END(gdt64)
  650. .balign 8
  651. SYM_DATA_START_LOCAL(gdt) /* boot GDT; its first 8 bytes double as the descriptor that startup_32 passes to lgdt */
  652. .word gdt_end - gdt - 1 /* limit: size of gdt minus one */
  653. .long 0 /* base, patched by startup_32 with the run-time address of gdt */
  654. .word 0 /* pads the first slot to 8 bytes */
  655. .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
  656. .quad 0x00af9a000000ffff /* __KERNEL_CS */
  657. .quad 0x00cf92000000ffff /* __KERNEL_DS */
  658. .quad 0x0080890000000000 /* TS descriptor */
  659. .quad 0x0000000000000000 /* TS continued */
  660. SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
  661. SYM_DATA_START(boot_idt_desc) /* descriptor (16-bit limit, 64-bit base) for boot_idt */
  662. .word boot_idt_end - boot_idt - 1 /* limit: size of boot_idt minus one */
  663. .quad 0 /* base is 0 here; presumably filled in by the code that loads the IDT (not in this file) - confirm */
  664. SYM_DATA_END(boot_idt_desc)
  665. .balign 8
  666. SYM_DATA_START(boot_idt) /* zero-initialised table of BOOT_IDT_ENTRIES entries, 16 bytes each */
  667. .rept BOOT_IDT_ENTRIES
  668. .quad 0
  669. .quad 0
  670. .endr
  671. SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
  672. #ifdef CONFIG_AMD_MEM_ENCRYPT
  673. SYM_DATA_START(boot32_idt_desc) /* descriptor (16-bit limit, 32-bit base) loaded by startup32_load_idt */
  674. .word boot32_idt_end - boot32_idt - 1 /* limit: size of boot32_idt minus one */
  675. .long 0 /* base, patched by startup32_load_idt with the run-time address of boot32_idt */
  676. SYM_DATA_END(boot32_idt_desc)
  677. .balign 8
  678. SYM_DATA_START(boot32_idt) /* 32 zero-initialised 8-byte entries, filled in by startup32_set_idt_entry */
  679. .rept 32
  680. .quad 0
  681. .endr
  682. SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
  683. #endif
  684. #ifdef CONFIG_EFI_STUB
  685. SYM_DATA(image_offset, .long 0) /* startup_32 - image_base; written by efi32_pe_entry, subtracted from the load address in startup_32 and startup_64 */
  686. #endif
  687. #ifdef CONFIG_EFI_MIXED
  688. SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) /* three dwords saved by efi32_stub_entry / efi32_pe_stub_entry and read back in startup_32 */
  689. SYM_DATA(efi_is64, .byte 1) /* defaults to 1; cleared to 0 on the 32-bit EFI entry path */
  690. #define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
  691. #define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
  692. #define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
  693. __HEAD
  694. .code32
  695. SYM_FUNC_START(efi32_pe_entry)
  696. /*
  697. * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
  698. * efi_system_table_32_t *sys_table)
  699. * Returns only on failure (EFI_UNSUPPORTED if verify_cpu fails, otherwise the handle_protocol status); on success it jumps to efi32_pe_stub_entry. */
  700. pushl %ebp
  701. movl %esp, %ebp
  702. pushl %eax // dummy push to allocate loaded_image
  703. pushl %ebx // save callee-save registers
  704. pushl %edi
  705. call verify_cpu // check for long mode support
  706. testl %eax, %eax
  707. movl $0x80000003, %eax // EFI_UNSUPPORTED (mov leaves the flags from testl intact for the jnz)
  708. jnz 2f
  709. call 1f
  710. 1: pop %ebx
  711. subl $ rva(1b), %ebx // %ebx = run-time address of startup_32
  712. /* Get the loaded image protocol pointer from the image handle */
  713. leal -4(%ebp), %eax
  714. pushl %eax // &loaded_image
  715. leal rva(loaded_image_proto)(%ebx), %eax
  716. pushl %eax // pass the GUID address
  717. pushl 8(%ebp) // pass the image handle
  718. /*
  719. * Note the alignment of the stack frame.
  720. * sys_table
  721. * handle <-- 16-byte aligned on entry by ABI
  722. * return address
  723. * frame pointer
  724. * loaded_image <-- local variable
  725. * saved %ebx <-- 16-byte aligned here
  726. * saved %edi
  727. * &loaded_image
  728. * &loaded_image_proto
  729. * handle <-- 16-byte aligned for call to handle_protocol
  730. */
  731. movl 12(%ebp), %eax // sys_table
  732. movl ST32_boottime(%eax), %eax // sys_table->boottime
  733. call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
  734. addl $12, %esp // restore argument space
  735. testl %eax, %eax
  736. jnz 2f // non-zero status: return it to the caller
  737. movl 8(%ebp), %ecx // image_handle
  738. movl 12(%ebp), %edx // sys_table
  739. movl -4(%ebp), %esi // loaded_image
  740. movl LI32_image_base(%esi), %esi // loaded_image->image_base
  741. movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
  742. /*
  743. * We need to set the image_offset variable here since startup_32() will
  744. * use it before we get to the 64-bit efi_pe_entry() in C code.
  745. */
  746. subl %esi, %ebx // %ebx = startup_32 - image_base
  747. movl %ebx, rva(image_offset)(%ebp) // save image_offset
  748. jmp efi32_pe_stub_entry
  749. 2: popl %edi // restore callee-save registers
  750. popl %ebx
  751. leave
  752. RET
  753. SYM_FUNC_END(efi32_pe_entry)
  754. .section ".rodata"
  755. /* EFI loaded image protocol GUID, passed by address to handle_protocol in efi32_pe_entry */
  756. .balign 4
  757. SYM_DATA_START_LOCAL(loaded_image_proto)
  758. .long 0x5b1b31a1
  759. .word 0x9562, 0x11d2
  760. .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
  761. SYM_DATA_END(loaded_image_proto)
  762. #endif
  763. #ifdef CONFIG_AMD_MEM_ENCRYPT
  764. __HEAD
  765. .code32
  766. /*
  767. * Write an IDT entry into boot32_idt
  768. *
  769. * Parameters:
  770. *
  771. * %eax: Handler address
  772. * %edx: Vector number (clobbered; %eax, %ebx and %ecx are preserved)
  773. *
  774. * Physical offset is expected in %ebp
  775. */
  776. SYM_FUNC_START(startup32_set_idt_entry)
  777. push %ebx
  778. push %ecx
  779. /* IDT entry address to %ebx */
  780. leal rva(boot32_idt)(%ebp), %ebx
  781. shl $3, %edx /* vector number * 8 bytes per entry */
  782. addl %edx, %ebx
  783. /* Build IDT entry, lower 4 bytes */
  784. movl %eax, %edx
  785. andl $0x0000ffff, %edx # Target code segment offset [15:0]
  786. movl $__KERNEL32_CS, %ecx # Target code segment selector
  787. shl $16, %ecx /* selector goes in bits [31:16] of the low dword */
  788. orl %ecx, %edx
  789. /* Store lower 4 bytes to IDT */
  790. movl %edx, (%ebx)
  791. /* Build IDT entry, upper 4 bytes */
  792. movl %eax, %edx
  793. andl $0xffff0000, %edx # Target code segment offset [31:16]
  794. orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
  795. /* Store upper 4 bytes to IDT */
  796. movl %edx, 4(%ebx)
  797. pop %ecx
  798. pop %ebx
  799. RET
  800. SYM_FUNC_END(startup32_set_idt_entry)
  801. #endif
  802. SYM_FUNC_START(startup32_load_idt) /* installs the #VC handler in boot32_idt and loads the IDT; only a RET without CONFIG_AMD_MEM_ENCRYPT. Expects %ebp = run-time address of startup_32; clobbers %eax and %edx */
  803. #ifdef CONFIG_AMD_MEM_ENCRYPT
  804. /* #VC handler */
  805. leal rva(startup32_vc_handler)(%ebp), %eax
  806. movl $X86_TRAP_VC, %edx
  807. call startup32_set_idt_entry
  808. /* Load IDT */
  809. leal rva(boot32_idt)(%ebp), %eax
  810. movl %eax, rva(boot32_idt_desc+2)(%ebp) /* patch the run-time base into the descriptor, after its 16-bit limit */
  811. lidt rva(boot32_idt_desc)(%ebp)
  812. #endif
  813. RET
  814. SYM_FUNC_END(startup32_load_idt)
  815. /*
  816. * Check for the correct C-bit position when the startup_32 boot-path is used.
  817. *
  818. * The check makes use of the fact that all memory is encrypted when paging is
  819. * disabled. The function creates 64 bits of random data using the RDRAND
  820. * instruction. RDRAND is mandatory for SEV guests, so always available. If the
  821. * hypervisor violates that the kernel will crash right here.
  822. *
  823. * The 64 bits of random data are stored to a memory location and at the same
  824. * time kept in the %eax and %ebx registers. Since encryption is always active
  825. * when paging is off the random data will be stored encrypted in main memory.
  826. *
  827. * Then paging is enabled. When the C-bit position is correct all memory is
  828. * still mapped encrypted and comparing the register values with memory will
  829. * succeed. An incorrect C-bit position will map all memory unencrypted, so that
  830. * the compare will use the encrypted random data and fail.
  831. * Expects %ebp = run-time address of startup_32. %eax-%edx are saved and restored; on a mismatch the function halts and never returns.
  832. */
  832. SYM_FUNC_START(startup32_check_sev_cbit)
  833. #ifdef CONFIG_AMD_MEM_ENCRYPT
  834. pushl %eax
  835. pushl %ebx
  836. pushl %ecx
  837. pushl %edx
  838. /* Check for non-zero sev_status */
  839. movl rva(sev_status)(%ebp), %eax
  840. testl %eax, %eax
  841. jz 4f /* sev_status is zero: skip the check */
  842. /*
  843. * Get two 32-bit random values - Don't bail out if RDRAND fails
  844. * because it is better to prevent forward progress if no random value
  845. * can be gathered.
  846. */
  847. 1: rdrand %eax
  848. jnc 1b /* CF clear: no random value delivered, retry */
  849. 2: rdrand %ebx
  850. jnc 2b
  851. /* Store to memory and keep it in the registers */
  852. movl %eax, rva(sev_check_data)(%ebp)
  853. movl %ebx, rva(sev_check_data+4)(%ebp)
  854. /* Enable paging to see if encryption is active */
  855. movl %cr0, %edx /* Backup %cr0 in %edx */
  856. movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
  857. movl %ecx, %cr0
  858. cmpl %eax, rva(sev_check_data)(%ebp)
  859. jne 3f
  860. cmpl %ebx, rva(sev_check_data+4)(%ebp)
  861. jne 3f
  862. movl %edx, %cr0 /* Restore previous %cr0 */
  863. jmp 4f
  864. 3: /* Check failed - hlt the machine */
  865. hlt
  866. jmp 3b
  867. 4:
  868. popl %edx
  869. popl %ecx
  870. popl %ebx
  871. popl %eax
  872. #endif
  873. RET
  874. SYM_FUNC_END(startup32_check_sev_cbit)
  875. /*
  876. * Stack and heap for uncompression
  877. */
  878. .bss
  879. .balign 4
  880. SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) /* passed to extract_kernel as its malloc area */
  881. SYM_DATA_START_LOCAL(boot_stack) /* BOOT_STACK_SIZE bytes; the code above points the stack pointer at boot_stack_end */
  882. .fill BOOT_STACK_SIZE, 1, 0
  883. .balign 16
  884. SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
  885. /*
  886. * Space for page tables (not in .bss so not zeroed); startup_32 clears the
  887. * first BOOT_INIT_PGT_SIZE bytes itself before building the boot tables.
  887. */
  888. .section ".pgtable","aw",@nobits
  889. .balign 4096
  890. SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
  891. /*
  892. * The page table is going to be used instead of page table in the trampoline
  893. * memory. Its address is passed to cleanup_trampoline() from startup_64.
  894. */
  895. SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)