calling.h

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities) rflags is
   clobbered. Leftover arguments are passed over the stack frame.)

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] for struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

 For 32-bit we have the following conventions - kernel is built with
 -mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed over the stack frame. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/
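
/*
 * Illustration of the [**] 64-bit case above -- a hedged sketch only; the
 * struct and function names below are made up and are not part of this
 * header:
 *
 *      struct pair   { long a, b; };     // 128 bits: returned in rax:rdx
 *      struct triple { long a, b, c; };  // > 128 bits: returned via memory
 *
 *      struct pair   make_pair(long x);   // x in rdi, result in rax:rdx
 *      struct triple make_triple(long x); // hidden return-struct pointer in
 *                                         // rdi, x shifts up into rsi
 */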
#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */
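
/*
 * Push the GPRs onto the stack in pt_regs layout. With save_ret=1 the macro
 * is entered with a return address on top of the stack; it is stashed in
 * %rsi while the frame is built and re-pushed on top afterwards, so the
 * caller can still return normally.
 */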
.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
        movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
        .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
        pushq   \rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        pushq   %r9             /* pt_regs->r9 */
        pushq   %r10            /* pt_regs->r10 */
        pushq   %r11            /* pt_regs->r11 */
        pushq   %rbx            /* pt_regs->rbx */
        pushq   %rbp            /* pt_regs->rbp */
        pushq   %r12            /* pt_regs->r12 */
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */
        UNWIND_HINT_REGS

        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
.endm

.macro CLEAR_REGS
        /*
         * Sanitize registers of values that a speculation attack might
         * otherwise want to exploit. The lower registers are likely clobbered
         * well before they could be put to use in a speculative execution
         * gadget.
         */
        xorl    %esi,  %esi     /* nospec si  */
        xorl    %edx,  %edx     /* nospec dx  */
        xorl    %ecx,  %ecx     /* nospec cx  */
        xorl    %r8d,  %r8d     /* nospec r8  */
        xorl    %r9d,  %r9d     /* nospec r9  */
        xorl    %r10d, %r10d    /* nospec r10 */
        xorl    %r11d, %r11d    /* nospec r11 */
        xorl    %ebx,  %ebx     /* nospec rbx */
        xorl    %ebp,  %ebp     /* nospec rbp */
        xorl    %r12d, %r12d    /* nospec r12 */
        xorl    %r13d, %r13d    /* nospec r13 */
        xorl    %r14d, %r14d    /* nospec r14 */
        xorl    %r15d, %r15d    /* nospec r15 */
.endm

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
        PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
        CLEAR_REGS
.endm

.macro POP_REGS pop_rdi=1
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
        popq %r11
        popq %r10
        popq %r9
        popq %r8
        popq %rax
        popq %rcx
        popq %rdx
        popq %rsi
        .if \pop_rdi
        popq %rdi
        .endif
.endm
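
/*
 * Typical pairing in an entry path -- an illustrative sketch only; the real
 * call sites live in the entry assembly and 'handle_event' is a made-up C
 * handler:
 *
 *      PUSH_AND_CLEAR_REGS             - build pt_regs, sanitize live GPRs
 *      movq    %rsp, %rdi              - pt_regs pointer is the first C argument
 *      call    handle_event
 *      POP_REGS
 */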
#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT            PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK           (1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT               X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK              (1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
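
/*
 * Worked out for the usual configuration (PAGE_SHIFT == 12 and
 * X86_CR3_PTI_PCID_USER_BIT == 11) -- shown for illustration only:
 *
 *      PTI_USER_PGTABLE_MASK           = 0x1000
 *      PTI_USER_PCID_MASK              = 0x0800
 *      PTI_USER_PGTABLE_AND_PCID_MASK  = 0x1800
 *
 * i.e. ADJUST_KERNEL_CR3 below clears bits 11 and 12 of the CR3 value, while
 * the user-CR3 switches set them.
 */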
.macro SET_NOFLUSH_BIT  reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
        andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg
        ADJUST_KERNEL_CR3 \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm

#define THIS_CPU_user_pcid_flush_mask   \
        PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * Test if the ASID needs a flush.
         */
        movq    \scratch_reg, \scratch_reg2
        andq    $(0x7FF), \scratch_reg          /* mask ASID */
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
        jmp     .Lwrcr3_pcid_\@

.Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
        /* Flip the ASID to the user version */
        orq     $(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
        /* Flip the PGD to the user version */
        orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3

.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
        pushq   %rax
        SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
        popq    %rax
.endm

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
         * Test the user pagetable bit. If set, then the user page tables
         * are active. If clear, CR3 already has the kernel page table
         * active.
         */
        bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
        jnc     .Ldone_\@

        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * KERNEL pages can always resume with NOFLUSH as we do
         * explicit flushes.
         */
        bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lnoflush_\@

        /*
         * Check if there's a pending flush for the user ASID we're
         * about to set.
         */
        movq    \save_reg, \scratch_reg
        andq    $(0x7FF), \scratch_reg
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jmp     .Lwrcr3_\@

.Lnoflush_\@:
        SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
        /*
         * The CR3 write could be avoided when not changing its value,
         * but would require a CR3 read *and* a scratch register.
         */
        movq    \save_reg, %cr3
.Lend_\@:
.endm
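
/*
 * Sketch of how the save/restore pair is meant to be used on a paranoid
 * entry/exit path -- illustrative only; the concrete register choice is an
 * assumption and the real users live in the entry assembly:
 *
 *      SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *      ... handle the exception on kernel page tables ...
 *      RESTORE_CR3 scratch_reg=%rax save_reg=%r14
 */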
#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif

/*
 * IBRS kernel mitigation for Spectre_v2.
 *
 * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
 * the regs it uses (AX, CX, DX). Must be called before the first RET
 * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
 *
 * The optional argument is used to save/restore the current value,
 * which is used on the paranoid paths.
 *
 * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
 */
.macro IBRS_ENTER save_reg
#ifdef CONFIG_CPU_IBRS_ENTRY
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
        movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
        rdmsr
        shl     $32, %rdx
        or      %rdx, %rax
        mov     %rax, \save_reg

        test    $SPEC_CTRL_IBRS, %eax
        jz      .Ldo_wrmsr_\@
        lfence
        jmp     .Lend_\@
.Ldo_wrmsr_\@:
.endif

        movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        movl    %edx, %eax
        shr     $32, %rdx
        wrmsr
.Lend_\@:
#endif
.endm

/*
 * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
 * regs. Must be called after the last RET.
 */
.macro IBRS_EXIT save_reg
#ifdef CONFIG_CPU_IBRS_ENTRY
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
        movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
        mov     \save_reg, %rdx
.else
        movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        andl    $(~SPEC_CTRL_IBRS), %edx
.endif

        movl    %edx, %eax
        shr     $32, %rdx
        wrmsr
.Lend_\@:
#endif
.endm
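
/*
 * Pairing sketch -- illustrative only; the register used for save_reg on the
 * paranoid paths is an assumption, the real call sites are in the entry code:
 *
 *      IBRS_ENTER                      - normal user->kernel entry
 *      ...
 *      IBRS_EXIT
 *
 *      IBRS_ENTER save_reg=%r15        - paranoid entry: preserve the
 *      ...                               previous SPEC_CTRL value
 *      IBRS_EXIT save_reg=%r15
 */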
/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm
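
/*
 * stackleak_erase() is a normal C function and may clobber the caller-saved
 * registers, so STACKLEAK_ERASE_NOCLOBBER below brackets the call with a full
 * register save/restore to stay usable from arbitrary points in the entry
 * code.
 */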
.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        PUSH_AND_CLEAR_REGS
        call stackleak_erase
        POP_REGS
#endif
.endm
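
/*
 * Save the current GS base in \save_reg and point GS base at this CPU's
 * per-CPU area. This relies on RDGSBASE/WRGSBASE being usable here, i.e. on
 * FSGSBASE-capable hardware; \save_reg is left for the caller to restore
 * later.
 */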
.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
        rdgsbase \save_reg
        GET_PERCPU_BASE \scratch_reg
        wrgsbase \scratch_reg
.endm

#else /* CONFIG_X86_64 */
# undef         UNWIND_HINT_IRET_REGS
# define        UNWIND_HINT_IRET_REGS
#endif /* !CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        call stackleak_erase
#endif
.endm

#ifdef CONFIG_SMP

/*
 * CPU/node NR is loaded from the limit (size) field of a special segment
 * descriptor entry in GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
        movq    $__CPUNODE_SEG, \reg
        lsl     \reg, \reg
.endm

/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously can not use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
        LOAD_CPU_AND_NODE_SEG_LIMIT \reg
        andq    $VDSO_CPUNODE_MASK, \reg
        movq    __per_cpu_offset(, \reg, 8), \reg
.endm
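
/*
 * Worked example, assuming the usual encoding of (node << 12) | cpu in the
 * segment limit (VDSO_CPUNODE_MASK == 0xfff): on CPU 3 of node 1 the lsl
 * above reads back 0x1003, the andq keeps only the CPU number 3, and the
 * movq turns that into __per_cpu_offset[3].
 */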
#else
.macro GET_PERCPU_BASE reg:req
        movq    pcpu_unit_offsets(%rip), \reg
.endm
#endif /* CONFIG_SMP */