  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Paravirtualization interfaces
  3. Copyright (C) 2006 Rusty Russell IBM Corporation
  4. 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
  5. */
  6. #include <linux/errno.h>
  7. #include <linux/init.h>
  8. #include <linux/export.h>
  9. #include <linux/efi.h>
  10. #include <linux/bcd.h>
  11. #include <linux/highmem.h>
  12. #include <linux/kprobes.h>
  13. #include <linux/pgtable.h>
  14. #include <linux/static_call.h>
  15. #include <asm/bug.h>
  16. #include <asm/paravirt.h>
  17. #include <asm/debugreg.h>
  18. #include <asm/desc.h>
  19. #include <asm/setup.h>
  20. #include <asm/time.h>
  21. #include <asm/pgalloc.h>
  22. #include <asm/irq.h>
  23. #include <asm/delay.h>
  24. #include <asm/fixmap.h>
  25. #include <asm/apic.h>
  26. #include <asm/tlbflush.h>
  27. #include <asm/timer.h>
  28. #include <asm/special_insns.h>
  29. #include <asm/tlb.h>
  30. #include <asm/io_bitmap.h>
/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 *
 * Written in assembly (not as an empty C function) so it is guaranteed
 * to be exactly an ENDBR landing pad plus a return, with no compiler-
 * generated frame setup.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     ASM_ENDBR			/* IBT landing pad for indirect calls */
     ASM_RET			/* return via kernel ASM_RET (mitigation-aware) */
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
/* stub always returning 0. */
asm (".pushsection .entry.text, \"ax\"\n"
     ".global paravirt_ret0\n"
     "paravirt_ret0:\n\t"
     ASM_ENDBR			/* IBT landing pad for indirect calls */
     "xor %" _ASM_AX ", %" _ASM_AX ";\n\t"	/* zero the return register */
     ASM_RET
     ".size paravirt_ret0, . - paravirt_ret0\n\t"
     ".type paravirt_ret0, @function\n\t"
     ".popsection");
  54. void __init default_banner(void)
  55. {
  56. printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
  57. pv_info.name);
  58. }
/*
 * Undefined instruction for dealing with missing ops pointers: call
 * sites whose pv_ops slot is NULL get patched to call this (see
 * paravirt_patch()), so a missing op dies loudly instead of jumping
 * into the weeds.  noinstr because it may be reached from
 * non-instrumentable code.
 */
noinstr void paravirt_BUG(void)
{
	BUG();
}
/*
 * Generate a direct CALL to @target into @insn_buff.
 *
 * @insn_buff:	buffer the instruction bytes are written into
 * @target:	destination of the call
 * @addr:	address the instruction will execute from (needed to
 *		compute the relative displacement)
 * @len:	size of the patch site (unused here)
 *
 * Returns the number of bytes emitted (always CALL_INSN_SIZE).
 */
static unsigned paravirt_patch_call(void *insn_buff, const void *target,
				    unsigned long addr, unsigned len)
{
	__text_gen_insn(insn_buff, CALL_INSN_OPCODE,
			(void *)addr, target, CALL_INSN_SIZE);
	return CALL_INSN_SIZE;
}
#ifdef CONFIG_PARAVIRT_XXL
/*
 * identity function, which can be inlined; used via PTE_IDENT below for
 * the pte/pmd/pud/p4d/pgd val/make conversions, which are no-ops on
 * bare hardware.  notrace: must stay safe for very low-level callers.
 */
u64 notrace _paravirt_ident_64(u64 x)
{
	return x;
}
#endif
  78. DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
  79. void __init native_pv_lock_init(void)
  80. {
  81. if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
  82. static_branch_disable(&virt_spin_lock_key);
  83. }
/*
 * Patch one paravirt call site.
 *
 * @type:	index (in pointer-sized units) of the op's slot within
 *		struct pv_ops
 * @insn_buff:	buffer the replacement instruction is generated into
 * @addr:	address of the call site being patched
 * @len:	size of the call site
 *
 * Returns the number of bytes written to @insn_buff; 0 means the call
 * site is left untouched.
 */
unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
			    unsigned int len)
{
	/*
	 * Neat trick to map patch type back to the call within the
	 * corresponding structure.
	 */
	void *opfunc = *((void **)&pv_ops + type);
	unsigned ret;

	if (opfunc == NULL)
		/* If there's no function, patch it with paravirt_BUG() */
		ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len);
	else if (opfunc == _paravirt_nop)
		/* A nop slot needs no replacement instruction at all. */
		ret = 0;
	else
		/* Otherwise call the function. */
		ret = paravirt_patch_call(insn_buff, opfunc, addr, len);

	return ret;
}
/* Static keys controlling steal-time accounting; off by default. */
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

/* Default steal clock: bare hardware never reports stolen time. */
static u64 native_steal_clock(int cpu)
{
	return 0;
}

DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock);

/* Redirect the pv_sched_clock static call to a hypervisor-provided @func. */
void paravirt_set_sched_clock(u64 (*func)(void))
{
	static_call_update(pv_sched_clock, func);
}
/* Resource spanning the entire legacy I/O port range, claimed BUSY. */
static struct resource reserve_ioports = {
	.start = 0,
	.end = IO_SPACE_LIMIT,
	.name = "paravirt-ioport",
	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
	return request_resource(&ioport_resource, &reserve_ioports);
}
/* Per-CPU lazy batching state; PARAVIRT_LAZY_NONE when not batching. */
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

/* Enter lazy @mode on this CPU; lazy sections must not nest. */
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
	BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

	this_cpu_write(paravirt_lazy_mode, mode);
}

/* Leave lazy @mode; BUG if this CPU is not currently in @mode. */
static void leave_lazy(enum paravirt_lazy_mode mode)
{
	BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

	this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}
/* Begin a lazy MMU batching section on this CPU. */
void paravirt_enter_lazy_mmu(void)
{
	enter_lazy(PARAVIRT_LAZY_MMU);
}

/* End the current lazy MMU batching section on this CPU. */
void paravirt_leave_lazy_mmu(void)
{
	leave_lazy(PARAVIRT_LAZY_MMU);
}
/*
 * Flush pending lazy MMU updates without ending the lazy section:
 * implemented by leaving and immediately re-entering lazy MMU mode.
 * Preemption is disabled so the mode check and the leave/enter pair
 * happen on the same CPU.
 */
void paravirt_flush_lazy_mmu(void)
{
	preempt_disable();

	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}

	preempt_enable();
}
#ifdef CONFIG_PARAVIRT_XXL
/*
 * Called (non-preemptible) at the start of a context switch.  If the
 * outgoing task is inside a lazy MMU section, suspend it — flagging
 * the task with TIF_LAZY_MMU_UPDATES so paravirt_end_context_switch()
 * can resume it — then enter lazy CPU mode for the switch itself.
 */
void paravirt_start_context_switch(struct task_struct *prev)
{
	BUG_ON(preemptible());

	if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
		arch_leave_lazy_mmu_mode();
		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
	}
	enter_lazy(PARAVIRT_LAZY_CPU);
}

/*
 * Counterpart of paravirt_start_context_switch(): leave lazy CPU mode
 * and, if the incoming task had a lazy MMU section suspended, resume it.
 */
void paravirt_end_context_switch(struct task_struct *next)
{
	BUG_ON(preemptible());

	leave_lazy(PARAVIRT_LAZY_CPU);

	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
		arch_enter_lazy_mmu_mode();
}

/*
 * noinstr wrappers around the native low-level accessors, giving the
 * pv_ops slots real function addresses that are safe to call from
 * non-instrumentable code.
 */
static noinstr unsigned long pv_native_read_cr2(void)
{
	return native_read_cr2();
}

static noinstr void pv_native_write_cr2(unsigned long val)
{
	native_write_cr2(val);
}

static noinstr unsigned long pv_native_get_debugreg(int regno)
{
	return native_get_debugreg(regno);
}

static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
{
	native_set_debugreg(regno, val);
}

static noinstr void pv_native_irq_enable(void)
{
	native_irq_enable();
}

static noinstr void pv_native_irq_disable(void)
{
	native_irq_disable();
}
#endif
  203. enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
  204. {
  205. if (in_interrupt())
  206. return PARAVIRT_LAZY_NONE;
  207. return this_cpu_read(paravirt_lazy_mode);
  208. }
/* Default platform info: bare hardware, no paravirt backend active. */
struct pv_info pv_info = {
	.name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
	.extra_user_64bit_cs = __USER_CS,
#endif
};
/* 64-bit pagetable entries */
#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)

/*
 * Default paravirt operation table: every slot points at the native
 * implementation, or paravirt_nop where bare hardware needs no work.
 * Hypervisor guest code overrides individual slots during early boot.
 */
struct paravirt_patch_template pv_ops = {
	/* Cpu ops. */
	.cpu.io_delay		= native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
	.cpu.cpuid		= native_cpuid,
	.cpu.get_debugreg	= pv_native_get_debugreg,
	.cpu.set_debugreg	= pv_native_set_debugreg,
	.cpu.read_cr0		= native_read_cr0,
	.cpu.write_cr0		= native_write_cr0,
	.cpu.write_cr4		= native_write_cr4,
	.cpu.wbinvd		= native_wbinvd,
	.cpu.read_msr		= native_read_msr,
	.cpu.write_msr		= native_write_msr,
	.cpu.read_msr_safe	= native_read_msr_safe,
	.cpu.write_msr_safe	= native_write_msr_safe,
	.cpu.read_pmc		= native_read_pmc,
	.cpu.load_tr_desc	= native_load_tr_desc,
	.cpu.set_ldt		= native_set_ldt,
	.cpu.load_gdt		= native_load_gdt,
	.cpu.load_idt		= native_load_idt,
	.cpu.store_tr		= native_store_tr,
	.cpu.load_tls		= native_load_tls,
	.cpu.load_gs_index	= native_load_gs_index,
	.cpu.write_ldt_entry	= native_write_ldt_entry,
	.cpu.write_gdt_entry	= native_write_gdt_entry,
	.cpu.write_idt_entry	= native_write_idt_entry,

	/* LDT allocation needs no extra work on bare hardware. */
	.cpu.alloc_ldt		= paravirt_nop,
	.cpu.free_ldt		= paravirt_nop,

	.cpu.load_sp0		= native_load_sp0,

#ifdef CONFIG_X86_IOPL_IOPERM
	.cpu.invalidate_io_bitmap	= native_tss_invalidate_io_bitmap,
	.cpu.update_io_bitmap		= native_tss_update_io_bitmap,
#endif

	.cpu.start_context_switch	= paravirt_nop,
	.cpu.end_context_switch		= paravirt_nop,

	/* Irq ops. */
	.irq.save_fl		= __PV_IS_CALLEE_SAVE(native_save_fl),
	.irq.irq_disable	= __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
	.irq.irq_enable		= __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
	.irq.safe_halt		= native_safe_halt,
	.irq.halt		= native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

	/* Mmu ops. */
	.mmu.flush_tlb_user	= native_flush_tlb_local,
	.mmu.flush_tlb_kernel	= native_flush_tlb_global,
	.mmu.flush_tlb_one_user	= native_flush_tlb_one_user,
	.mmu.flush_tlb_multi	= native_flush_tlb_multi,
	/* Cast adapts tlb_remove_page()'s signature to the table hook. */
	.mmu.tlb_remove_table	=
			(void (*)(struct mmu_gather *, void *))tlb_remove_page,

	.mmu.exit_mmap		= paravirt_nop,
	.mmu.notify_page_enc_status_changed	= paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
	.mmu.write_cr2		= pv_native_write_cr2,
	.mmu.read_cr3		= __native_read_cr3,
	.mmu.write_cr3		= native_write_cr3,

	.mmu.pgd_alloc		= __paravirt_pgd_alloc,
	.mmu.pgd_free		= paravirt_nop,

	/* Page-table page alloc/release hooks: nothing to do natively. */
	.mmu.alloc_pte		= paravirt_nop,
	.mmu.alloc_pmd		= paravirt_nop,
	.mmu.alloc_pud		= paravirt_nop,
	.mmu.alloc_p4d		= paravirt_nop,
	.mmu.release_pte	= paravirt_nop,
	.mmu.release_pmd	= paravirt_nop,
	.mmu.release_pud	= paravirt_nop,
	.mmu.release_p4d	= paravirt_nop,

	.mmu.set_pte		= native_set_pte,
	.mmu.set_pmd		= native_set_pmd,

	.mmu.ptep_modify_prot_start	= __ptep_modify_prot_start,
	.mmu.ptep_modify_prot_commit	= __ptep_modify_prot_commit,

	.mmu.set_pud		= native_set_pud,

	/* val/make conversions are the 64-bit identity on bare hardware. */
	.mmu.pmd_val		= PTE_IDENT,
	.mmu.make_pmd		= PTE_IDENT,

	.mmu.pud_val		= PTE_IDENT,
	.mmu.make_pud		= PTE_IDENT,

	.mmu.set_p4d		= native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
	.mmu.p4d_val		= PTE_IDENT,
	.mmu.make_p4d		= PTE_IDENT,

	.mmu.set_pgd		= native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */

	.mmu.pte_val		= PTE_IDENT,
	.mmu.pgd_val		= PTE_IDENT,

	.mmu.make_pte		= PTE_IDENT,
	.mmu.make_pgd		= PTE_IDENT,

	.mmu.dup_mmap		= paravirt_nop,
	.mmu.activate_mm	= paravirt_nop,

	.mmu.lazy_mode = {
		.enter		= paravirt_nop,
		.leave		= paravirt_nop,
		.flush		= paravirt_nop,
	},

	.mmu.set_fixmap		= native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
	/* Lock ops. */
#ifdef CONFIG_SMP
	.lock.queued_spin_lock_slowpath	= native_queued_spin_lock_slowpath,
	.lock.queued_spin_unlock	=
				PV_CALLEE_SAVE(__native_queued_spin_unlock),
	.lock.wait			= paravirt_nop,
	.lock.kick			= paravirt_nop,
	.lock.vcpu_is_preempted		=
				PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
};
#ifdef CONFIG_PARAVIRT_XXL
/*
 * NOTE(review): native_load_idt() is excluded from kprobing —
 * presumably because probing it would recurse through the trap/IDT
 * machinery; confirm against the kprobes documentation.
 */
NOKPROBE_SYMBOL(native_load_idt);
#endif

EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);