tlbflush.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm.h>
#include <linux/sched.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>

void __flush_tlb_all(void);

#define TLB_FLUSH_ALL           -1UL
#define TLB_GENERATION_INVALID  0

void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);

/* Set in this cpu's CR4. */
static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
        cr4_update_irqsoff(mask, 0);
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
        cr4_update_irqsoff(0, mask);
}

/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
        unsigned long flags;

        local_irq_save(flags);
        cr4_set_bits_irqsoff(mask);
        local_irq_restore(flags);
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
        unsigned long flags;

        local_irq_save(flags);
        cr4_clear_bits_irqsoff(mask);
        local_irq_restore(flags);
}
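
/*
 * Example (illustrative sketch): CPU-feature setup code typically enables a
 * control bit on the local CPU through these helpers rather than writing CR4
 * directly, so that the per-CPU CR4 shadow stays coherent, e.g.:
 *
 *      cr4_set_bits(X86_CR4_SMEP);
 *
 * X86_CR4_SMEP is used here only to illustrate the calling convention, not as
 * a claim about any particular caller.
 */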

#ifndef MODULE

/*
 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
 * lines.
 */
#define TLB_NR_DYN_ASIDS        6

struct tlb_context {
        u64 ctx_id;
        u64 tlb_gen;
};

struct tlb_state {
        /*
         * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
         * are on.  This means that it may not match current->active_mm,
         * which will contain the previous user mm when we're in lazy TLB
         * mode even if we've already switched back to swapper_pg_dir.
         *
         * During switch_mm_irqs_off(), loaded_mm will be set to
         * LOADED_MM_SWITCHING during the brief interrupts-off window
         * when CR3 and loaded_mm would otherwise be inconsistent.  This
         * is for nmi_uaccess_okay()'s benefit.
         */
        struct mm_struct *loaded_mm;

#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)

        /* Last user mm for optimizing IBPB */
        union {
                struct mm_struct        *last_user_mm;
                unsigned long           last_user_mm_spec;
        };

        u16 loaded_mm_asid;
        u16 next_asid;

        /*
         * If set we changed the page tables in such a way that we
         * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
         * This tells us to go invalidate all the non-loaded ctxs[]
         * on the next context switch.
         *
         * The current ctx was kept up-to-date as it ran and does not
         * need to be invalidated.
         */
        bool invalidate_other;

        /*
         * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
         * the corresponding user PCID needs a flush next time we
         * switch to it; see SWITCH_TO_USER_CR3.
         */
        unsigned short user_pcid_flush_mask;

        /*
         * Access to this CR4 shadow and to H/W CR4 is protected by
         * disabling interrupts when modifying either one.
         */
        unsigned long cr4;

        /*
         * This is a list of all contexts that might exist in the TLB.
         * There is one per ASID that we use, and the ASID (what the
         * CPU calls PCID) is the index into ctxs[].
         *
         * For each context, ctx_id indicates which mm the TLB's user
         * entries came from.  As an invariant, the TLB will never
         * contain entries that were already stale when that mm reached
         * the tlb_gen recorded in the list.
         *
         * To be clear, this means that it's legal for the TLB code to
         * flush the TLB without updating tlb_gen.  This can happen
         * (for now, at least) due to paravirt remote flushes.
         *
         * NB: context 0 is a bit special, since it's also used by
         * various bits of init code.  This is fine -- code that
         * isn't aware of PCID will end up harmlessly flushing
         * context 0.
         */
        struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
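
/*
 * Example (illustrative sketch): with preemption disabled, code can compare
 * what this CPU has already flushed for the currently loaded mm against the
 * mm's latest generation, roughly:
 *
 *      u16 asid      = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 *      u64 local_gen = this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen);
 *      u64 mm_gen    = atomic64_read(&mm->context.tlb_gen);
 *
 * If local_gen < mm_gen, a flush is still pending on this CPU.  The local
 * variable names are hypothetical; only cpu_tlbstate and its fields come
 * from this header.
 */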

struct tlb_state_shared {
        /*
         * We can be in one of several states:
         *
         *  - Actively using an mm.  Our CPU's bit will be set in
         *    mm_cpumask(loaded_mm) and is_lazy == false;
         *
         *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
         *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
         *
         *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
         *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
         *    We're heuristically guessing that the CR3 load we
         *    skipped more than makes up for the overhead added by
         *    lazy mode.
         */
        bool is_lazy;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);

bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay
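
/*
 * Example (illustrative sketch): an NMI-context helper that wants to touch
 * user memory is expected to check this before dereferencing user pointers,
 * roughly:
 *
 *      if (!nmi_uaccess_okay())
 *              return -EFAULT;
 *
 * because the page tables loaded in CR3 may not belong to current->mm, e.g.
 * during switch_mm_irqs_off() or while the CPU is in lazy TLB mode.
 */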

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
        this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

extern void initialize_tlbstate_and_flush(void);

/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes' TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context's TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
struct flush_tlb_info {
        /*
         * We support several kinds of flushes.
         *
         * - Fully flush a single mm.  .mm will be set, .end will be
         *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
         *   which the IPI sender is trying to catch us up.
         *
         * - Partially flush a single mm.  .mm will be set, .start and
         *   .end will indicate the range, and .new_tlb_gen will be set
         *   such that the changes between generation .new_tlb_gen-1 and
         *   .new_tlb_gen are entirely contained in the indicated range.
         *
         * - Fully flush all mms whose tlb_gens have been updated.  .mm
         *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
         *   will be zero.
         */
        struct mm_struct        *mm;
        unsigned long           start;
        unsigned long           end;
        u64                     new_tlb_gen;
        unsigned int            initiating_cpu;
        u8                      stride_shift;
        u8                      freed_tables;
};
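
/*
 * Example (illustrative sketch): the first case above, a full flush of a
 * single mm, would be described roughly as
 *
 *      info->mm           = mm;
 *      info->start        = 0;
 *      info->end          = TLB_FLUSH_ALL;
 *      info->new_tlb_gen  = new_tlb_gen;
 *      info->stride_shift = PAGE_SHIFT;
 *      info->freed_tables = false;
 *
 * before being handed to flush_tlb_multi().  The local variable names are
 * hypothetical; only the struct fields and constants come from this header.
 */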

void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_multi(const struct cpumask *cpumask,
                     const struct flush_tlb_info *info);

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif

#define flush_tlb_mm(mm)                                                \
                flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)

#define flush_tlb_range(vma, start, end)                                \
        flush_tlb_mm_range((vma)->vm_mm, start, end,                    \
                           ((vma)->vm_flags & VM_HUGETLB)               \
                                ? huge_page_shift(hstate_vma(vma))      \
                                : PAGE_SHIFT, false)

extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                               unsigned long end, unsigned int stride_shift,
                               bool freed_tables);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
{
        flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
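
/*
 * Example (illustrative sketch): a caller that has just changed mappings in a
 * user VMA would typically flush the affected range, e.g.:
 *
 *      flush_tlb_range(vma, start, end);
 *
 * while code tearing down a kernel virtual mapping (a vmalloc/vunmap-style
 * path) would instead use:
 *
 *      flush_tlb_kernel_range(addr, addr + size);
 *
 * vma, start, end, addr and size are hypothetical names standing in for the
 * caller's own state.
 */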

static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
        /*
         * Bump the generation count.  This also serves as a full barrier
         * that synchronizes with switch_mm(): callers are required to order
         * their read of mm_cpumask after their writes to the paging
         * structures.
         */
        return atomic64_inc_return(&mm->context.tlb_gen);
}

static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
                                        struct mm_struct *mm)
{
        inc_mm_tlb_gen(mm);
        cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}

extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
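
/*
 * Example (illustrative sketch): the batched reclaim pattern accumulates CPUs
 * while PTEs are being unmapped and issues a single flush at the end, roughly:
 *
 *      for each mm whose PTEs were cleared:
 *              arch_tlbbatch_add_mm(&batch, mm);
 *      ...
 *      arch_tlbbatch_flush(&batch);
 *
 * Deferring the flush until after all the PTE updates amortizes the IPI cost
 * across many pages; "batch" here is a caller-owned
 * struct arch_tlbflush_unmap_batch.
 */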

static inline bool pte_flags_need_flush(unsigned long oldflags,
                                        unsigned long newflags,
                                        bool ignore_access)
{
        /*
         * Flags that require a flush when cleared but not when they are set.
         * Only include flags that would not trigger spurious page-faults.
         * Non-present entries are not cached.  Hardware would set the
         * dirty/access bit if needed without a fault.
         */
        const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
                                        _PAGE_ACCESSED;
        const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
                                        _PAGE_SOFTW3 | _PAGE_SOFTW4;
        const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
                          _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
                          _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
                          _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
        unsigned long diff = oldflags ^ newflags;

        BUILD_BUG_ON(flush_on_clear & software_flags);
        BUILD_BUG_ON(flush_on_clear & flush_on_change);
        BUILD_BUG_ON(flush_on_change & software_flags);

        /* Ignore software flags */
        diff &= ~software_flags;

        if (ignore_access)
                diff &= ~_PAGE_ACCESSED;

        /*
         * Were any of the 'flush_on_clear' flags cleared between 'oldflags'
         * and 'newflags'?
         */
        if (diff & oldflags & flush_on_clear)
                return true;

        /* Flush on modified flags. */
        if (diff & flush_on_change)
                return true;

        /* Ensure there are no flags that were left behind */
        if (IS_ENABLED(CONFIG_DEBUG_VM) &&
            (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
                VM_WARN_ON_ONCE(1);
                return true;
        }

        return false;
}

/*
 * pte_needs_flush() checks whether permissions were demoted and require a
 * flush.  It should only be used for userspace PTEs.
 */
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
        /* !PRESENT -> * ; no need for flush */
        if (!(pte_flags(oldpte) & _PAGE_PRESENT))
                return false;

        /* PFN changed ; needs flush */
        if (pte_pfn(oldpte) != pte_pfn(newpte))
                return true;

        /*
         * check PTE flags; ignore access-bit; see comment in
         * ptep_clear_flush_young().
         */
        return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
                                    true);
}
#define pte_needs_flush pte_needs_flush
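
/*
 * Example (illustrative sketch): an mprotect-style path that rewrites a PTE
 * with new permissions can use this to skip unnecessary flushes, roughly:
 *
 *      oldpte = ptep_modify_prot_start(vma, addr, ptep);
 *      newpte = pte_modify(oldpte, newprot);
 *      ptep_modify_prot_commit(vma, addr, ptep, oldpte, newpte);
 *      if (pte_needs_flush(oldpte, newpte))
 *              flush_tlb_page(vma, addr);
 *
 * This is a simplified sketch; real callers usually batch their flushes via
 * the mmu_gather machinery rather than flushing one page at a time.
 */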

/*
 * huge_pmd_needs_flush() checks whether permissions were demoted and require a
 * flush.  It should only be used for userspace huge PMDs.
 */
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
        /* !PRESENT -> * ; no need for flush */
        if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
                return false;

        /* PFN changed ; needs flush */
        if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
                return true;

        /*
         * check PMD flags; do not ignore access-bit; see
         * pmdp_clear_flush_young().
         */
        return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
                                    false);
}
#define huge_pmd_needs_flush huge_pmd_needs_flush

#endif /* !MODULE */
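
/*
 * Writing CR4 with PGE toggled invalidates all TLB entries, including global
 * ones; the second write restores the original CR4 value.
 */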
static inline void __native_tlb_flush_global(unsigned long cr4)
{
        native_write_cr4(cr4 ^ X86_CR4_PGE);
        native_write_cr4(cr4);
}

#endif /* _ASM_X86_TLBFLUSH_H */