/* arch/arm64/include/asm/tlbflush.h */
  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /*
  3. * Based on arch/arm/include/asm/tlbflush.h
  4. *
  5. * Copyright (C) 1999-2003 Russell King
  6. * Copyright (C) 2012 ARM Ltd.
  7. */
  8. #ifndef __ASM_TLBFLUSH_H
  9. #define __ASM_TLBFLUSH_H
  10. #ifndef __ASSEMBLY__
  11. #include <linux/bitfield.h>
  12. #include <linux/mm_types.h>
  13. #include <linux/sched.h>
  14. #include <asm/cputype.h>
  15. #include <asm/mmu.h>
  16. /*
  17. * Raw TLBI operations.
  18. *
  19. * Where necessary, use the __tlbi() macro to avoid asm()
  20. * boilerplate. Drivers and most kernel code should use the TLB
  21. * management routines in preference to the macro below.
  22. *
  23. * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
  24. * on whether a particular TLBI operation takes an argument or
  25. * not. The macro handles invoking the asm with or without the
  26. * register argument as appropriate.
  27. */
/*
 * __TLBI_0: emit a TLBI instruction that takes no register operand
 * (e.g. "tlbi vmalle1is"). On CPUs affected by the REPEAT_TLBI erratum
 * the ALTERNATIVE patches in a "dsb ish; tlbi" repeat of the operation;
 * on unaffected CPUs the two extra instructions remain NOPs.
 */
#define __TLBI_0(op, arg)	asm (ARM64_ASM_PREAMBLE		       \
			       "tlbi " #op "\n"			       \
		   ALTERNATIVE("nop\n nop",			       \
			       "dsb ish\n tlbi " #op,		       \
			       ARM64_WORKAROUND_REPEAT_TLBI,	       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)    \
			    : : )

/*
 * __TLBI_1: emit a TLBI instruction that takes one register operand
 * (the encoded VA/ASID value in 'arg'). Same erratum workaround shape
 * as __TLBI_0.
 */
#define __TLBI_1(op, arg)	asm (ARM64_ASM_PREAMBLE		       \
			       "tlbi " #op ", %0\n"		       \
		   ALTERNATIVE("nop\n nop",			       \
			       "dsb ish\n tlbi " #op ", %0",	       \
			       ARM64_WORKAROUND_REPEAT_TLBI,	       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)    \
			    : : "r" (arg))

/*
 * Argument-count dispatch: __tlbi(op) expands __TLBI_0, __tlbi(op, arg)
 * expands __TLBI_1, selected via the trailing "1, 0" sentinel arguments.
 */
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)

/*
 * With kpti (kernel unmapped at EL0), each user context has a second
 * ASID (odd/even pair distinguished by USER_ASID_FLAG); repeat the
 * invalidation for that companion ASID as well.
 */
#define __tlbi_user(op, arg) do {					\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi(op, (arg) | USER_ASID_FLAG);			\
} while (0)
/*
 * This macro creates a properly formatted VA operand for the TLBI:
 * bits [43:0] hold VA[55:12] (the page number), bits [63:48] hold the
 * ASID. Bits [47:44] are left clear (no TTL hint).
 */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
  56. /*
  57. * Get translation granule of the system, which is decided by
  58. * PAGE_SIZE. Used by TTL.
  59. * - 4KB : 1
  60. * - 16KB : 2
  61. * - 64KB : 3
  62. */
  63. #define TLBI_TTL_TG_4K 1
  64. #define TLBI_TTL_TG_16K 2
  65. #define TLBI_TTL_TG_64K 3
  66. static inline unsigned long get_trans_granule(void)
  67. {
  68. switch (PAGE_SIZE) {
  69. case SZ_4K:
  70. return TLBI_TTL_TG_4K;
  71. case SZ_16K:
  72. return TLBI_TTL_TG_16K;
  73. case SZ_64K:
  74. return TLBI_TTL_TG_64K;
  75. default:
  76. return 0;
  77. }
  78. }
  79. /*
  80. * Level-based TLBI operations.
  81. *
  82. * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
  83. * the level at which the invalidation must take place. If the level is
  84. * wrong, no invalidation may take place. In the case where the level
  85. * cannot be easily determined, a 0 value for the level parameter will
  86. * perform a non-hinted invalidation.
  87. *
  88. * For Stage-2 invalidation, use the level values provided to that effect
  89. * in asm/stage2_pgtable.h.
  90. */
/* TTL hint field within the TLBI VA operand: bits [47:44]. */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

/*
 * Issue a TLBI with a level (TTL) hint when the CPU implements
 * ARMv8.4-TTL and a non-zero level was supplied; otherwise fall back to
 * a plain, non-hinted invalidation. The hint encodes the translation
 * granule in bits [3:2] and the page-table level in bits [1:0].
 */
#define __tlbi_level(op, addr, level) do {				\
	u64 arg = addr;							\
									\
	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&		\
	    level) {							\
		u64 ttl = level & 3;					\
		ttl |= get_trans_granule() << 2;			\
		arg &= ~TLBI_TTL_MASK;					\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
	}								\
									\
	__tlbi(op, arg);						\
} while(0)

/*
 * As __tlbi_level(), but repeated for the kpti companion ASID when the
 * kernel is unmapped at EL0 (cf. __tlbi_user()).
 */
#define __tlbi_user_level(op, arg, level) do {				\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
} while (0)
/*
 * This macro creates a properly formatted VA operand for the TLB RANGE.
 * The value bit assignments are:
 *
 *		+----------+------+-------+-------+-------+----------------------+
 *		|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 *		+-----------------+-------+-------+-------+----------------------+
 *		|63      48|47  46|45   44|43   39|38   37|36                   0|
 *
 * The address range is determined by below formula:
 *	[BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
 *
 */
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
	({							\
		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
		__ta &= GENMASK_ULL(36, 0);			\
		__ta |= (unsigned long)(ttl) << 37;		\
		__ta |= (unsigned long)(num) << 39;		\
		__ta |= (unsigned long)(scale) << 44;		\
		__ta |= get_trans_granule() << 46;		\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
/* These macros are used by the TLBI RANGE feature. */

/* Number of pages covered by one range op with the given num/scale. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
/* Largest range a single op can cover: num = 31, scale = 3. */
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)

/*
 * Generate 'num' values from -1 to 30 with -1 rejected by the
 * __flush_tlb_range() loop below.
 */
#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
  144. /*
  145. * TLB Invalidation
  146. * ================
  147. *
  148. * This header file implements the low-level TLB invalidation routines
  149. * (sometimes referred to as "flushing" in the kernel) for arm64.
  150. *
  151. * Every invalidation operation uses the following template:
  152. *
  153. * DSB ISHST // Ensure prior page-table updates have completed
  154. * TLBI ... // Invalidate the TLB
  155. * DSB ISH // Ensure the TLB invalidation has completed
  156. * if (invalidated kernel mappings)
  157. * ISB // Discard any instructions fetched from the old mapping
  158. *
  159. *
  160. * The following functions form part of the "core" TLB invalidation API,
  161. * as documented in Documentation/core-api/cachetlb.rst:
  162. *
  163. * flush_tlb_all()
  164. * Invalidate the entire TLB (kernel + user) on all CPUs
  165. *
  166. * flush_tlb_mm(mm)
  167. * Invalidate an entire user address space on all CPUs.
  168. * The 'mm' argument identifies the ASID to invalidate.
  169. *
  170. * flush_tlb_range(vma, start, end)
  171. * Invalidate the virtual-address range '[start, end)' on all
  172. * CPUs for the user address space corresponding to 'vma->mm'.
  173. * Note that this operation also invalidates any walk-cache
  174. * entries associated with translations for the specified address
  175. * range.
  176. *
  177. * flush_tlb_kernel_range(start, end)
  178. * Same as flush_tlb_range(..., start, end), but applies to
  179. * kernel mappings rather than a particular user address space.
  180. * Whilst not explicitly documented, this function is used when
  181. * unmapping pages from vmalloc/io space.
  182. *
  183. * flush_tlb_page(vma, addr)
  184. * Invalidate a single user mapping for address 'addr' in the
  185. * address space corresponding to 'vma->mm'. Note that this
  186. * operation only invalidates a single, last-level page-table
  187. * entry and therefore does not affect any walk-caches.
  188. *
  189. *
  190. * Next, we have some undocumented invalidation routines that you probably
  191. * don't want to call unless you know what you're doing:
  192. *
  193. * local_flush_tlb_all()
  194. * Same as flush_tlb_all(), but only applies to the calling CPU.
  195. *
  196. * __flush_tlb_kernel_pgtable(addr)
  197. * Invalidate a single kernel mapping for address 'addr' on all
  198. * CPUs, ensuring that any walk-cache entries associated with the
  199. * translation are also invalidated.
  200. *
  201. * __flush_tlb_range(vma, start, end, stride, last_level)
  202. * Invalidate the virtual-address range '[start, end)' on all
  203. * CPUs for the user address space corresponding to 'vma->mm'.
  204. * The invalidation operations are issued at a granularity
  205. * determined by 'stride' and only affect any walk-cache entries
  206. * if 'last_level' is equal to false.
  207. *
  208. *
  209. * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
  210. * on top of these routines, since that is our interface to the mmu_gather
  211. * API as used by munmap() and friends.
  212. */
/*
 * Invalidate the entire TLB on the calling CPU only. Non-shareable
 * barriers suffice since only the local TLB is targeted; the trailing
 * ISB discards instructions fetched via stale translations.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}
/*
 * Invalidate the entire TLB (kernel + user) on all CPUs in the
 * Inner Shareable domain. The leading DSB orders prior page-table
 * updates before the broadcast TLBI; the trailing DSB/ISB wait for
 * completion and resynchronise the instruction stream.
 */
static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}
/*
 * Invalidate all TLB entries tagged with mm's ASID, on all CPUs.
 * No trailing ISB: user mappings only, so no kernel instruction
 * fetches can depend on the stale entries.
 */
static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	/* ASID-only operand: address bits are zero for "aside1is". */
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}
/*
 * Invalidate the last-level TLB entry for a single user address without
 * waiting for completion — callers must issue dsb(ish) themselves (see
 * flush_tlb_page()). "vale1is" is leaf-only, so walk-cache entries are
 * deliberately left intact.
 */
static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}
/*
 * As flush_tlb_page_nosync(), but wait for the broadcast invalidation
 * to complete before returning.
 */
static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}
/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

/*
 * Invalidate the user VA range [start, end) for vma->vm_mm on all CPUs,
 * at 'stride' granularity. 'last_level' selects leaf-only ops (va*e1is /
 * rva*e1is variants that preserve walk-cache entries); 'tlb_level' is
 * the TTL hint (0 = unknown). Falls back to a full-ASID flush when the
 * range is too large to invalidate page by page.
 */
static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	int num = 0;
	int scale = 0;
	unsigned long asid, addr, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * Without TLB range ops, we can handle up to
	 * (MAX_TLBI_OPS - 1) pages;
	 * with TLB range ops, we can handle up to
	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
	 * Beyond that, nuke the whole ASID instead.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	/*
	 * When the CPU does not support TLB range operations, flush the TLB
	 * entries one by one at the granularity of 'stride'. If the TLB
	 * range ops are supported, then:
	 *
	 * 1. If 'pages' is odd, flush the first page through non-range
	 *    operations;
	 *
	 * 2. For remaining pages: the minimum range granularity is decided
	 *    by 'scale', so multiple range TLBI operations may be required.
	 *    Start from scale = 0, flush the corresponding number of pages
	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
	 *    until no pages left.
	 *
	 * Note that certain ranges can be represented by either num = 31 and
	 * scale or num = 0 and scale + 1. The loop below favours the latter
	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
	 */
	while (pages > 0) {
		if (!system_supports_tlb_range() ||
		    pages % 2 == 1) {
			/* Single-page (well, single-stride) invalidation. */
			addr = __TLBI_VADDR(start, asid);
			if (last_level) {
				__tlbi_level(vale1is, addr, tlb_level);
				__tlbi_user_level(vale1is, addr, tlb_level);
			} else {
				__tlbi_level(vae1is, addr, tlb_level);
				__tlbi_user_level(vae1is, addr, tlb_level);
			}
			start += stride;
			pages -= stride >> PAGE_SHIFT;
			continue;
		}

		/* num == -1 means this scale covers no pages; try the next. */
		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			addr = __TLBI_VADDR_RANGE(start, asid, scale,
						  num, tlb_level);
			if (last_level) {
				__tlbi(rvale1is, addr);
				__tlbi_user(rvale1is, addr);
			} else {
				__tlbi(rvae1is, addr);
				__tlbi_user(rvae1is, addr);
			}
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
			pages -= __TLBI_RANGE_PAGES(num, scale);
		}
		scale++;
	}
	dsb(ish);
}
/*
 * Invalidate the user VA range [start, end) for vma->vm_mm on all CPUs,
 * including walk-cache entries.
 */
static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be invalidating
	 * table entries as part of collapsing hugepages or moving page tables.
	 * Set the tlb_level to 0 because we can not get enough information here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}
/*
 * Invalidate the kernel VA range [start, end) on all CPUs, page by page
 * ("vaale1is": all ASIDs, leaf-only). Falls back to flush_tlb_all() for
 * oversized ranges to avoid soft lock-ups. The trailing ISB is required
 * because kernel mappings were invalidated.
 */
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	/* Convert to TLBI operands; the loop stride is one page in that encoding. */
	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}
/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	/* "vaae1is" (non-leaf variant) drops walk-cache entries too. */
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
  369. #endif
  370. #endif