io_pgtable_v2.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table v2 allocator.
 *
 * Copyright (C) 2022 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 * Author: Vasant Hegde <vasant.hegde@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"

#define IOMMU_PAGE_PRESENT      BIT_ULL(0)      /* Is present */
#define IOMMU_PAGE_RW           BIT_ULL(1)      /* Writeable */
#define IOMMU_PAGE_USER         BIT_ULL(2)      /* Userspace addressable */
#define IOMMU_PAGE_PWT          BIT_ULL(3)      /* Page write through */
#define IOMMU_PAGE_PCD          BIT_ULL(4)      /* Page cache disabled */
#define IOMMU_PAGE_ACCESS       BIT_ULL(5)      /* Was accessed (updated by IOMMU) */
#define IOMMU_PAGE_DIRTY        BIT_ULL(6)      /* Was written to (updated by IOMMU) */
#define IOMMU_PAGE_PSE          BIT_ULL(7)      /* Page Size Extensions */
#define IOMMU_PAGE_NX           BIT_ULL(63)     /* No execute */

#define MAX_PTRS_PER_PAGE       512

#define IOMMU_PAGE_SIZE_2M      BIT_ULL(21)
#define IOMMU_PAGE_SIZE_1G      BIT_ULL(30)

static inline int get_pgtable_level(void)
{
        /* 5-level page tables are not supported */
        return PAGE_MODE_4_LEVEL;
}

static inline bool is_large_pte(u64 pte)
{
        return (pte & IOMMU_PAGE_PSE);
}

static inline void *alloc_pgtable_page(void)
{
        return (void *)get_zeroed_page(GFP_KERNEL);
}

static inline u64 set_pgtable_attr(u64 *page)
{
        u64 prot;

        prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
        prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;

        return (iommu_virt_to_phys(page) | prot);
}

static inline void *get_pgtable_pte(u64 pte)
{
        return iommu_phys_to_virt(pte & PM_ADDR_MASK);
}

static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
{
        u64 pte;

        pte = __sme_set(paddr & PM_ADDR_MASK);
        pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
        pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;

        if (prot & IOMMU_PROT_IW)
                pte |= IOMMU_PAGE_RW;

        /* Large page */
        if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
                pte |= IOMMU_PAGE_PSE;

        return pte;
}

static inline u64 get_alloc_page_size(u64 size)
{
        if (size >= IOMMU_PAGE_SIZE_1G)
                return IOMMU_PAGE_SIZE_1G;

        if (size >= IOMMU_PAGE_SIZE_2M)
                return IOMMU_PAGE_SIZE_2M;

        return PAGE_SIZE;
}

static inline int page_size_to_level(u64 pg_size)
{
        if (pg_size == IOMMU_PAGE_SIZE_1G)
                return PAGE_MODE_3_LEVEL;

        if (pg_size == IOMMU_PAGE_SIZE_2M)
                return PAGE_MODE_2_LEVEL;

        return PAGE_MODE_1_LEVEL;
}

static inline void free_pgtable_page(u64 *pt)
{
        free_page((unsigned long)pt);
}

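/*
 * Recursively free a (sub-)table at @level. Large-page leaf PTEs do not
 * point at lower-level tables and are skipped; level 2 tables reference
 * leaf-only level 1 tables, which are freed without recursing.
 */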
static void free_pgtable(u64 *pt, int level)
{
        u64 *p;
        int i;

        for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
                /* PTE present? */
                if (!IOMMU_PTE_PRESENT(pt[i]))
                        continue;

                if (is_large_pte(pt[i]))
                        continue;

                /*
                 * Free the next level. No need to look at l1 tables here since
                 * they can only contain leaf PTEs; just free them directly.
                 */
                p = get_pgtable_pte(pt[i]);
                if (level > 2)
                        free_pgtable(p, level - 1);
                else
                        free_pgtable_page(p);
        }

        free_pgtable_page(pt);
}

/*
 * Allocate the page table pages leading to the leaf PTE for @iova,
 * replacing any conflicting large-page mapping found on the way down.
 */
static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova,
                         unsigned long pg_size, bool *updated)
{
        u64 *pte, *page;
        int level, end_level;

        level = get_pgtable_level() - 1;
        end_level = page_size_to_level(pg_size);
        pte = &pgd[PM_LEVEL_INDEX(level, iova)];
        iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);

        while (level >= end_level) {
                u64 __pte, __npte;

                __pte = *pte;

                if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
                        /* Unmap large pte */
                        cmpxchg64(pte, *pte, 0ULL);
                        *updated = true;
                        continue;
                }

                if (!IOMMU_PTE_PRESENT(__pte)) {
                        page = alloc_pgtable_page();
                        if (!page)
                                return NULL;

                        __npte = set_pgtable_attr(page);
                        /* pte could have been changed somewhere. */
                        if (cmpxchg64(pte, __pte, __npte) != __pte)
                                free_pgtable_page(page);
                        else if (IOMMU_PTE_PRESENT(__pte))
                                *updated = true;

                        continue;
                }

                level -= 1;
                pte = get_pgtable_pte(__pte);
                pte = &pte[PM_LEVEL_INDEX(level, iova)];
        }

        /* Tear down existing pte entries */
        if (IOMMU_PTE_PRESENT(*pte)) {
                u64 *__pte;

                *updated = true;
                __pte = get_pgtable_pte(*pte);
                cmpxchg64(pte, *pte, 0ULL);
                if (pg_size == IOMMU_PAGE_SIZE_1G)
                        free_pgtable(__pte, end_level - 1);
                else if (pg_size == IOMMU_PAGE_SIZE_2M)
                        free_pgtable_page(__pte);
        }

        return pte;
}

/*
 * This function checks if there is a PTE for a given DMA address.
 * If there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
                      unsigned long iova, unsigned long *page_size)
{
        u64 *pte;
        int level;

        level = get_pgtable_level() - 1;
        pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
        /* Default page size is 4K */
        *page_size = PAGE_SIZE;

        while (level) {
                /* Not present */
                if (!IOMMU_PTE_PRESENT(*pte))
                        return NULL;

                /* Walk to the next level */
                pte = get_pgtable_pte(*pte);
                pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];

                /* Large page */
                if (is_large_pte(*pte)) {
                        if (level == PAGE_MODE_3_LEVEL)
                                *page_size = IOMMU_PAGE_SIZE_1G;
                        else if (level == PAGE_MODE_2_LEVEL)
                                *page_size = IOMMU_PAGE_SIZE_2M;
                        else
                                return NULL;    /* Wrongly set PSE bit in PTE */

                        break;
                }

                level -= 1;
        }

        return pte;
}

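/*
 * Map @pgcount pages of size @pgsize at @iova. Each chunk is mapped with
 * the largest hardware page size (4K/2M/1G) that fits @pgsize; if any
 * existing PTE was torn down on the way, the IOTLB is flushed at the end.
 */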
static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                              phys_addr_t paddr, size_t pgsize, size_t pgcount,
                              int prot, gfp_t gfp, size_t *mapped)
{
        struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
        struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
        u64 *pte;
        unsigned long map_size;
        unsigned long mapped_size = 0;
        unsigned long o_iova = iova;
        size_t size = pgcount << __ffs(pgsize);
        int count = 0;
        int ret = 0;
        bool updated = false;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
                return -EINVAL;

        if (!(prot & IOMMU_PROT_MASK))
                return -EINVAL;

        while (mapped_size < size) {
                map_size = get_alloc_page_size(pgsize);
                pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated);
                if (!pte) {
                        ret = -EINVAL;
                        goto out;
                }

                *pte = set_pte_attr(paddr, map_size, prot);

                count++;
                iova += map_size;
                paddr += map_size;
                mapped_size += map_size;
        }

out:
        if (updated) {
                if (count > 1)
                        amd_iommu_flush_tlb(&pdom->domain, 0);
                else
                        amd_iommu_flush_page(&pdom->domain, 0, o_iova);
        }

        if (mapped)
                *mapped += mapped_size;

        return ret;
}

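/*
 * Unmap up to @pgcount * @pgsize bytes at @iova by clearing leaf PTEs,
 * returning the number of bytes actually unmapped (stopping early if no
 * mapping is found). No IOTLB flush happens here; invalidation is the
 * caller's responsibility.
 */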
static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
                                          unsigned long iova,
                                          size_t pgsize, size_t pgcount,
                                          struct iommu_iotlb_gather *gather)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
        unsigned long unmap_size;
        unsigned long unmapped = 0;
        size_t size = pgcount << __ffs(pgsize);
        u64 *pte;

        if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
                return 0;

        while (unmapped < size) {
                pte = fetch_pte(pgtable, iova, &unmap_size);
                if (!pte)
                        return unmapped;

                *pte = 0ULL;

                iova = (iova & ~(unmap_size - 1)) + unmap_size;
                unmapped += unmap_size;
        }

        return unmapped;
}

static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
        unsigned long offset_mask, pte_pgsize;
        u64 *pte, __pte;

        pte = fetch_pte(pgtable, iova, &pte_pgsize);
        if (!pte || !IOMMU_PTE_PRESENT(*pte))
                return 0;

        offset_mask = pte_pgsize - 1;
        __pte = __sme_clr(*pte & PM_ADDR_MASK);

        return (__pte & ~offset_mask) | (iova & offset_mask);
}

/*
 * The AMD driver performs IOTLB invalidation through its own command
 * queue helpers (see the amd_iommu_flush_* calls above), so these
 * io-pgtable flush hooks only exist to satisfy the framework.
 */
static void v2_tlb_flush_all(void *cookie)
{
}

static void v2_tlb_flush_walk(unsigned long iova, size_t size,
                              size_t granule, void *cookie)
{
}

static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
                            unsigned long iova, size_t granule,
                            void *cookie)
{
}

static const struct iommu_flush_ops v2_flush_ops = {
        .tlb_flush_all  = v2_tlb_flush_all,
        .tlb_flush_walk = v2_tlb_flush_walk,
        .tlb_add_page   = v2_tlb_add_page,
};

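/*
 * Tear down the entire v2 page table tree when the io_pgtable is freed.
 * Only domains that actually enabled v2 (PD_IOMMUV2_MASK) own such a tree.
 */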
static void v2_free_pgtable(struct io_pgtable *iop)
{
        struct protection_domain *pdom;
        struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);

        pdom = container_of(pgtable, struct protection_domain, iop);
        if (!(pdom->flags & PD_IOMMUV2_MASK))
                return;

        /*
         * Make changes visible to IOMMUs. No need to clear gcr3 entry
         * as gcr3 table is already freed.
         */
        amd_iommu_domain_update(pdom);

        /* Free page table */
        free_pgtable(pgtable->pgd, get_pgtable_level());
}

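/*
 * Allocate the root table (PGD), publish it through the domain's GCR3
 * table, and wire up the map/unmap/iova_to_phys callbacks for this format.
 */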
static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
        struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
        struct protection_domain *pdom = (struct protection_domain *)cookie;
        int ret;

        pgtable->pgd = alloc_pgtable_page();
        if (!pgtable->pgd)
                return NULL;

        ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd));
        if (ret)
                goto err_free_pgd;

        pgtable->iop.ops.map_pages    = iommu_v2_map_pages;
        pgtable->iop.ops.unmap_pages  = iommu_v2_unmap_pages;
        pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;

        cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
        cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
        cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
        cfg->tlb = &v2_flush_ops;

        return &pgtable->iop;

err_free_pgd:
        free_pgtable_page(pgtable->pgd);

        return NULL;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
        .alloc = v2_alloc_pgtable,
        .free  = v2_free_pgtable,
};
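
A minimal usage sketch, not part of the file above: this is roughly how a caller reaches v2_alloc_pgtable() through the generic io-pgtable framework. alloc_io_pgtable_ops() and the AMD_IOMMU_V2 format are the real framework entry points; the wrapper function and its error handling here are illustrative assumptions.

#include <linux/io-pgtable.h>

/* Hypothetical helper: select the v2 backend for a protection domain. */
static int example_setup_v2_pgtable(struct protection_domain *pdom)
{
        struct io_pgtable_ops *ops;

        /* cookie is the domain; v2_alloc_pgtable() casts it back out */
        ops = alloc_io_pgtable_ops(AMD_IOMMU_V2, &pdom->iop.pgtbl_cfg, pdom);
        if (!ops)
                return -ENOMEM; /* v2_alloc_pgtable() returned NULL */

        /* ops->map_pages()/ops->unmap_pages() now drive the v2 tables. */
        return 0;
}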