io-pgtable-fast.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#define pr_fmt(fmt)	"io-pgtable-fast: " fmt

#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/io-pgtable.h>
#include <linux/io-pgtable-fast.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include <linux/qcom-iommu-util.h>
#include <linux/qcom-io-pgtable.h>

#define AV8L_FAST_MAX_ADDR_BITS		48

/* Page table bits */
#define AV8L_FAST_PTE_TYPE_SHIFT	0
#define AV8L_FAST_PTE_TYPE_MASK		0x3

#define AV8L_FAST_PTE_TYPE_BLOCK	1
#define AV8L_FAST_PTE_TYPE_TABLE	3
#define AV8L_FAST_PTE_TYPE_PAGE		3

#define AV8L_FAST_PTE_NSTABLE		(((av8l_fast_iopte)1) << 63)
#define AV8L_FAST_PTE_XN		(((av8l_fast_iopte)3) << 53)
#define AV8L_FAST_PTE_AF		(((av8l_fast_iopte)1) << 10)
#define AV8L_FAST_PTE_SH_NS		(((av8l_fast_iopte)0) << 8)
#define AV8L_FAST_PTE_SH_OS		(((av8l_fast_iopte)2) << 8)
#define AV8L_FAST_PTE_SH_IS		(((av8l_fast_iopte)3) << 8)
#define AV8L_FAST_PTE_SH_MASK		(((av8l_fast_iopte)3) << 8)
#define AV8L_FAST_PTE_NS		(((av8l_fast_iopte)1) << 5)
#define AV8L_FAST_PTE_VALID		(((av8l_fast_iopte)1) << 0)

#define AV8L_FAST_PTE_ATTR_LO_MASK	(((av8l_fast_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define AV8L_FAST_PTE_ATTR_HI_MASK	(((av8l_fast_iopte)6) << 52)
#define AV8L_FAST_PTE_ATTR_MASK		(AV8L_FAST_PTE_ATTR_LO_MASK |	\
					 AV8L_FAST_PTE_ATTR_HI_MASK)
#define AV8L_FAST_PTE_ADDR_MASK		((av8l_fast_iopte)0xfffffffff000)

/* Stage-1 PTE */
#define AV8L_FAST_PTE_AP_PRIV_RW	(((av8l_fast_iopte)0) << 6)
#define AV8L_FAST_PTE_AP_RW		(((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_AP_PRIV_RO	(((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_AP_RO		(((av8l_fast_iopte)3) << 6)
#define AV8L_FAST_PTE_ATTRINDX_SHIFT	2
#define AV8L_FAST_PTE_ATTRINDX_MASK	0x7
#define AV8L_FAST_PTE_nG		(((av8l_fast_iopte)1) << 11)

/* Stage-2 PTE */
#define AV8L_FAST_PTE_HAP_FAULT		(((av8l_fast_iopte)0) << 6)
#define AV8L_FAST_PTE_HAP_READ		(((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_HAP_WRITE		(((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_MEMATTR_OIWB	(((av8l_fast_iopte)0xf) << 2)
#define AV8L_FAST_PTE_MEMATTR_NC	(((av8l_fast_iopte)0x5) << 2)
#define AV8L_FAST_PTE_MEMATTR_DEV	(((av8l_fast_iopte)0x1) << 2)

/* Register bits */
#define ARM_32_LPAE_TCR_EAE		(1 << 31)
#define ARM_64_LPAE_S2_TCR_RES1		(1 << 31)

#define AV8L_FAST_TCR_TG0_4K		(0 << 14)
#define AV8L_FAST_TCR_TG0_64K		(1 << 14)
#define AV8L_FAST_TCR_TG0_16K		(2 << 14)

#define AV8L_FAST_TCR_SH0_SHIFT		12
#define AV8L_FAST_TCR_SH0_MASK		0x3
#define AV8L_FAST_TCR_SH_NS		0
#define AV8L_FAST_TCR_SH_OS		2
#define AV8L_FAST_TCR_SH_IS		3

#define AV8L_FAST_TCR_ORGN0_SHIFT	10
#define AV8L_FAST_TCR_IRGN0_SHIFT	8
#define AV8L_FAST_TCR_RGN_MASK		0x3
#define AV8L_FAST_TCR_RGN_NC		0
#define AV8L_FAST_TCR_RGN_WBWA		1
#define AV8L_FAST_TCR_RGN_WT		2
#define AV8L_FAST_TCR_RGN_WB		3

#define AV8L_FAST_TCR_SL0_SHIFT		6
#define AV8L_FAST_TCR_SL0_MASK		0x3

#define AV8L_FAST_TCR_T0SZ_SHIFT	0
#define AV8L_FAST_TCR_SZ_MASK		0xf

#define AV8L_FAST_TCR_PS_SHIFT		16
#define AV8L_FAST_TCR_PS_MASK		0x7

#define AV8L_FAST_TCR_IPS_SHIFT		32
#define AV8L_FAST_TCR_IPS_MASK		0x7

#define AV8L_FAST_TCR_PS_32_BIT		0x0ULL
#define AV8L_FAST_TCR_PS_36_BIT		0x1ULL
#define AV8L_FAST_TCR_PS_40_BIT		0x2ULL
#define AV8L_FAST_TCR_PS_42_BIT		0x3ULL
#define AV8L_FAST_TCR_PS_44_BIT		0x4ULL
#define AV8L_FAST_TCR_PS_48_BIT		0x5ULL

#define AV8L_FAST_TCR_EPD1_SHIFT	23
#define AV8L_FAST_TCR_EPD1_FAULT	1

#define AV8L_FAST_MAIR_ATTR_SHIFT(n)	((n) << 3)
#define AV8L_FAST_MAIR_ATTR_MASK	0xff
#define AV8L_FAST_MAIR_ATTR_DEVICE	0x04
#define AV8L_FAST_MAIR_ATTR_NC		0x44
#define AV8L_FAST_MAIR_ATTR_WBRWA	0xff
#define AV8L_FAST_MAIR_ATTR_UPSTREAM	0xf4
#define AV8L_FAST_MAIR_ATTR_IDX_NC	0
#define AV8L_FAST_MAIR_ATTR_IDX_CACHE	1
#define AV8L_FAST_MAIR_ATTR_IDX_DEV	2
#define AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM	3

#define AV8L_FAST_PAGE_SHIFT		12

#define PTE_MAIR_IDX(pte)	\
	((pte >> AV8L_FAST_PTE_ATTRINDX_SHIFT) & AV8L_FAST_PTE_ATTRINDX_MASK)

#define PTE_SH_IDX(pte)		(pte & AV8L_FAST_PTE_SH_MASK)

#define iopte_pmd_offset(pmds, base, iova) (pmds + ((iova - base) >> 12))
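/*
 * The pmds array is one flat, virtually contiguous run of last-level PTEs
 * covering the whole fast domain, so a lookup is a single offset rather than
 * a multi-level walk. For example, with base = 0x10000000, an iova of
 * 0x10003000 resolves to pmds + 3 (three 4K pages past the base).
 */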
static inline dma_addr_t av8l_dma_addr(void *addr)
{
	if (is_vmalloc_addr(addr))
		return page_to_phys(vmalloc_to_page(addr)) +
			offset_in_page(addr);
	return virt_to_phys(addr);
}

static void __av8l_clean_range(struct device *dev, void *start, void *end)
{
	size_t size;
	void *region_end;
	unsigned long page_end;

	if (is_vmalloc_addr(start)) {
		while (start < end) {
			page_end = round_down((unsigned long)start + PAGE_SIZE,
					      PAGE_SIZE);
			region_end = min_t(void *, end, page_end);
			size = region_end - start;
			dma_sync_single_for_device(dev, av8l_dma_addr(start),
						   size, DMA_TO_DEVICE);
			start = region_end;
		}
	} else {
		size = end - start;
		dma_sync_single_for_device(dev, av8l_dma_addr(start), size,
					   DMA_TO_DEVICE);
	}
}

static void av8l_clean_range(struct io_pgtable_cfg *cfg, av8l_fast_iopte *start,
			     av8l_fast_iopte *end)
{
	if (!cfg->coherent_walk)
		__av8l_clean_range(cfg->iommu_dev, start, end);
}
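/*
 * Page-table memory may come from vmalloc (the vmap'ed pmd array below),
 * which is only virtually contiguous, so the non-coherent path above syncs
 * it page by page, translating each page to its own physical address. When
 * the table walker is cache coherent, av8l_clean_range() is a no-op.
 */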
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB

#include <linux/notifier.h>

static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);

void av8l_register_notify(struct notifier_block *nb)
{
	atomic_notifier_chain_register(&av8l_notifier_list, nb);
}
EXPORT_SYMBOL(av8l_register_notify);

static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
	if (unlikely(*ptep)) {
		atomic_notifier_call_chain(
			&av8l_notifier_list, MAPPED_OVER_STALE_TLB,
			(void *) ptep);
		pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
		       *ptep, ptep);
		pr_err("Nearby memory:\n");
		print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
			       32, 8, ptep - 16, 32 * sizeof(*ptep), false);
	}
}

void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, u64 base,
				u64 end, bool skip_sync)
{
	int i;
	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
	struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
	av8l_fast_iopte *pmdp = iopte_pmd_offset(data->pmds, data->base, base);

	for (i = base >> AV8L_FAST_PAGE_SHIFT;
	     i <= (end >> AV8L_FAST_PAGE_SHIFT); ++i) {
		if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
			*pmdp = 0;
			if (!skip_sync)
				av8l_clean_range(&iop->cfg, pmdp, pmdp + 1);
		}
		pmdp++;
	}
}
#else
static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
}
#endif
static av8l_fast_iopte
av8l_fast_prot_to_pte(struct av8l_fast_io_pgtable *data, int prot)
{
	av8l_fast_iopte pte = AV8L_FAST_PTE_XN
		| AV8L_FAST_PTE_TYPE_PAGE
		| AV8L_FAST_PTE_AF
		| AV8L_FAST_PTE_nG
		| AV8L_FAST_PTE_SH_OS;

	if (prot & IOMMU_MMIO)
		pte |= (AV8L_FAST_MAIR_ATTR_IDX_DEV
			<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
	else if (prot & IOMMU_CACHE)
		pte |= (AV8L_FAST_MAIR_ATTR_IDX_CACHE
			<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
	else if (prot & IOMMU_SYS_CACHE)
		pte |= (AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM
			<< AV8L_FAST_PTE_ATTRINDX_SHIFT);

	if (!(prot & IOMMU_WRITE))
		pte |= AV8L_FAST_PTE_AP_RO;
	else
		pte |= AV8L_FAST_PTE_AP_RW;

	return pte;
}
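/*
 * Worked example, derived from the bit definitions above: for
 * IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE this yields
 * XN | TYPE_PAGE | AF | nG | SH_OS | AP_RW | (IDX_CACHE << ATTRINDX_SHIFT),
 * i.e. 0x60000000000e47; the mapper then ORs in the physical address bits.
 */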
static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
	struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);
	unsigned long i, nptes = size >> AV8L_FAST_PAGE_SHIFT;
	av8l_fast_iopte pte;

	pte = av8l_fast_prot_to_pte(data, prot);
	paddr &= AV8L_FAST_PTE_ADDR_MASK;
	for (i = 0; i < nptes; i++, paddr += SZ_4K) {
		__av8l_check_for_stale_tlb(ptep + i);
		*(ptep + i) = pte | paddr;
	}
	av8l_clean_range(&iop->cfg, ptep, ptep + nptes);

	return 0;
}
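/*
 * Note that map never allocates: every table level is populated up front by
 * av8l_fast_prepopulate_pgtables(), so the gfp argument is unused here and
 * mapping reduces to a linear PTE fill plus one cache clean.
 */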
int av8l_fast_map_public(struct io_pgtable_ops *ops, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot)
{
	return av8l_fast_map(ops, iova, paddr, size, prot, GFP_ATOMIC);
}

static int av8l_fast_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			       phys_addr_t paddr, size_t pgsize, size_t pgcount,
			       int prot, gfp_t gfp, size_t *mapped)
{
	int ret = av8l_fast_map(ops, iova, paddr, pgsize * pgcount, prot, gfp);

	if (!ret)
		*mapped = pgsize * pgcount;

	return ret;
}
static size_t
__av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
		  size_t size, bool allow_stale_tlb)
{
	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
	struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
	unsigned long nptes;
	av8l_fast_iopte *ptep;
	int val = allow_stale_tlb
		? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
		: 0;

	ptep = iopte_pmd_offset(data->pmds, data->base, iova);
	nptes = size >> AV8L_FAST_PAGE_SHIFT;

	memset(ptep, val, sizeof(*ptep) * nptes);
	av8l_clean_range(&iop->cfg, ptep, ptep + nptes);
	if (!allow_stale_tlb)
		io_pgtable_tlb_flush_all(&data->iop);

	return size;
}
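/*
 * memset() repeats val across every byte of each cleared PTE. Both fill
 * patterns leave the VALID bit (bit 0) clear, so the entries decode as
 * invalid; the nonzero AV8L_FAST_PTE_UNMAPPED_NEED_TLBI pattern (from
 * io-pgtable-fast.h) additionally flags them for a later
 * av8l_fast_clear_stale_ptes() sweep once the TLB has been invalidated.
 */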
/* caller must take care of tlb cache maintenance */
void av8l_fast_unmap_public(struct io_pgtable_ops *ops, unsigned long iova,
			    size_t size)
{
	__av8l_fast_unmap(ops, iova, size, true);
}

static size_t av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
			      size_t size, struct iommu_iotlb_gather *gather)
{
	return __av8l_fast_unmap(ops, iova, size, false);
}

static size_t av8l_fast_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
				    size_t pgsize, size_t pgcount,
				    struct iommu_iotlb_gather *gather)
{
	return __av8l_fast_unmap(ops, iova, pgsize * pgcount, false);
}
/* TODO: Add this back in android-mainline */
static int __maybe_unused av8l_fast_map_sg(struct io_pgtable_ops *ops,
			unsigned long iova, struct scatterlist *sgl,
			unsigned int nents, int prot, gfp_t gfp, size_t *mapped)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		av8l_fast_map(ops, iova, sg_phys(sg), sg->length, prot, gfp);
		iova += sg->length;
		*mapped += sg->length;
	}

	return 0;
}

int av8l_fast_map_sg_public(struct io_pgtable_ops *ops,
			unsigned long iova, struct scatterlist *sgl,
			unsigned int nents, int prot, size_t *mapped)
{
	return av8l_fast_map_sg(ops, iova, sgl, nents, prot, GFP_ATOMIC, mapped);
}

#if defined(CONFIG_ARM64)
#define FAST_PGDNDX(va)	(((va) & 0x7fc0000000) >> 27)
#elif defined(CONFIG_ARM)
#define FAST_PGDNDX(va)	(((va) & 0xc0000000) >> 27)
#endif
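/*
 * FAST_PGDNDX extracts the pgd index bits of the VA pre-shifted into a byte
 * offset: on ARM64, bits [38:30] select the entry, and shifting right by 27
 * instead of 30 multiplies the index by sizeof(av8l_fast_iopte) == 8, so the
 * result can be OR'ed directly onto the page-aligned pgd base pointer below.
 */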
static phys_addr_t av8l_fast_iova_to_phys(struct io_pgtable_ops *ops,
					  unsigned long iova)
{
	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
	av8l_fast_iopte pte, *pgdp, *pudp, *pmdp;
	unsigned long pgd;
	phys_addr_t phys;
	const unsigned long pts = AV8L_FAST_PTE_TYPE_SHIFT;
	const unsigned long ptm = AV8L_FAST_PTE_TYPE_MASK;
	const unsigned long ptt = AV8L_FAST_PTE_TYPE_TABLE;
	const unsigned long ptp = AV8L_FAST_PTE_TYPE_PAGE;
	const av8l_fast_iopte am = AV8L_FAST_PTE_ADDR_MASK;

	/* TODO: clean up some of these magic numbers... */

	pgd = (unsigned long)data->pgd | FAST_PGDNDX(iova);
	pgdp = (av8l_fast_iopte *)pgd;

	pte = *pgdp;
	if (((pte >> pts) & ptm) != ptt)
		return 0;
	pudp = phys_to_virt((pte & am) | ((iova & 0x3fe00000) >> 18));

	pte = *pudp;
	if (((pte >> pts) & ptm) != ptt)
		return 0;
	pmdp = phys_to_virt((pte & am) | ((iova & 0x1ff000) >> 9));

	pte = *pmdp;
	if (((pte >> pts) & ptm) != ptp)
		return 0;
	phys = pte & am;

	return phys | (iova & 0xfff);
}

phys_addr_t av8l_fast_iova_to_phys_public(struct io_pgtable_ops *ops,
					  unsigned long iova)
{
	return av8l_fast_iova_to_phys(ops, iova);
}
static bool av8l_fast_iova_coherent(struct io_pgtable_ops *ops,
				    unsigned long iova)
{
	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);

	return ((PTE_MAIR_IDX(*ptep) == AV8L_FAST_MAIR_ATTR_IDX_CACHE) &&
		((PTE_SH_IDX(*ptep) == AV8L_FAST_PTE_SH_OS) ||
		 (PTE_SH_IDX(*ptep) == AV8L_FAST_PTE_SH_IS)));
}

bool av8l_fast_iova_coherent_public(struct io_pgtable_ops *ops,
				    unsigned long iova)
{
	return av8l_fast_iova_coherent(ops, iova);
}
static struct av8l_fast_io_pgtable *
av8l_fast_alloc_pgtable_data(struct io_pgtable_cfg *cfg)
{
	struct av8l_fast_io_pgtable *data;

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

	data->iop.ops = (struct io_pgtable_ops) {
		.map		= av8l_fast_map,
		.map_pages	= av8l_fast_map_pages,
		.unmap		= av8l_fast_unmap,
		.unmap_pages	= av8l_fast_unmap_pages,
		.iova_to_phys	= av8l_fast_iova_to_phys,
	};

	return data;
}
/*
 * We need max 1 page for the pgd, 4 pages for puds (1GB VA per pud page) and
 * 2048 pages for pmds (each pud page contains 512 table entries, each
 * pointing to a pmd).
 */
#define NUM_PGD_PAGES	1
#define NUM_PUD_PAGES	4
#define NUM_PMD_PAGES	2048
#define NUM_PGTBL_PAGES	(NUM_PGD_PAGES + NUM_PUD_PAGES + NUM_PMD_PAGES)

/* undefine arch specific definitions which depend on page table format */
#undef pud_index
#undef pud_mask
#undef pud_next
#undef pmd_index
#undef pmd_mask
#undef pmd_next

#define pud_index(addr)		(((addr) >> 30) & 0x3)
#define pud_mask(addr)		((addr) & ~((1UL << 30) - 1))
#define pud_next(addr, end)						\
({	unsigned long __boundary = pud_mask(addr + (1UL << 30));	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

#define pmd_index(addr)		(((addr) >> 21) & 0x1ff)
#define pmd_mask(addr)		((addr) & ~((1UL << 21) - 1))
#define pmd_next(addr, end)						\
({	unsigned long __boundary = pmd_mask(addr + (1UL << 21));	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})
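/*
 * Worked example of the local index macros: for iova 0x80600000,
 * pud_index() takes bits [31:30] (0x2, the third 1GB pud page) and
 * pmd_index() takes bits [29:21] (0x3, the fourth entry within it),
 * mirroring the level-1/level-2 split of the AArch64 4K granule.
 */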
static int
av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
			       struct io_pgtable_cfg *cfg, void *cookie)
{
	int i, j, pg = 0;
	struct page **pages, *page;
	struct qcom_io_pgtable_info *pgtbl_info = to_qcom_io_pgtable_info(cfg);
	dma_addr_t pud, pmd;
	int pmd_pg_index;
	dma_addr_t base = pgtbl_info->iova_base;
	dma_addr_t end = pgtbl_info->iova_end;

	pages = kmalloc(sizeof(*pages) * NUM_PGTBL_PAGES, __GFP_NOWARN |
							  __GFP_NORETRY);
	if (!pages)
		pages = vmalloc(sizeof(*pages) * NUM_PGTBL_PAGES);
	if (!pages)
		return -ENOMEM;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		goto err_free_pages_arr;
	pages[pg++] = page;
	data->pgd = page_address(page);

	/*
	 * We need max 2048 entries at level 2 to map 4GB of VA space. A page
	 * can hold 512 entries, so we need max 4 pages.
	 */
	for (i = pud_index(base), pud = base; pud < end;
	     ++i, pud = pud_next(pud, end)) {
		av8l_fast_iopte pte, *ptep;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto err_free_pages;
		pages[pg++] = page;
		data->puds[i] = page_address(page);
		pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
		ptep = ((av8l_fast_iopte *)data->pgd) + i;
		*ptep = pte;
	}
	av8l_clean_range(cfg, data->pgd, data->pgd + 4);

	/*
	 * We have max 4 puds, each of which can point to 512 pmds, so we'll
	 * have max 2048 pmds, each of which can hold 512 ptes, for a grand
	 * total of 2048*512=1048576 PTEs.
	 */
	pmd_pg_index = pg;
	for (i = pud_index(base), pud = base; pud < end;
	     ++i, pud = pud_next(pud, end)) {
		for (j = pmd_index(pud), pmd = pud; pmd < pud_next(pud, end);
		     ++j, pmd = pmd_next(pmd, end)) {
			av8l_fast_iopte pte, *pudp;
			void *addr;

			page = alloc_page(GFP_KERNEL | __GFP_ZERO);
			if (!page)
				goto err_free_pages;
			pages[pg++] = page;

			addr = page_address(page);
			av8l_clean_range(cfg, addr, addr + SZ_4K);

			pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
			pudp = data->puds[i] + j;
			*pudp = pte;
		}
		av8l_clean_range(cfg, data->puds[i], data->puds[i] + 512);
	}

	/*
	 * We map the pmds into a virtually contiguous space so that we
	 * don't have to traverse the first two levels of the page tables
	 * to find the appropriate pte. Instead, it will be a simple
	 * offset from the virtual base of the pmds.
	 */
	data->pmds = vmap(&pages[pmd_pg_index], pg - pmd_pg_index,
			  VM_IOREMAP, PAGE_KERNEL);
	if (!data->pmds)
		goto err_free_pages;

	data->pages = pages;
	data->base = base;
	data->end = end;
	data->nr_pages = pg;
	return 0;

err_free_pages:
	for (i = 0; i < pg; ++i)
		__free_page(pages[i]);
err_free_pages_arr:
	kvfree(pages);
	return -ENOMEM;
}
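/*
 * The footprint for a full 4GB domain is therefore fixed at init time:
 * 1 pgd page + 4 pud pages + 2048 pmd pages = 2053 pages, a little over
 * 8MB of page-table memory, traded for O(1) map and unmap thereafter.
 */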
static struct io_pgtable *
av8l_fast_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	u64 reg;
	struct av8l_fast_io_pgtable *data =
		av8l_fast_alloc_pgtable_data(cfg);
	typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;

	if (!data)
		return NULL;

	/* restrict according to the fast map requirements */
	cfg->ias = 32;
	cfg->pgsize_bitmap = SZ_4K;

	/* TCR */
	if (cfg->coherent_walk) {
		tcr->sh = AV8L_FAST_TCR_SH_IS;
		tcr->irgn = AV8L_FAST_TCR_RGN_WBWA;
		tcr->orgn = AV8L_FAST_TCR_RGN_WBWA;
		if (WARN_ON(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
			goto out_free_data;
	} else {
		tcr->sh = AV8L_FAST_TCR_SH_OS;
		tcr->irgn = AV8L_FAST_TCR_RGN_NC;
		if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
			tcr->orgn = AV8L_FAST_TCR_RGN_NC;
		else
			tcr->orgn = AV8L_FAST_TCR_RGN_WBWA;
	}

	tcr->tg = AV8L_FAST_TCR_TG0_4K;

	switch (cfg->oas) {
	case 32:
		tcr->ips = AV8L_FAST_TCR_PS_32_BIT;
		break;
	case 36:
		tcr->ips = AV8L_FAST_TCR_PS_36_BIT;
		break;
	case 40:
		tcr->ips = AV8L_FAST_TCR_PS_40_BIT;
		break;
	case 42:
		tcr->ips = AV8L_FAST_TCR_PS_42_BIT;
		break;
	case 44:
		tcr->ips = AV8L_FAST_TCR_PS_44_BIT;
		break;
	case 48:
		tcr->ips = AV8L_FAST_TCR_PS_48_BIT;
		break;
	default:
		goto out_free_data;
	}

	tcr->tsz = 64ULL - cfg->ias;

	/* MAIRs */
	reg = (AV8L_FAST_MAIR_ATTR_NC
	       << AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_NC)) |
	      (AV8L_FAST_MAIR_ATTR_WBRWA
	       << AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_CACHE)) |
	      (AV8L_FAST_MAIR_ATTR_DEVICE
	       << AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_DEV)) |
	      (AV8L_FAST_MAIR_ATTR_UPSTREAM
	       << AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM));
	cfg->arm_lpae_s1_cfg.mair = reg;

	/* Allocate all page table memory! */
	if (av8l_fast_prepopulate_pgtables(data, cfg, cookie))
		goto out_free_data;

	/* TTBRs */
	cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
	return &data->iop;

out_free_data:
	kfree(data);
	return NULL;
}
static void av8l_fast_free_pgtable(struct io_pgtable *iop)
{
	int i;
	struct av8l_fast_io_pgtable *data = iof_pgtable_to_data(iop);

	vunmap(data->pmds);
	for (i = 0; i < data->nr_pages; ++i)
		__free_page(data->pages[i]);
	kvfree(data->pages);
	kfree(data);
}
struct io_pgtable_init_fns io_pgtable_av8l_fast_init_fns = {
	.alloc	= av8l_fast_alloc_pgtable,
	.free	= av8l_fast_free_pgtable,
};
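/*
 * Usage sketch (illustrative, not part of this driver): a caller selects
 * this format by passing ARM_V8L_FAST to alloc_io_pgtable_ops(), exactly as
 * the selftest below does. my_tlb_ops and cookie are placeholders for
 * whatever flush ops and opaque pointer the caller provides:
 *
 *	struct qcom_io_pgtable_info info = {
 *		.iova_base = 0,
 *		.iova_end  = SZ_1G * 4ULL - 1,
 *	};
 *	struct io_pgtable_ops *ops;
 *
 *	info.pgtbl_cfg = (struct io_pgtable_cfg) {
 *		.pgsize_bitmap	= SZ_4K,
 *		.ias		= 32,
 *		.oas		= 32,
 *		.coherent_walk	= true,
 *		.tlb		= &my_tlb_ops,
 *	};
 *	ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &info.pgtbl_cfg, cookie);
 */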
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_SELFTEST

#include <linux/dma-map-ops.h>

static struct io_pgtable_cfg *cfg_cookie;

static void dummy_tlb_flush_all(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
			    void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
			       unsigned long iova, size_t granule, void *cookie)
{
	dummy_tlb_flush(iova, granule, granule, cookie);
}

static struct iommu_flush_ops dummy_tlb_ops __initdata = {
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_flush_walk	= dummy_tlb_flush,
	.tlb_add_page	= dummy_tlb_add_page,
};
/*
 * Returns true if the iova range is successfully mapped to the contiguous
 * phys range in ops.
 */
static bool av8l_fast_range_has_specific_mapping(struct io_pgtable_ops *ops,
						 const unsigned long iova_start,
						 const phys_addr_t phys_start,
						 const size_t size)
{
	u64 iova = iova_start;
	phys_addr_t phys = phys_start;

	while (iova < (iova_start + size)) {
		/* + 42 just to make sure offsetting is working */
		if (ops->iova_to_phys(ops, iova + 42) != (phys + 42))
			return false;
		iova += SZ_4K;
		phys += SZ_4K;
	}
	return true;
}
static int __init av8l_fast_positive_testing(void)
{
	int failed = 0;
	u64 iova;
	struct io_pgtable_ops *ops;
	struct qcom_io_pgtable_info pgtable_info;
	struct av8l_fast_io_pgtable *data;
	av8l_fast_iopte *pmds;
	u64 max = SZ_1G * 4ULL - 1;
	u64 base = 0;

	pgtable_info.iova_base = base;
	pgtable_info.iova_end = max;

	pgtable_info.pgtbl_cfg = (struct io_pgtable_cfg) {
		.quirks = 0,
		.tlb = &dummy_tlb_ops,
		.ias = 32,
		.oas = 32,
		.pgsize_bitmap = SZ_4K,
		.coherent_walk = true,
	};

	cfg_cookie = &pgtable_info.pgtbl_cfg;
	ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &pgtable_info.pgtbl_cfg,
				   &pgtable_info.pgtbl_cfg);
	if (WARN_ON(!ops))
		return 1;

	data = iof_pgtable_ops_to_data(ops);
	pmds = data->pmds;

	/* map the entire 4GB VA space with 4K map calls */
	for (iova = base; iova < max; iova += SZ_4K) {
		if (WARN_ON(ops->map(ops, iova, iova, SZ_4K, IOMMU_READ,
				     GFP_KERNEL))) {
			failed++;
			continue;
		}
	}
	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
							  base, max - base)))
		failed++;

	/* unmap it all */
	for (iova = base; iova < max; iova += SZ_4K) {
		if (WARN_ON(ops->unmap(ops, iova, SZ_4K, NULL) != SZ_4K))
			failed++;
	}

	/* sweep up TLB proving PTEs */
	av8l_fast_clear_stale_ptes(ops, base, max, false);

	/* map the entire 4GB VA space with 8K map calls */
	for (iova = base; iova < max; iova += SZ_8K) {
		if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ,
				     GFP_KERNEL))) {
			failed++;
			continue;
		}
	}
	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
							  base, max - base)))
		failed++;

	/* unmap it all with 8K unmap calls */
	for (iova = base; iova < max; iova += SZ_8K) {
		if (WARN_ON(ops->unmap(ops, iova, SZ_8K, NULL) != SZ_8K))
			failed++;
	}

	/* sweep up TLB proving PTEs */
	av8l_fast_clear_stale_ptes(ops, base, max, false);

	/* map the entire 4GB VA space with 16K map calls */
	for (iova = base; iova < max; iova += SZ_16K) {
		if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ,
				     GFP_KERNEL))) {
			failed++;
			continue;
		}
	}
	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
							  base, max - base)))
		failed++;

	/* unmap it all */
	for (iova = base; iova < max; iova += SZ_16K) {
		if (WARN_ON(ops->unmap(ops, iova, SZ_16K, NULL) != SZ_16K))
			failed++;
	}

	/* sweep up TLB proving PTEs */
	av8l_fast_clear_stale_ptes(ops, base, max, false);

	/* map the entire 4GB VA space with 64K map calls */
	for (iova = base; iova < max; iova += SZ_64K) {
		if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ,
				     GFP_KERNEL))) {
			failed++;
			continue;
		}
	}
	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
							  base, max - base)))
		failed++;

	/* unmap it all at once */
	if (WARN_ON(ops->unmap(ops, base, max - base, NULL) != (max - base)))
		failed++;

	free_io_pgtable_ops(ops);
	return failed;
}
static int __init av8l_fast_do_selftests(void)
{
	int failed = 0;

	failed += av8l_fast_positive_testing();

	pr_err("selftest: completed with %d failures\n", failed);

	return 0;
}
subsys_initcall(av8l_fast_do_selftests);
#endif