// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <linux/dma-mapping-fast.h>
#include <linux/qcom-dma-mapping.h>
#include <linux/dma-map-ops.h>
#include <linux/io-pgtable-fast.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/iova.h>
#include <linux/io-pgtable.h>
#include <linux/qcom-iommu-util.h>
#include <trace/hooks/iommu.h>

#include "qcom-dma-iommu-generic.h"

/* some redundant definitions... :( TODO: move to io-pgtable-fast.h */
#define FAST_PAGE_SHIFT		12
#define FAST_PAGE_SIZE		(1UL << FAST_PAGE_SHIFT)
#define FAST_PAGE_MASK		(~(PAGE_SIZE - 1))

static struct rb_root mappings;
static DEFINE_RWLOCK(mappings_lock);
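
/*
 * Fastmap contexts are tracked in the global 'mappings' rb-tree, keyed by
 * the iommu_domain pointer and protected by mappings_lock.
 */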

static int fast_smmu_add_mapping(struct dma_fast_smmu_mapping *fast)
{
	struct rb_node **new = &mappings.rb_node, *parent = NULL;
	struct dma_fast_smmu_mapping *entry;
	int ret = 0;
	unsigned long flags;

	write_lock_irqsave(&mappings_lock, flags);
	while (*new) {
		entry = rb_entry(*new, struct dma_fast_smmu_mapping, node);

		parent = *new;
		if (fast->domain < entry->domain) {
			new = &((*new)->rb_left);
		} else if (fast->domain > entry->domain) {
			new = &((*new)->rb_right);
		} else {
			ret = -EEXIST;
			break;
		}
	}

	if (!ret) {
		rb_link_node(&fast->node, parent, new);
		rb_insert_color(&fast->node, &mappings);
	}
	write_unlock_irqrestore(&mappings_lock, flags);

	return ret;
}

static struct dma_fast_smmu_mapping *__fast_smmu_lookup_mapping(struct iommu_domain *domain)
{
	struct rb_node *node = mappings.rb_node;
	struct dma_fast_smmu_mapping *entry;

	while (node) {
		entry = rb_entry(node, struct dma_fast_smmu_mapping, node);

		if (domain < entry->domain)
			node = node->rb_left;
		else if (domain > entry->domain)
			node = node->rb_right;
		else
			return entry;
	}

	return NULL;
}

static struct dma_fast_smmu_mapping *fast_smmu_lookup_mapping(struct iommu_domain *domain)
{
	struct dma_fast_smmu_mapping *fast;
	unsigned long flags;

	read_lock_irqsave(&mappings_lock, flags);
	fast = __fast_smmu_lookup_mapping(domain);
	read_unlock_irqrestore(&mappings_lock, flags);

	return fast;
}

static struct dma_fast_smmu_mapping *fast_smmu_remove_mapping(struct iommu_domain *domain)
{
	struct dma_fast_smmu_mapping *fast;
	unsigned long flags;

	write_lock_irqsave(&mappings_lock, flags);
	fast = __fast_smmu_lookup_mapping(domain);
	if (fast)
		rb_erase(&fast->node, &mappings);
	write_unlock_irqrestore(&mappings_lock, flags);

	return fast;
}

static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static bool is_dma_coherent(struct device *dev, unsigned long attrs)
{
	bool is_coherent;

	if (attrs & DMA_ATTR_FORCE_COHERENT)
		is_coherent = true;
	else if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
		is_coherent = false;
	else if (dev_is_dma_coherent(dev))
		is_coherent = true;
	else
		is_coherent = false;

	return is_coherent;
}

static struct dma_fast_smmu_mapping *dev_get_mapping(struct device *dev)
{
	struct iommu_domain *domain;

	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return ERR_PTR(-EINVAL);
	return fast_smmu_lookup_mapping(domain);
}
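
/*
 * IOVA allocator with lazy TLB invalidation: 'bitmap' tracks pages that are
 * currently mapped, while 'clean_bitmap' additionally keeps set any pages
 * whose TLB entries may still be stale after an unmap. Allocations are
 * served from clean_bitmap, so the TLB only has to be invalidated once the
 * allocator wraps around and runs out of clean IOVA space.
 */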

static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
					 unsigned long attrs,
					 size_t size)
{
	unsigned long bit, nbits = size >> FAST_PAGE_SHIFT;
	unsigned long align = (1 << get_order(size)) - 1;
	struct iommu_domain *domain = mapping->domain;

	bit = bitmap_find_next_zero_area(mapping->clean_bitmap,
					 mapping->num_4k_pages,
					 mapping->next_start, nbits, align);
	if (unlikely(bit > mapping->num_4k_pages)) {
		/* try wrapping */
		bit = bitmap_find_next_zero_area(
			mapping->clean_bitmap, mapping->num_4k_pages, 0, nbits,
			align);
		if (unlikely(bit > mapping->num_4k_pages)) {
			/*
			 * If we just re-allocated a VA whose TLB hasn't been
			 * invalidated since it was last used and unmapped, we
			 * need to invalidate it here. We actually invalidate
			 * the entire TLB so that we don't have to invalidate
			 * the TLB again until we wrap back around.
			 */
			if (mapping->have_stale_tlbs) {
				bool skip_sync = (attrs &
						  DMA_ATTR_SKIP_CPU_SYNC);

				iommu_flush_iotlb_all(domain);
				bitmap_copy(mapping->clean_bitmap,
					    mapping->bitmap,
					    mapping->num_4k_pages);
				mapping->have_stale_tlbs = false;
				av8l_fast_clear_stale_ptes(mapping->pgtbl_ops,
							   mapping->base,
							   mapping->base +
							   mapping->size - 1,
							   skip_sync);
				bit = bitmap_find_next_zero_area(
						mapping->clean_bitmap,
						mapping->num_4k_pages,
						0, nbits,
						align);
				if (unlikely(bit > mapping->num_4k_pages))
					return DMA_MAPPING_ERROR;
			} else {
				return DMA_MAPPING_ERROR;
			}
		}
	}

	bitmap_set(mapping->bitmap, bit, nbits);
	bitmap_set(mapping->clean_bitmap, bit, nbits);
	mapping->next_start = bit + nbits;
	if (unlikely(mapping->next_start >= mapping->num_4k_pages))
		mapping->next_start = 0;

	return (bit << FAST_PAGE_SHIFT) + mapping->base;
}

static void __fast_smmu_free_iova(struct dma_fast_smmu_mapping *mapping,
				  dma_addr_t iova, size_t size)
{
	unsigned long start_bit = (iova - mapping->base) >> FAST_PAGE_SHIFT;
	unsigned long nbits = size >> FAST_PAGE_SHIFT;

	/*
	 * We don't invalidate TLBs on unmap. We invalidate TLBs on map
	 * when we're about to re-allocate a VA that was previously
	 * unmapped but hasn't yet been invalidated.
	 */
	bitmap_clear(mapping->bitmap, start_bit, nbits);
	mapping->have_stale_tlbs = true;
}
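
/*
 * dma_map_ops entry points. The physical address is aligned down to a 4K
 * page, a matching IOVA range is carved out of the allocator above, and the
 * caller gets back iova + the original sub-page offset. For non-coherent
 * devices the CPU cache maintenance is done here unless the caller passes
 * DMA_ATTR_SKIP_CPU_SYNC.
 */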

static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	dma_addr_t iova;
	unsigned long flags;
	phys_addr_t phys_plus_off = page_to_phys(page) + offset;
	phys_addr_t phys_to_map = round_down(phys_plus_off, FAST_PAGE_SIZE);
	unsigned long offset_from_phys_to_map = phys_plus_off & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset_from_phys_to_map, FAST_PAGE_SIZE);
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
	bool is_coherent = is_dma_coherent(dev, attrs);
	int prot = qcom_dma_info_to_prot(dir, is_coherent, attrs);

	if (!skip_sync && !is_coherent)
		qcom_arch_sync_dma_for_device(phys_plus_off, size, dir);

	spin_lock_irqsave(&mapping->lock, flags);

	iova = __fast_smmu_alloc_iova(mapping, attrs, len);
	if (unlikely(iova == DMA_MAPPING_ERROR))
		goto fail;

	if (unlikely(av8l_fast_map_public(mapping->pgtbl_ops, iova,
					  phys_to_map, len, prot)))
		goto fail_free_iova;

	spin_unlock_irqrestore(&mapping->lock, flags);

	return iova + offset_from_phys_to_map;

fail_free_iova:
	__fast_smmu_free_iova(mapping, iova, len);
fail:
	spin_unlock_irqrestore(&mapping->lock, flags);
	return DMA_MAPPING_ERROR;
}

static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	unsigned long flags;
	unsigned long offset = iova & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset, FAST_PAGE_SIZE);
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
	bool is_coherent = is_dma_coherent(dev, attrs);

	if (!skip_sync && !is_coherent) {
		phys_addr_t phys;

		phys = av8l_fast_iova_to_phys_public(mapping->pgtbl_ops, iova);
		WARN_ON(!phys);

		qcom_arch_sync_dma_for_cpu(phys, size, dir);
	}

	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(mapping->pgtbl_ops, iova, len);
	__fast_smmu_free_iova(mapping, iova, len);
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static void fast_smmu_sync_single_for_cpu(struct device *dev,
		dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);

	if (!av8l_fast_iova_coherent_public(mapping->pgtbl_ops, iova)) {
		phys_addr_t phys;

		phys = av8l_fast_iova_to_phys_public(mapping->pgtbl_ops, iova);
		WARN_ON(!phys);

		qcom_arch_sync_dma_for_cpu(phys, size, dir);
	}
}

static void fast_smmu_sync_single_for_device(struct device *dev,
		dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);

	if (!av8l_fast_iova_coherent_public(mapping->pgtbl_ops, iova)) {
		phys_addr_t phys;

		phys = av8l_fast_iova_to_phys_public(mapping->pgtbl_ops, iova);
		WARN_ON(!phys);

		qcom_arch_sync_dma_for_device(phys, size, dir);
	}
}

static void fast_smmu_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	dma_addr_t iova = sg_dma_address(sgl);
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	int i;

	if (av8l_fast_iova_coherent_public(mapping->pgtbl_ops, iova))
		return;

	for_each_sg(sgl, sg, nelems, i)
		qcom_arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}

static void fast_smmu_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	dma_addr_t iova = sg_dma_address(sgl);
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	int i;

	if (av8l_fast_iova_coherent_public(mapping->pgtbl_ops, iova))
		return;

	for_each_sg(sgl, sg, nelems, i)
		qcom_arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}
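
/*
 * The whole scatterlist is backed by one contiguous IOVA allocation:
 * qcom_iommu_dma_prepare_map_sg() computes the total aligned length, the
 * segments are mapped back-to-back via av8l_fast_map_sg_public(), and
 * qcom_iommu_dma_finalise_sg() writes the resulting DMA addresses back into
 * the scatterlist.
 */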

static int fast_smmu_map_sg(struct device *dev, struct scatterlist *sg,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	size_t iova_len;
	bool is_coherent = is_dma_coherent(dev, attrs);
	int prot = qcom_dma_info_to_prot(dir, is_coherent, attrs);
	int ret;
	dma_addr_t iova;
	unsigned long flags;
	size_t unused = 0;

	iova_len = qcom_iommu_dma_prepare_map_sg(dev, mapping->iovad, sg, nents);

	spin_lock_irqsave(&mapping->lock, flags);
	iova = __fast_smmu_alloc_iova(mapping, attrs, iova_len);
	spin_unlock_irqrestore(&mapping->lock, flags);

	if (unlikely(iova == DMA_MAPPING_ERROR))
		goto fail;

	av8l_fast_map_sg_public(mapping->pgtbl_ops, iova, sg, nents, prot,
				&unused);

	ret = qcom_iommu_dma_finalise_sg(dev, sg, nents, iova);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		fast_smmu_sync_sg_for_device(dev, sg, nents, dir);

	return ret;

fail:
	qcom_iommu_dma_invalidate_sg(sg, nents);
	return 0;
}

static void fast_smmu_unmap_sg(struct device *dev,
			       struct scatterlist *sg, int nelems,
			       enum dma_data_direction dir,
			       unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	unsigned long flags;
	dma_addr_t start;
	size_t len, offset;
	struct scatterlist *tmp;
	int i;

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		fast_smmu_sync_sg_for_cpu(dev, sg, nelems, dir);

	/*
	 * The scatterlist segments are mapped into a single
	 * contiguous IOVA allocation, so this is incredibly easy.
	 */
	start = sg_dma_address(sg);
	offset = start & ~FAST_PAGE_MASK;
	for_each_sg(sg_next(sg), tmp, nelems - 1, i) {
		if (sg_dma_len(tmp) == 0)
			break;
		sg = tmp;
	}
	len = ALIGN(sg_dma_address(sg) + sg_dma_len(sg) - (start - offset),
		    FAST_PAGE_SIZE);

	av8l_fast_unmap_public(mapping->pgtbl_ops, start, len);

	spin_lock_irqsave(&mapping->lock, flags);
	__fast_smmu_free_iova(mapping, start, len);
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static void __fast_smmu_free_pages(struct page **pages, int count)
{
	int i;

	if (!pages)
		return;
	for (i = 0; i < count; i++)
		__free_page(pages[i]);
	kvfree(pages);
}

static void *fast_smmu_alloc_atomic(struct dma_fast_smmu_mapping *mapping,
				    size_t size, gfp_t gfp, unsigned long attrs,
				    dma_addr_t *handle, bool coherent)
{
	void *addr;
	unsigned long flags;
	struct page *page;
	dma_addr_t dma_addr;
	int prot = qcom_dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);

	if (coherent) {
		page = alloc_pages(gfp, get_order(size));
		addr = page ? page_address(page) : NULL;
	} else {
		addr = qcom_dma_alloc_from_pool(mapping->dev, size, &page, gfp);
	}

	if (!addr)
		return NULL;

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
	if (dma_addr == DMA_MAPPING_ERROR) {
		dev_err(mapping->dev, "no iova\n");
		spin_unlock_irqrestore(&mapping->lock, flags);
		goto out_free_page;
	}

	if (unlikely(av8l_fast_map_public(mapping->pgtbl_ops, dma_addr,
					  page_to_phys(page), size, prot))) {
		dev_err(mapping->dev, "no map public\n");
		goto out_free_iova;
	}
	spin_unlock_irqrestore(&mapping->lock, flags);

	*handle = dma_addr;
	return addr;

out_free_iova:
	__fast_smmu_free_iova(mapping, dma_addr, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
out_free_page:
	if (coherent)
		__free_pages(page, get_order(size));
	else
		qcom_dma_free_from_pool(mapping->dev, addr, size);
	return NULL;
}

static struct page **__fast_smmu_alloc_pages(unsigned int count, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, array_size = count * sizeof(*pages);

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, GFP_KERNEL);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	for (i = 0; i < count; ++i) {
		struct page *page = alloc_page(gfp);

		if (!page) {
			__fast_smmu_free_pages(pages, i);
			return NULL;
		}
		pages[i] = page;
	}
	return pages;
}

static void *__fast_smmu_alloc_contiguous(struct device *dev, size_t size,
			dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	bool is_coherent = is_dma_coherent(dev, attrs);
	int prot = qcom_dma_info_to_prot(DMA_BIDIRECTIONAL, is_coherent, attrs);
	pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
	struct page *page;
	dma_addr_t iova;
	unsigned long flags;
	void *coherent_addr;

	page = qcom_dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
					      get_order(size), gfp & __GFP_NOWARN);
	if (!page)
		return NULL;

	spin_lock_irqsave(&mapping->lock, flags);
	iova = __fast_smmu_alloc_iova(mapping, attrs, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
	if (iova == DMA_MAPPING_ERROR)
		goto release_page;

	if (av8l_fast_map_public(mapping->pgtbl_ops, iova, page_to_phys(page),
				 size, prot))
		goto release_iova;

	if (!is_coherent || PageHighMem(page)) {
		coherent_addr = qcom_dma_common_contiguous_remap(page, size,
					remap_prot,
					__fast_smmu_alloc_contiguous);
		if (!coherent_addr)
			goto release_mapping;

		if (!is_coherent)
			qcom_arch_dma_prep_coherent(page, size);
	} else {
		coherent_addr = page_address(page);
	}

	memset(coherent_addr, 0, size);
	*handle = iova;
	return coherent_addr;

release_mapping:
	av8l_fast_unmap_public(mapping->pgtbl_ops, iova, size);
release_iova:
	__fast_smmu_free_iova(mapping, iova, size);
release_page:
	qcom_dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
	return NULL;
}
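
/*
 * dma_alloc_attrs() backend. Three strategies, chosen per call: the atomic
 * pool (or plain alloc_pages() for coherent devices) when blocking is not
 * allowed, CMA when DMA_ATTR_FORCE_CONTIGUOUS is set, and otherwise a
 * page-at-a-time allocation that is stitched together in IOVA space and
 * remapped into a contiguous kernel virtual address.
 */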

static void *fast_smmu_alloc(struct device *dev, size_t size,
			     dma_addr_t *handle, gfp_t gfp,
			     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	struct sg_table sgt;
	dma_addr_t dma_addr, iova_iter;
	void *addr;
	unsigned long flags;
	struct sg_mapping_iter miter;
	size_t count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
	bool is_coherent = is_dma_coherent(dev, attrs);
	int prot = qcom_dma_info_to_prot(DMA_BIDIRECTIONAL, is_coherent, attrs);
	pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
	struct page **pages;

	/*
	 * sg_alloc_table_from_pages accepts an unsigned int value for count,
	 * so check that count doesn't exceed UINT_MAX.
	 */
	if (count > UINT_MAX) {
		dev_err(dev, "count: %zx exceeds UINT_MAX\n", count);
		return NULL;
	}

	gfp |= __GFP_ZERO;

	*handle = DMA_MAPPING_ERROR;
	size = ALIGN(size, SZ_4K);

	if (!gfpflags_allow_blocking(gfp))
		return fast_smmu_alloc_atomic(mapping, size, gfp, attrs, handle,
					      is_coherent);
	else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
		return __fast_smmu_alloc_contiguous(dev, size, handle, gfp,
						    attrs);

	pages = __fast_smmu_alloc_pages(count, gfp);
	if (!pages) {
		dev_err(dev, "no pages\n");
		return NULL;
	}

	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, gfp)) {
		dev_err(dev, "no sg table\n");
		goto out_free_pages;
	}

	if (!is_coherent) {
		/*
		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
		 * sufficient here, so skip it by using the "wrong" direction.
		 */
		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
			       SG_MITER_FROM_SG);
		while (sg_miter_next(&miter))
			qcom_arch_dma_prep_coherent(miter.page, miter.length);
		sg_miter_stop(&miter);
	}

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
	if (dma_addr == DMA_MAPPING_ERROR) {
		dev_err(dev, "no iova\n");
		spin_unlock_irqrestore(&mapping->lock, flags);
		goto out_free_sg;
	}

	iova_iter = dma_addr;
	sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	while (sg_miter_next(&miter)) {
		if (unlikely(av8l_fast_map_public(
				     mapping->pgtbl_ops, iova_iter,
				     page_to_phys(miter.page),
				     miter.length, prot))) {
			dev_err(dev, "no map public\n");
			/* TODO: unwind previously successful mappings */
			goto out_free_iova;
		}
		iova_iter += miter.length;
	}
	sg_miter_stop(&miter);
	spin_unlock_irqrestore(&mapping->lock, flags);

	addr = qcom_dma_common_pages_remap(pages, size, remap_prot,
					   __builtin_return_address(0));
	if (!addr) {
		dev_err(dev, "no common pages\n");
		goto out_unmap;
	}

	*handle = dma_addr;
	sg_free_table(&sgt);
	return addr;

out_unmap:
	/* need to take the lock again for page tables and iova */
	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(mapping->pgtbl_ops, dma_addr, size);
out_free_iova:
	__fast_smmu_free_iova(mapping, dma_addr, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
out_free_sg:
	sg_free_table(&sgt);
out_free_pages:
	__fast_smmu_free_pages(pages, count);
	return NULL;
}

static void fast_smmu_free(struct device *dev, size_t size,
			   void *cpu_addr, dma_addr_t dma_handle,
			   unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	struct page **pages = NULL;
	struct page *page = NULL;
	unsigned long flags;

	size = ALIGN(size, FAST_PAGE_SIZE);

	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(mapping->pgtbl_ops, dma_handle, size);
	__fast_smmu_free_iova(mapping, dma_handle, size);
	spin_unlock_irqrestore(&mapping->lock, flags);

	if (qcom_dma_free_from_pool(dev, cpu_addr, size))
		return;

	if (is_vmalloc_addr(cpu_addr)) {
		pages = qcom_dma_common_find_pages(cpu_addr);
		if (!pages)
			page = vmalloc_to_page(cpu_addr);
		qcom_dma_common_free_remap(cpu_addr, size);
	} else {
		page = virt_to_page(cpu_addr);
	}

	if (pages)
		__fast_smmu_free_pages(pages, size >> FAST_PAGE_SHIFT);
	if (page)
		qcom_dma_free_contiguous(dev, page, size);
}

static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
				void *cpu_addr, dma_addr_t dma_addr,
				size_t size, unsigned long attrs)
{
	return qcom_iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}

static int fast_smmu_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t dma_addr,
				 size_t size, unsigned long attrs)
{
	return qcom_iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
}

static dma_addr_t fast_smmu_dma_map_resource(
			struct device *dev, phys_addr_t phys_addr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	size_t offset = phys_addr & ~FAST_PAGE_MASK;
	size_t len = round_up(size + offset, FAST_PAGE_SIZE);
	dma_addr_t dma_addr;
	int prot;
	unsigned long flags;

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, attrs, len);
	spin_unlock_irqrestore(&mapping->lock, flags);

	if (dma_addr == DMA_MAPPING_ERROR)
		return dma_addr;

	prot = qcom_dma_info_to_prot(dir, false, attrs);
	prot |= IOMMU_MMIO;

	if (iommu_map(mapping->domain, dma_addr, phys_addr - offset,
		      len, prot)) {
		spin_lock_irqsave(&mapping->lock, flags);
		__fast_smmu_free_iova(mapping, dma_addr, len);
		spin_unlock_irqrestore(&mapping->lock, flags);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr + offset;
}

static void fast_smmu_dma_unmap_resource(
			struct device *dev, dma_addr_t addr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	size_t offset = addr & ~FAST_PAGE_MASK;
	size_t len = round_up(size + offset, FAST_PAGE_SIZE);
	unsigned long flags;

	iommu_unmap(mapping->domain, addr - offset, len);
	spin_lock_irqsave(&mapping->lock, flags);
	__fast_smmu_free_iova(mapping, addr, len);
	spin_unlock_irqrestore(&mapping->lock, flags);
}
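
/*
 * Debug path: io-pgtable-fast invokes the notifier registered in
 * fast_smmu_init_mapping() with MAPPED_OVER_STALE_TLB when a new mapping is
 * written over a PTE whose TLB entry may still be stale; dump enough state
 * to locate the offending IOVA and the allocator bitmap.
 */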

static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
					  void *priv)
{
	av8l_fast_iopte *pmds, *ptep = priv;
	dma_addr_t iova;
	unsigned long bitmap_idx;
	struct av8l_fast_io_pgtable *data;

	data = iof_pgtable_ops_to_data(fast->pgtbl_ops);
	pmds = data->pmds;
	bitmap_idx = (unsigned long)(ptep - pmds);
	iova = bitmap_idx << FAST_PAGE_SHIFT;

	dev_err(fast->dev, "Mapped over stale tlb at %pa\n", &iova);
	dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
	dev_err(fast->dev, "ptep: %pK pmds: %pK diff: %lu\n", ptep,
		pmds, bitmap_idx);
	print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
		       32, 8, fast->bitmap, fast->bitmap_size, false);
}

static int fast_smmu_notify(struct notifier_block *self,
			    unsigned long action, void *data)
{
	struct dma_fast_smmu_mapping *fast = container_of(
		self, struct dma_fast_smmu_mapping, notifier);

	switch (action) {
	case MAPPED_OVER_STALE_TLB:
		__fast_smmu_mapped_over_stale(fast, data);
		return NOTIFY_OK;
	default:
		WARN(1, "Unhandled notifier action");
		return NOTIFY_DONE;
	}
}

static const struct dma_map_ops fast_smmu_dma_ops = {
	.alloc			= fast_smmu_alloc,
	.free			= fast_smmu_free,
	.mmap			= fast_smmu_mmap_attrs,
	.get_sgtable		= fast_smmu_get_sgtable,
	.map_page		= fast_smmu_map_page,
	.unmap_page		= fast_smmu_unmap_page,
	.sync_single_for_cpu	= fast_smmu_sync_single_for_cpu,
	.sync_single_for_device = fast_smmu_sync_single_for_device,
	.map_sg			= fast_smmu_map_sg,
	.unmap_sg		= fast_smmu_unmap_sg,
	.sync_sg_for_cpu	= fast_smmu_sync_sg_for_cpu,
	.sync_sg_for_device	= fast_smmu_sync_sg_for_device,
	.map_resource		= fast_smmu_dma_map_resource,
	.unmap_resource		= fast_smmu_dma_unmap_resource,
};

/**
 * __fast_smmu_create_mapping_sized
 * @base: bottom of the VA range
 * @size: size of the VA range in bytes
 *
 * Creates a mapping structure which holds information about used/unused IO
 * address ranges, which is required to perform mapping with IOMMU aware
 * functions. The only VA range supported is [0, 4GB).
 *
 * The client device needs to be attached to the mapping with the
 * fast_smmu_attach_device function.
 */
static struct dma_fast_smmu_mapping *__fast_smmu_create_mapping_sized(
	dma_addr_t base, u64 size)
{
	struct dma_fast_smmu_mapping *fast;

	fast = kzalloc(sizeof(struct dma_fast_smmu_mapping), GFP_KERNEL);
	if (!fast)
		goto err;

	fast->base = base;
	fast->size = size;
	fast->num_4k_pages = size >> FAST_PAGE_SHIFT;
	fast->bitmap_size = BITS_TO_LONGS(fast->num_4k_pages) * sizeof(long);

	fast->bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL | __GFP_NOWARN |
							__GFP_NORETRY);
	if (!fast->bitmap)
		fast->bitmap = vzalloc(fast->bitmap_size);
	if (!fast->bitmap)
		goto err2;

	fast->clean_bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL |
						__GFP_NOWARN | __GFP_NORETRY);
	if (!fast->clean_bitmap)
		fast->clean_bitmap = vzalloc(fast->bitmap_size);
	if (!fast->clean_bitmap)
		goto err3;

	spin_lock_init(&fast->lock);
	mutex_init(&fast->msi_cookie_init_lock);

	fast->iovad = kzalloc(sizeof(*fast->iovad), GFP_KERNEL);
	if (!fast->iovad)
		goto err_free_bitmap;

	init_iova_domain(fast->iovad, FAST_PAGE_SIZE,
			 base >> FAST_PAGE_SHIFT);

	return fast;

err_free_bitmap:
	kvfree(fast->clean_bitmap);
err3:
	kvfree(fast->bitmap);
err2:
	kfree(fast);
err:
	return ERR_PTR(-ENOMEM);
}

/*
 * Based off of similar code from dma-iommu.c, but modified to use a different
 * iova allocator
 */
static void fast_smmu_reserve_pci_windows(struct device *dev,
					  struct dma_fast_smmu_mapping *mapping)
{
	struct pci_host_bridge *bridge;
	struct resource_entry *window;
	phys_addr_t start, end;
	struct pci_dev *pci_dev;
	unsigned long flags;

	if (!dev_is_pci(dev))
		return;

	pci_dev = to_pci_dev(dev);
	bridge = qcom_pci_find_host_bridge(pci_dev->bus);

	spin_lock_irqsave(&mapping->lock, flags);
	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM &&
		    resource_type(window->res) != IORESOURCE_IO)
			continue;

		start = round_down(window->res->start - window->offset,
				   FAST_PAGE_SIZE);
		end = round_up(window->res->end - window->offset,
			       FAST_PAGE_SIZE);
		start = max_t(unsigned long, mapping->base, start);
		end = min_t(unsigned long, mapping->base + mapping->size, end);
		if (start >= end)
			continue;

		dev_dbg(dev, "iova allocator reserved 0x%pa-0x%pa\n",
			&start, &end);

		start = (start - mapping->base) >> FAST_PAGE_SHIFT;
		end = (end - mapping->base) >> FAST_PAGE_SHIFT;
		bitmap_set(mapping->bitmap, start, end - start);
		bitmap_set(mapping->clean_bitmap, start, end - start);
	}
	spin_unlock_irqrestore(&mapping->lock, flags);
}

static void fast_smmu_reserve_msi_iova(struct device *dev, struct dma_fast_smmu_mapping *fast)
{
	dma_addr_t msi_iova_base;
	u32 msi_size;
	int ret;
	unsigned long flags;

	mutex_lock(&fast->msi_cookie_init_lock);
	spin_lock_irqsave(&fast->lock, flags);

	/* MSI cookie has already been setup. */
	if (fast->domain->iova_cookie)
		goto out;

	if (qcom_iommu_get_msi_size(dev, &msi_size) < 0)
		goto out;

	msi_iova_base = __fast_smmu_alloc_iova(fast, 0, msi_size);
	if (msi_iova_base == DMA_MAPPING_ERROR) {
		dev_err(dev, "iova allocator failed to reserve MSI range of size: 0x%x\n",
			msi_size);
		goto out;
	}

	dev_dbg(dev, "iova allocator reserved 0x%lx-0x%lx for MSI\n", msi_iova_base,
		msi_iova_base + msi_size);

	spin_unlock_irqrestore(&fast->lock, flags);
	ret = iommu_get_msi_cookie(fast->domain, msi_iova_base);
	spin_lock_irqsave(&fast->lock, flags);

	if (ret < 0) {
		dev_err(dev, "failed to obtain MSI iova cookie rc: %d\n", ret);
		__fast_smmu_free_iova(fast, msi_iova_base, msi_size);
	}
out:
	spin_unlock_irqrestore(&fast->lock, flags);
	mutex_unlock(&fast->msi_cookie_init_lock);
}

static void fast_smmu_reserve_iommu_regions(struct device *dev,
					    struct dma_fast_smmu_mapping *fast)
{
	struct iommu_resv_region *region;
	unsigned long flags;
	struct dma_fast_smmu_mapping *mapping = dev_get_mapping(dev);
	LIST_HEAD(resv_regions);

	if (dev_is_pci(dev))
		fast_smmu_reserve_pci_windows(dev, fast);

	qcom_iommu_get_resv_regions(dev, &resv_regions);
	spin_lock_irqsave(&mapping->lock, flags);
	list_for_each_entry(region, &resv_regions, list) {
		unsigned long lo, hi;

		lo = max(fast->base, region->start);
		hi = min(fast->base + fast->size - 1,
			 region->start + region->length - 1);
		lo = (lo - fast->base) >> FAST_PAGE_SHIFT;
		hi = (hi - fast->base) >> FAST_PAGE_SHIFT;
		bitmap_set(fast->bitmap, lo, hi - lo + 1);
		bitmap_set(fast->clean_bitmap, lo, hi - lo + 1);
	}
	spin_unlock_irqrestore(&mapping->lock, flags);
	iommu_put_resv_regions(dev, &resv_regions);

	fast_smmu_reserve_msi_iova(dev, fast);
}

void fast_smmu_put_dma_cookie(struct iommu_domain *domain)
{
	struct dma_fast_smmu_mapping *fast = fast_smmu_remove_mapping(domain);

	if (!fast)
		return;

	if (fast->iovad) {
		put_iova_domain(fast->iovad);
		kfree(fast->iovad);
	}

	if (fast->bitmap)
		kvfree(fast->bitmap);

	if (fast->clean_bitmap)
		kvfree(fast->clean_bitmap);

	kfree(fast);
}
EXPORT_SYMBOL(fast_smmu_put_dma_cookie);

/**
 * fast_smmu_init_mapping
 * @dev: valid struct device pointer
 * @domain: valid IOMMU domain pointer
 * @pgtable_ops: the page table ops associated with this domain
 *
 * Called the first time a device is attached to this mapping.
 * Not for dma client use.
 */
int fast_smmu_init_mapping(struct device *dev, struct iommu_domain *domain,
			   struct io_pgtable_ops *pgtable_ops)
{
	u64 dma_base, dma_end, size;
	struct dma_fast_smmu_mapping *fast = fast_smmu_lookup_mapping(domain);

	if (fast) {
		dev_err(dev, "Iova cookie already present\n");
		return -EINVAL;
	}

	if (!pgtable_ops)
		return -EINVAL;

	dma_base = max_t(u64, domain->geometry.aperture_start, 0);
	dma_end = min_t(u64, domain->geometry.aperture_end,
			(SZ_1G * 4ULL - 1));
	size = dma_end - dma_base + 1;
	if (dma_base >= dma_end) {
		dev_err(dev, "Invalid domain geometry\n");
		return -EINVAL;
	}

	fast = __fast_smmu_create_mapping_sized(dma_base, size);
	if (IS_ERR(fast))
		return -ENOMEM;

	fast->domain = domain;
	fast->dev = dev;
	fast_smmu_add_mapping(fast);
	fast->pgtbl_ops = pgtable_ops;

	fast->notifier.notifier_call = fast_smmu_notify;
	av8l_register_notify(&fast->notifier);

	return 0;
}
EXPORT_SYMBOL(fast_smmu_init_mapping);

static void __fast_smmu_setup_dma_ops(void *data, struct device *dev,
				      u64 dma_base, u64 dma_limit)
{
	struct dma_fast_smmu_mapping *fast;
	struct iommu_domain *domain;
	int ret;

	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return;

	ret = qcom_iommu_get_mappings_configuration(domain);
	if (ret < 0 || !(ret & QCOM_IOMMU_MAPPING_CONF_FAST))
		return;

	fast = dev_get_mapping(dev);
	if (!fast) {
		dev_err(dev, "Missing fastmap iova cookie\n");
		return;
	}

	fast_smmu_reserve_iommu_regions(dev, fast);
	dev->dma_ops = &fast_smmu_dma_ops;
}

/*
 * Called by drivers who create their own iommu domains via
 * iommu_domain_alloc().
 */
void fast_smmu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
{
	__fast_smmu_setup_dma_ops(NULL, dev, dma_base, dma_limit);
}
EXPORT_SYMBOL(fast_smmu_setup_dma_ops);
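
/*
 * Illustrative sketch (not taken from this file) of how a client that
 * allocates its own fastmap domain might use the export above; domain
 * creation, fastmap configuration and attach details are assumed and vary
 * by platform:
 *
 *	domain = iommu_domain_alloc(dev->bus);
 *	// ... configure the domain for fastmap and attach it ...
 *	iommu_attach_device(domain, dev);
 *	fast_smmu_setup_dma_ops(dev, 0, SZ_4G - 1);
 *
 * After this, the standard DMA API (dma_map_single(), dma_alloc_attrs(),
 * ...) on 'dev' is serviced by fast_smmu_dma_ops.
 */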

int __init dma_mapping_fast_init(void)
{
	return register_trace_android_rvh_iommu_setup_dma_ops(
			__fast_smmu_setup_dma_ops, NULL);
}