dma-iommu.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * A fairly generic DMA-API to IOMMU-API glue layer.
  4. *
  5. * Copyright (C) 2014-2015 ARM Ltd.
  6. *
  7. * based in part on arch/arm/mm/dma-mapping.c:
  8. * Copyright (C) 2000-2004 Russell King
  9. */
  10. #include <linux/acpi_iort.h>
  11. #include <linux/atomic.h>
  12. #include <linux/crash_dump.h>
  13. #include <linux/device.h>
  14. #include <linux/dma-direct.h>
  15. #include <linux/dma-map-ops.h>
  16. #include <linux/gfp.h>
  17. #include <linux/huge_mm.h>
  18. #include <linux/iommu.h>
  19. #include <linux/iova.h>
  20. #include <linux/irq.h>
  21. #include <linux/list_sort.h>
  22. #include <linux/memremap.h>
  23. #include <linux/mm.h>
  24. #include <linux/mutex.h>
  25. #include <linux/of_iommu.h>
  26. #include <linux/pci.h>
  27. #include <linux/scatterlist.h>
  28. #include <linux/spinlock.h>
  29. #include <linux/swiotlb.h>
  30. #include <linux/vmalloc.h>
  31. #include <trace/hooks/iommu.h>
  32. #include "dma-iommu.h"
  33. struct iommu_dma_msi_page {
  34. struct list_head list;
  35. dma_addr_t iova;
  36. phys_addr_t phys;
  37. };
  38. enum iommu_dma_cookie_type {
  39. IOMMU_DMA_IOVA_COOKIE,
  40. IOMMU_DMA_MSI_COOKIE,
  41. };
  42. struct iommu_dma_cookie {
  43. enum iommu_dma_cookie_type type;
  44. union {
  45. /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
  46. struct {
  47. struct iova_domain iovad;
  48. struct iova_fq __percpu *fq; /* Flush queue */
  49. /* Number of TLB flushes that have been started */
  50. atomic64_t fq_flush_start_cnt;
  51. /* Number of TLB flushes that have been finished */
  52. atomic64_t fq_flush_finish_cnt;
  53. /* Timer to regularly empty the flush queues */
  54. struct timer_list fq_timer;
  55. /* 1 when timer is active, 0 when not */
  56. atomic_t fq_timer_on;
  57. };
  58. /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
  59. dma_addr_t msi_iova;
  60. };
  61. struct list_head msi_page_list;
  62. /* Domain for flush queue callback; NULL if flush queue not in use */
  63. struct iommu_domain *fq_domain;
  64. struct mutex mutex;
  65. };
  66. static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
  67. bool iommu_dma_forcedac __read_mostly;
  68. static int __init iommu_dma_forcedac_setup(char *str)
  69. {
  70. int ret = kstrtobool(str, &iommu_dma_forcedac);
  71. if (!ret && iommu_dma_forcedac)
  72. pr_info("Forcing DAC for PCI devices\n");
  73. return ret;
  74. }
  75. early_param("iommu.forcedac", iommu_dma_forcedac_setup);
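/*
 * Usage note: passing "iommu.forcedac=1" on the kernel command line sets
 * iommu_dma_forcedac, which makes iommu_dma_alloc_iova() below skip the
 * SAC (32-bit) first-try allocation for PCI devices and go straight to the
 * full DMA mask.
 */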
  76. /* Number of entries per flush queue */
  77. #define IOVA_FQ_SIZE 256
  78. /* Timeout (in ms) after which entries are flushed from the queue */
  79. #define IOVA_FQ_TIMEOUT 10
  80. /* Flush queue entry for deferred flushing */
  81. struct iova_fq_entry {
  82. unsigned long iova_pfn;
  83. unsigned long pages;
  84. struct list_head freelist;
  85. u64 counter; /* Flush counter when this entry was added */
  86. };
  87. /* Per-CPU flush queue structure */
  88. struct iova_fq {
  89. struct iova_fq_entry entries[IOVA_FQ_SIZE];
  90. unsigned int head, tail;
  91. spinlock_t lock;
  92. };
  93. #define fq_ring_for_each(i, fq) \
  94. for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
  95. static inline bool fq_full(struct iova_fq *fq)
  96. {
  97. assert_spin_locked(&fq->lock);
  98. return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
  99. }
  100. static inline unsigned int fq_ring_add(struct iova_fq *fq)
  101. {
  102. unsigned int idx = fq->tail;
  103. assert_spin_locked(&fq->lock);
  104. fq->tail = (idx + 1) % IOVA_FQ_SIZE;
  105. return idx;
  106. }
  107. static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
  108. {
  109. u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
  110. unsigned int idx;
  111. assert_spin_locked(&fq->lock);
  112. fq_ring_for_each(idx, fq) {
  113. if (fq->entries[idx].counter >= counter)
  114. break;
  115. put_pages_list(&fq->entries[idx].freelist);
  116. free_iova_fast(&cookie->iovad,
  117. fq->entries[idx].iova_pfn,
  118. fq->entries[idx].pages);
  119. fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
  120. }
  121. }
  122. static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
  123. {
  124. atomic64_inc(&cookie->fq_flush_start_cnt);
  125. cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
  126. atomic64_inc(&cookie->fq_flush_finish_cnt);
  127. }
  128. static void fq_flush_timeout(struct timer_list *t)
  129. {
  130. struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
  131. int cpu;
  132. atomic_set(&cookie->fq_timer_on, 0);
  133. fq_flush_iotlb(cookie);
  134. for_each_possible_cpu(cpu) {
  135. unsigned long flags;
  136. struct iova_fq *fq;
  137. fq = per_cpu_ptr(cookie->fq, cpu);
  138. spin_lock_irqsave(&fq->lock, flags);
  139. fq_ring_free(cookie, fq);
  140. spin_unlock_irqrestore(&fq->lock, flags);
  141. }
  142. }
  143. static void queue_iova(struct iommu_dma_cookie *cookie,
  144. unsigned long pfn, unsigned long pages,
  145. struct list_head *freelist)
  146. {
  147. struct iova_fq *fq;
  148. unsigned long flags;
  149. unsigned int idx;
  150. /*
  151. * Order against the IOMMU driver's pagetable update from unmapping
  152. * @pte, to guarantee that fq_flush_iotlb() observes that if called
  153. * from a different CPU before we release the lock below. Full barrier
  154. * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
  155. * written fq state here.
  156. */
  157. smp_mb();
  158. fq = raw_cpu_ptr(cookie->fq);
  159. spin_lock_irqsave(&fq->lock, flags);
  160. /*
  161. * First remove all entries from the flush queue that have already been
  162. * flushed out on another CPU. This makes the fq_full() check below less
  163. * likely to be true.
  164. */
  165. fq_ring_free(cookie, fq);
  166. if (fq_full(fq)) {
  167. fq_flush_iotlb(cookie);
  168. fq_ring_free(cookie, fq);
  169. }
  170. idx = fq_ring_add(fq);
  171. fq->entries[idx].iova_pfn = pfn;
  172. fq->entries[idx].pages = pages;
  173. fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt);
  174. list_splice(freelist, &fq->entries[idx].freelist);
  175. spin_unlock_irqrestore(&fq->lock, flags);
  176. /* Avoid false sharing as much as possible. */
  177. if (!atomic_read(&cookie->fq_timer_on) &&
  178. !atomic_xchg(&cookie->fq_timer_on, 1))
  179. mod_timer(&cookie->fq_timer,
  180. jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
  181. }
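/*
 * Flush-queue lifecycle, as implemented above: each queued entry is stamped
 * with the current fq_flush_start_cnt, and fq_ring_free() only releases
 * entries whose stamp is strictly below fq_flush_finish_cnt, i.e. entries
 * whose IOVAs were already covered by a completed flush_iotlb_all(). Entries
 * that linger are reclaimed by fq_flush_timeout() after IOVA_FQ_TIMEOUT ms.
 */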
  182. static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
  183. {
  184. int cpu, idx;
  185. if (!cookie->fq)
  186. return;
  187. del_timer_sync(&cookie->fq_timer);
  188. /* The IOVAs will be torn down separately, so just free our queued pages */
  189. for_each_possible_cpu(cpu) {
  190. struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
  191. fq_ring_for_each(idx, fq)
  192. put_pages_list(&fq->entries[idx].freelist);
  193. }
  194. free_percpu(cookie->fq);
  195. }
  196. /* sysfs updates are serialised by the mutex of the group owning @domain */
  197. int iommu_dma_init_fq(struct iommu_domain *domain)
  198. {
  199. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  200. struct iova_fq __percpu *queue;
  201. int i, cpu;
  202. if (cookie->fq_domain)
  203. return 0;
  204. atomic64_set(&cookie->fq_flush_start_cnt, 0);
  205. atomic64_set(&cookie->fq_flush_finish_cnt, 0);
  206. queue = alloc_percpu(struct iova_fq);
  207. if (!queue) {
  208. pr_warn("iova flush queue initialization failed\n");
  209. return -ENOMEM;
  210. }
  211. for_each_possible_cpu(cpu) {
  212. struct iova_fq *fq = per_cpu_ptr(queue, cpu);
  213. fq->head = 0;
  214. fq->tail = 0;
  215. spin_lock_init(&fq->lock);
  216. for (i = 0; i < IOVA_FQ_SIZE; i++)
  217. INIT_LIST_HEAD(&fq->entries[i].freelist);
  218. }
  219. cookie->fq = queue;
  220. timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
  221. atomic_set(&cookie->fq_timer_on, 0);
  222. /*
  223. * Prevent incomplete fq state being observable. Pairs with path from
  224. * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
  225. */
  226. smp_wmb();
  227. WRITE_ONCE(cookie->fq_domain, domain);
  228. return 0;
  229. }
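/*
 * Note: iommu_dma_init_fq() is reached either when a DMA-FQ default domain is
 * initialised in iommu_dma_init_domain() below, or when user space switches an
 * existing group to lazy invalidation via the group's sysfs "type" attribute,
 * e.g.
 *
 *     echo DMA-FQ > /sys/kernel/iommu_groups/<id>/type
 *
 * which is the serialisation the comment above refers to.
 */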
  230. static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
  231. {
  232. if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
  233. return cookie->iovad.granule;
  234. return PAGE_SIZE;
  235. }
  236. static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
  237. {
  238. struct iommu_dma_cookie *cookie;
  239. cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
  240. if (cookie) {
  241. INIT_LIST_HEAD(&cookie->msi_page_list);
  242. cookie->type = type;
  243. }
  244. return cookie;
  245. }
  246. /**
  247. * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
  248. * @domain: IOMMU domain to prepare for DMA-API usage
  249. */
  250. int iommu_get_dma_cookie(struct iommu_domain *domain)
  251. {
  252. if (domain->iova_cookie)
  253. return -EEXIST;
  254. domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
  255. if (!domain->iova_cookie)
  256. return -ENOMEM;
  257. mutex_init(&domain->iova_cookie->mutex);
  258. return 0;
  259. }
  260. /**
  261. * iommu_get_msi_cookie - Acquire just MSI remapping resources
  262. * @domain: IOMMU domain to prepare
  263. * @base: Start address of IOVA region for MSI mappings
  264. *
  265. * Users who manage their own IOVA allocation and do not want DMA API support,
  266. * but would still like to take advantage of automatic MSI remapping, can use
  267. * this to initialise their own domain appropriately. Users should reserve a
  268. * contiguous IOVA region, starting at @base, large enough to accommodate the
  269. * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
  270. * used by the devices attached to @domain.
  271. */
  272. int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
  273. {
  274. struct iommu_dma_cookie *cookie;
  275. if (domain->type != IOMMU_DOMAIN_UNMANAGED)
  276. return -EINVAL;
  277. if (domain->iova_cookie)
  278. return -EEXIST;
  279. cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
  280. if (!cookie)
  281. return -ENOMEM;
  282. cookie->msi_iova = base;
  283. domain->iova_cookie = cookie;
  284. return 0;
  285. }
  286. EXPORT_SYMBOL(iommu_get_msi_cookie);
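/*
 * Illustrative sketch (not compiled, and not part of this file): a caller that
 * manages its own unmanaged domain but still wants automatic MSI remapping
 * might reserve an IOVA window and install the MSI cookie before attaching the
 * device. EXAMPLE_MSI_IOVA_BASE/SIZE and example_attach() are hypothetical
 * names used only for illustration.
 */
#if 0
#define EXAMPLE_MSI_IOVA_BASE	0x08000000UL
#define EXAMPLE_MSI_IOVA_SIZE	SZ_1M	/* kept clear of the caller's own allocator */

static int example_attach(struct iommu_domain *domain, struct device *dev)
{
	int ret;

	/* Reserve [base, base + size) for MSI doorbell mappings */
	ret = iommu_get_msi_cookie(domain, EXAMPLE_MSI_IOVA_BASE);
	if (ret)
		return ret;

	return iommu_attach_device(domain, dev);
}
#endif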
  287. /**
  288. * iommu_put_dma_cookie - Release a domain's DMA mapping resources
  289. * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
  290. * iommu_get_msi_cookie()
  291. */
  292. void iommu_put_dma_cookie(struct iommu_domain *domain)
  293. {
  294. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  295. struct iommu_dma_msi_page *msi, *tmp;
  296. if (!cookie)
  297. return;
  298. if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
  299. iommu_dma_free_fq(cookie);
  300. put_iova_domain(&cookie->iovad);
  301. }
  302. list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
  303. list_del(&msi->list);
  304. kfree(msi);
  305. }
  306. kfree(cookie);
  307. domain->iova_cookie = NULL;
  308. }
  309. /**
  310. * iommu_dma_get_resv_regions - Reserved region driver helper
  311. * @dev: Device from iommu_get_resv_regions()
  312. * @list: Reserved region list from iommu_get_resv_regions()
  313. *
  314. * IOMMU drivers can use this to implement their .get_resv_regions callback
  315. * for general non-IOMMU-specific reservations. Currently, this covers GICv3
  316. * ITS region reservation on ACPI based ARM platforms that may require HW MSI
  317. * reservation.
  318. */
  319. void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
  320. {
  321. if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
  322. iort_iommu_get_resv_regions(dev, list);
  323. if (dev->of_node)
  324. of_iommu_get_resv_regions(dev, list);
  325. }
  326. EXPORT_SYMBOL(iommu_dma_get_resv_regions);
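/*
 * Illustrative sketch (not compiled): an IOMMU driver typically chains this
 * helper from its own .get_resv_regions callback after adding any
 * hardware-specific regions. example_get_resv_regions()/example_ops are
 * hypothetical; the callback signature matches struct iommu_ops.
 */
#if 0
static void example_get_resv_regions(struct device *dev, struct list_head *head)
{
	/* driver-specific reserved regions would be added to @head first */
	iommu_dma_get_resv_regions(dev, head);
}

static const struct iommu_ops example_ops = {
	/* ... */
	.get_resv_regions = example_get_resv_regions,
};
#endif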
  327. static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
  328. phys_addr_t start, phys_addr_t end)
  329. {
  330. struct iova_domain *iovad = &cookie->iovad;
  331. struct iommu_dma_msi_page *msi_page;
  332. int i, num_pages;
  333. start -= iova_offset(iovad, start);
  334. num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
  335. for (i = 0; i < num_pages; i++) {
  336. msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
  337. if (!msi_page)
  338. return -ENOMEM;
  339. msi_page->phys = start;
  340. msi_page->iova = start;
  341. INIT_LIST_HEAD(&msi_page->list);
  342. list_add(&msi_page->list, &cookie->msi_page_list);
  343. start += iovad->granule;
  344. }
  345. return 0;
  346. }
  347. static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
  348. const struct list_head *b)
  349. {
  350. struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
  351. struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);
  352. return res_a->res->start > res_b->res->start;
  353. }
  354. static int iova_reserve_pci_windows(struct pci_dev *dev,
  355. struct iova_domain *iovad)
  356. {
  357. struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
  358. struct resource_entry *window;
  359. unsigned long lo, hi;
  360. phys_addr_t start = 0, end;
  361. resource_list_for_each_entry(window, &bridge->windows) {
  362. if (resource_type(window->res) != IORESOURCE_MEM)
  363. continue;
  364. lo = iova_pfn(iovad, window->res->start - window->offset);
  365. hi = iova_pfn(iovad, window->res->end - window->offset);
  366. reserve_iova(iovad, lo, hi);
  367. }
  368. /* Get reserved DMA windows from host bridge */
  369. list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
  370. resource_list_for_each_entry(window, &bridge->dma_ranges) {
  371. end = window->res->start - window->offset;
  372. resv_iova:
  373. if (end > start) {
  374. lo = iova_pfn(iovad, start);
  375. hi = iova_pfn(iovad, end);
  376. reserve_iova(iovad, lo, hi);
  377. } else if (end < start) {
  378. /* DMA ranges should be non-overlapping */
  379. dev_err(&dev->dev,
  380. "Failed to reserve IOVA [%pa-%pa]\n",
  381. &start, &end);
  382. return -EINVAL;
  383. }
  384. start = window->res->end - window->offset + 1;
  385. /* If window is last entry */
  386. if (window->node.next == &bridge->dma_ranges &&
  387. end != ~(phys_addr_t)0) {
  388. end = ~(phys_addr_t)0;
  389. goto resv_iova;
  390. }
  391. }
  392. return 0;
  393. }
  394. static int iova_reserve_iommu_regions(struct device *dev,
  395. struct iommu_domain *domain)
  396. {
  397. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  398. struct iova_domain *iovad = &cookie->iovad;
  399. struct iommu_resv_region *region;
  400. LIST_HEAD(resv_regions);
  401. int ret = 0;
  402. if (dev_is_pci(dev)) {
  403. ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
  404. if (ret)
  405. return ret;
  406. }
  407. iommu_get_resv_regions(dev, &resv_regions);
  408. list_for_each_entry(region, &resv_regions, list) {
  409. unsigned long lo, hi;
  410. /* We ARE the software that manages these! */
  411. if (region->type == IOMMU_RESV_SW_MSI)
  412. continue;
  413. lo = iova_pfn(iovad, region->start);
  414. hi = iova_pfn(iovad, region->start + region->length - 1);
  415. reserve_iova(iovad, lo, hi);
  416. if (region->type == IOMMU_RESV_MSI)
  417. ret = cookie_init_hw_msi_region(cookie, region->start,
  418. region->start + region->length);
  419. if (ret)
  420. break;
  421. }
  422. iommu_put_resv_regions(dev, &resv_regions);
  423. return ret;
  424. }
  425. static bool dev_is_untrusted(struct device *dev)
  426. {
  427. return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
  428. }
  429. static bool dev_use_swiotlb(struct device *dev)
  430. {
  431. return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
  432. }
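/*
 * "Untrusted" covers externally-facing PCI devices (e.g. Thunderbolt). For
 * those, sub-granule mappings are bounced through swiotlb in
 * iommu_dma_map_page() so that no unrelated kernel memory sharing an IOVA
 * granule is ever exposed to the device.
 */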
  433. /**
  434. * iommu_dma_init_domain - Initialise a DMA mapping domain
  435. * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
  436. * @base: IOVA at which the mappable address space starts
  437. * @limit: Last address of the IOVA space
  438. * @dev: Device the domain is being initialised for
  439. *
  440. * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
  441. * avoid rounding surprises. If necessary, we reserve the page at address 0
  442. * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
  443. * any change which could make prior IOVAs invalid will fail.
  444. */
  445. static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
  446. dma_addr_t limit, struct device *dev)
  447. {
  448. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  449. unsigned long order, base_pfn;
  450. struct iova_domain *iovad;
  451. int ret;
  452. if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
  453. return -EINVAL;
  454. iovad = &cookie->iovad;
  455. /* Use the smallest supported page size for IOVA granularity */
  456. order = __ffs(domain->pgsize_bitmap);
  457. base_pfn = max_t(unsigned long, 1, base >> order);
  458. /* Check the domain allows at least some access to the device... */
  459. if (domain->geometry.force_aperture) {
  460. if (base > domain->geometry.aperture_end ||
  461. limit < domain->geometry.aperture_start) {
  462. pr_warn("specified DMA range outside IOMMU capability\n");
  463. return -EFAULT;
  464. }
  465. /* ...then finally give it a kicking to make sure it fits */
  466. base_pfn = max_t(unsigned long, base_pfn,
  467. domain->geometry.aperture_start >> order);
  468. }
  469. /* start_pfn is always nonzero for an already-initialised domain */
  470. mutex_lock(&cookie->mutex);
  471. if (iovad->start_pfn) {
  472. if (1UL << order != iovad->granule ||
  473. base_pfn != iovad->start_pfn) {
  474. pr_warn("Incompatible range for DMA domain\n");
  475. ret = -EFAULT;
  476. goto done_unlock;
  477. }
  478. ret = 0;
  479. goto done_unlock;
  480. }
  481. init_iova_domain(iovad, 1UL << order, base_pfn);
  482. trace_android_rvh_iommu_iovad_init_alloc_algo(dev, iovad);
  483. ret = iova_domain_init_rcaches(iovad);
  484. if (ret)
  485. goto done_unlock;
  486. /* If the FQ fails we can simply fall back to strict mode */
  487. if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
  488. domain->type = IOMMU_DOMAIN_DMA;
  489. ret = iova_reserve_iommu_regions(dev, domain);
  490. done_unlock:
  491. mutex_unlock(&cookie->mutex);
  492. return ret;
  493. }
  494. /**
  495. * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
  496. * page flags.
  497. * @dir: Direction of DMA transfer
  498. * @coherent: Is the DMA master cache-coherent?
  499. * @attrs: DMA attributes for the mapping
  500. *
  501. * Return: corresponding IOMMU API page protection flags
  502. */
  503. static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
  504. unsigned long attrs)
  505. {
  506. int prot = coherent ? IOMMU_CACHE : 0;
  507. if (attrs & DMA_ATTR_PRIVILEGED)
  508. prot |= IOMMU_PRIV;
  509. if (attrs & DMA_ATTR_SYS_CACHE)
  510. prot |= IOMMU_SYS_CACHE;
  511. if (attrs & DMA_ATTR_SYS_CACHE_NWA)
  512. prot |= IOMMU_SYS_CACHE_NWA;
  513. switch (dir) {
  514. case DMA_BIDIRECTIONAL:
  515. return prot | IOMMU_READ | IOMMU_WRITE;
  516. case DMA_TO_DEVICE:
  517. return prot | IOMMU_READ;
  518. case DMA_FROM_DEVICE:
  519. return prot | IOMMU_WRITE;
  520. default:
  521. return 0;
  522. }
  523. }
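/*
 * Example: for a cache-coherent device doing a DMA_TO_DEVICE transfer with no
 * special attributes this returns IOMMU_CACHE | IOMMU_READ; a mapping with
 * DMA_ATTR_PRIVILEGED would additionally set IOMMU_PRIV.
 */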
  524. static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
  525. size_t size, u64 dma_limit, struct device *dev)
  526. {
  527. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  528. struct iova_domain *iovad = &cookie->iovad;
  529. unsigned long shift, iova_len, iova = 0;
  530. if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
  531. cookie->msi_iova += size;
  532. return cookie->msi_iova - size;
  533. }
  534. shift = iova_shift(iovad);
  535. iova_len = size >> shift;
  536. dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
  537. if (domain->geometry.force_aperture)
  538. dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
  539. /* Try to get PCI devices a SAC address */
  540. if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
  541. iova = alloc_iova_fast(iovad, iova_len,
  542. DMA_BIT_MASK(32) >> shift, false);
  543. if (!iova)
  544. iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
  545. true);
  546. trace_android_vh_iommu_iovad_alloc_iova(dev, iovad, (dma_addr_t)iova << shift, size);
  547. return (dma_addr_t)iova << shift;
  548. }
  549. static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
  550. dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
  551. {
  552. struct iova_domain *iovad = &cookie->iovad;
  553. /* The MSI case is only ever cleaning up its most recent allocation */
  554. if (cookie->type == IOMMU_DMA_MSI_COOKIE)
  555. cookie->msi_iova -= size;
  556. else if (gather && gather->queued)
  557. queue_iova(cookie, iova_pfn(iovad, iova),
  558. size >> iova_shift(iovad),
  559. &gather->freelist);
  560. else
  561. free_iova_fast(iovad, iova_pfn(iovad, iova),
  562. size >> iova_shift(iovad));
  563. trace_android_vh_iommu_iovad_free_iova(iovad, iova, size);
  564. }
  565. static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
  566. size_t size)
  567. {
  568. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  569. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  570. struct iova_domain *iovad = &cookie->iovad;
  571. size_t iova_off = iova_offset(iovad, dma_addr);
  572. struct iommu_iotlb_gather iotlb_gather;
  573. size_t unmapped;
  574. dma_addr -= iova_off;
  575. size = iova_align(iovad, size + iova_off);
  576. iommu_iotlb_gather_init(&iotlb_gather);
  577. iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
  578. unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
  579. WARN_ON(unmapped != size);
  580. if (!iotlb_gather.queued)
  581. iommu_iotlb_sync(domain, &iotlb_gather);
  582. iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
  583. }
  584. static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
  585. size_t size, int prot, u64 dma_mask)
  586. {
  587. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  588. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  589. struct iova_domain *iovad = &cookie->iovad;
  590. size_t iova_off = iova_offset(iovad, phys);
  591. dma_addr_t iova;
  592. if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
  593. iommu_deferred_attach(dev, domain))
  594. return DMA_MAPPING_ERROR;
  595. size = iova_align(iovad, size + iova_off);
  596. iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
  597. if (!iova)
  598. return DMA_MAPPING_ERROR;
  599. if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
  600. iommu_dma_free_iova(cookie, iova, size, NULL);
  601. return DMA_MAPPING_ERROR;
  602. }
  603. return iova + iova_off;
  604. }
  605. static void __iommu_dma_free_pages(struct page **pages, int count)
  606. {
  607. while (count--)
  608. __free_page(pages[count]);
  609. kvfree(pages);
  610. }
  611. static struct page **__iommu_dma_alloc_pages(struct device *dev,
  612. unsigned int count, unsigned long order_mask, gfp_t gfp)
  613. {
  614. struct page **pages;
  615. unsigned int i = 0, nid = dev_to_node(dev);
  616. order_mask &= (2U << MAX_ORDER) - 1;
  617. if (!order_mask)
  618. return NULL;
  619. pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
  620. if (!pages)
  621. return NULL;
  622. /* IOMMU can map any pages, so highmem can also be used here */
  623. gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
  624. /* It makes no sense to muck about with huge pages */
  625. gfp &= ~__GFP_COMP;
  626. while (count) {
  627. struct page *page = NULL;
  628. unsigned int order_size;
  629. /*
  630. * Higher-order allocations are a convenience rather
  631. * than a necessity, hence using __GFP_NORETRY until
  632. * falling back to minimum-order allocations.
  633. */
  634. for (order_mask &= (2U << __fls(count)) - 1;
  635. order_mask; order_mask &= ~order_size) {
  636. unsigned int order = __fls(order_mask);
  637. gfp_t alloc_flags = gfp;
  638. order_size = 1U << order;
  639. if (order_mask > order_size)
  640. alloc_flags |= __GFP_NORETRY;
  641. trace_android_vh_adjust_alloc_flags(order, &alloc_flags);
  642. page = alloc_pages_node(nid, alloc_flags, order);
  643. if (!page)
  644. continue;
  645. if (order)
  646. split_page(page, order);
  647. break;
  648. }
  649. if (!page) {
  650. __iommu_dma_free_pages(pages, i);
  651. return NULL;
  652. }
  653. count -= order_size;
  654. while (order_size--)
  655. pages[i++] = page++;
  656. }
  657. return pages;
  658. }
  659. /*
  660. * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
  661. * but an IOMMU which supports smaller pages might not map the whole thing.
  662. */
  663. static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
  664. size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
  665. unsigned long attrs)
  666. {
  667. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  668. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  669. struct iova_domain *iovad = &cookie->iovad;
  670. bool coherent = dev_is_dma_coherent(dev);
  671. int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
  672. unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
  673. struct page **pages;
  674. dma_addr_t iova;
  675. ssize_t ret;
  676. if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
  677. iommu_deferred_attach(dev, domain))
  678. return NULL;
  679. min_size = alloc_sizes & -alloc_sizes;
  680. if (min_size < PAGE_SIZE) {
  681. min_size = PAGE_SIZE;
  682. alloc_sizes |= PAGE_SIZE;
  683. } else {
  684. size = ALIGN(size, min_size);
  685. }
  686. if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
  687. alloc_sizes = min_size;
  688. count = PAGE_ALIGN(size) >> PAGE_SHIFT;
  689. pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
  690. gfp);
  691. if (!pages)
  692. return NULL;
  693. size = iova_align(iovad, size);
  694. iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
  695. if (!iova)
  696. goto out_free_pages;
  697. if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
  698. goto out_free_iova;
  699. if (!(ioprot & IOMMU_CACHE)) {
  700. struct scatterlist *sg;
  701. int i;
  702. for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
  703. arch_dma_prep_coherent(sg_page(sg), sg->length);
  704. }
  705. ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
  706. if (ret < 0 || ret < size)
  707. goto out_free_sg;
  708. sgt->sgl->dma_address = iova;
  709. sgt->sgl->dma_length = size;
  710. return pages;
  711. out_free_sg:
  712. sg_free_table(sgt);
  713. out_free_iova:
  714. iommu_dma_free_iova(cookie, iova, size, NULL);
  715. out_free_pages:
  716. __iommu_dma_free_pages(pages, count);
  717. return NULL;
  718. }
  719. static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
  720. dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
  721. unsigned long attrs)
  722. {
  723. struct page **pages;
  724. struct sg_table sgt;
  725. void *vaddr;
  726. pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
  727. attrs);
  728. if (!pages)
  729. return NULL;
  730. *dma_handle = sgt.sgl->dma_address;
  731. sg_free_table(&sgt);
  732. vaddr = dma_common_pages_remap(pages, size, prot,
  733. __builtin_return_address(0));
  734. if (!vaddr)
  735. goto out_unmap;
  736. return vaddr;
  737. out_unmap:
  738. __iommu_dma_unmap(dev, *dma_handle, size);
  739. __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
  740. return NULL;
  741. }
  742. static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
  743. size_t size, enum dma_data_direction dir, gfp_t gfp,
  744. unsigned long attrs)
  745. {
  746. struct dma_sgt_handle *sh;
  747. sh = kmalloc(sizeof(*sh), gfp);
  748. if (!sh)
  749. return NULL;
  750. sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
  751. PAGE_KERNEL, attrs);
  752. if (!sh->pages) {
  753. kfree(sh);
  754. return NULL;
  755. }
  756. return &sh->sgt;
  757. }
  758. static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
  759. struct sg_table *sgt, enum dma_data_direction dir)
  760. {
  761. struct dma_sgt_handle *sh = sgt_handle(sgt);
  762. __iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
  763. __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
  764. sg_free_table(&sh->sgt);
  765. kfree(sh);
  766. }
  767. static void iommu_dma_sync_single_for_cpu(struct device *dev,
  768. dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
  769. {
  770. phys_addr_t phys;
  771. if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
  772. return;
  773. phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
  774. if (!dev_is_dma_coherent(dev))
  775. arch_sync_dma_for_cpu(phys, size, dir);
  776. if (is_swiotlb_buffer(dev, phys))
  777. swiotlb_sync_single_for_cpu(dev, phys, size, dir);
  778. }
  779. static void iommu_dma_sync_single_for_device(struct device *dev,
  780. dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
  781. {
  782. phys_addr_t phys;
  783. if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
  784. return;
  785. phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
  786. if (is_swiotlb_buffer(dev, phys))
  787. swiotlb_sync_single_for_device(dev, phys, size, dir);
  788. if (!dev_is_dma_coherent(dev))
  789. arch_sync_dma_for_device(phys, size, dir);
  790. }
  791. static void iommu_dma_sync_sg_for_cpu(struct device *dev,
  792. struct scatterlist *sgl, int nelems,
  793. enum dma_data_direction dir)
  794. {
  795. struct scatterlist *sg;
  796. int i;
  797. if (dev_use_swiotlb(dev))
  798. for_each_sg(sgl, sg, nelems, i)
  799. iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  800. sg->length, dir);
  801. else if (!dev_is_dma_coherent(dev))
  802. for_each_sg(sgl, sg, nelems, i)
  803. arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
  804. }
  805. static void iommu_dma_sync_sg_for_device(struct device *dev,
  806. struct scatterlist *sgl, int nelems,
  807. enum dma_data_direction dir)
  808. {
  809. struct scatterlist *sg;
  810. int i;
  811. if (dev_use_swiotlb(dev))
  812. for_each_sg(sgl, sg, nelems, i)
  813. iommu_dma_sync_single_for_device(dev,
  814. sg_dma_address(sg),
  815. sg->length, dir);
  816. else if (!dev_is_dma_coherent(dev))
  817. for_each_sg(sgl, sg, nelems, i)
  818. arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
  819. }
  820. static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
  821. unsigned long offset, size_t size, enum dma_data_direction dir,
  822. unsigned long attrs)
  823. {
  824. phys_addr_t phys = page_to_phys(page) + offset;
  825. bool coherent = dev_is_dma_coherent(dev);
  826. int prot = dma_info_to_prot(dir, coherent, attrs);
  827. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  828. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  829. struct iova_domain *iovad = &cookie->iovad;
  830. dma_addr_t iova, dma_mask = dma_get_mask(dev);
  831. /*
  832. * If both the physical buffer start address and size are
  833. * page aligned, we don't need to use a bounce page.
  834. */
  835. if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
  836. void *padding_start;
  837. size_t padding_size, aligned_size;
  838. if (!is_swiotlb_active(dev)) {
  839. dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
  840. return DMA_MAPPING_ERROR;
  841. }
  842. aligned_size = iova_align(iovad, size);
  843. phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
  844. iova_mask(iovad), dir, attrs);
  845. if (phys == DMA_MAPPING_ERROR)
  846. return DMA_MAPPING_ERROR;
  847. /* Cleanup the padding area. */
  848. padding_start = phys_to_virt(phys);
  849. padding_size = aligned_size;
  850. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
  851. (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
  852. padding_start += size;
  853. padding_size -= size;
  854. }
  855. memset(padding_start, 0, padding_size);
  856. }
  857. if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
  858. arch_sync_dma_for_device(phys, size, dir);
  859. iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
  860. if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
  861. swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
  862. return iova;
  863. }
  864. static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
  865. size_t size, enum dma_data_direction dir, unsigned long attrs)
  866. {
  867. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  868. phys_addr_t phys;
  869. phys = iommu_iova_to_phys(domain, dma_handle);
  870. if (WARN_ON(!phys))
  871. return;
  872. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
  873. arch_sync_dma_for_cpu(phys, size, dir);
  874. __iommu_dma_unmap(dev, dma_handle, size);
  875. if (unlikely(is_swiotlb_buffer(dev, phys)))
  876. swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
  877. }
  878. /*
  879. * Prepare a successfully-mapped scatterlist to give back to the caller.
  880. *
  881. * At this point the segments are already laid out by iommu_dma_map_sg() to
  882. * avoid individually crossing any boundaries, so we merely need to check a
  883. * segment's start address to avoid concatenating across one.
  884. */
  885. static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
  886. dma_addr_t dma_addr)
  887. {
  888. struct scatterlist *s, *cur = sg;
  889. unsigned long seg_mask = dma_get_seg_boundary(dev);
  890. unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
  891. int i, count = 0;
  892. for_each_sg(sg, s, nents, i) {
  893. /* Restore this segment's original unaligned fields first */
  894. dma_addr_t s_dma_addr = sg_dma_address(s);
  895. unsigned int s_iova_off = sg_dma_address(s);
  896. unsigned int s_length = sg_dma_len(s);
  897. unsigned int s_iova_len = s->length;
  898. sg_dma_address(s) = DMA_MAPPING_ERROR;
  899. sg_dma_len(s) = 0;
  900. if (sg_is_dma_bus_address(s)) {
  901. if (i > 0)
  902. cur = sg_next(cur);
  903. sg_dma_unmark_bus_address(s);
  904. sg_dma_address(cur) = s_dma_addr;
  905. sg_dma_len(cur) = s_length;
  906. sg_dma_mark_bus_address(cur);
  907. count++;
  908. cur_len = 0;
  909. continue;
  910. }
  911. s->offset += s_iova_off;
  912. s->length = s_length;
  913. /*
  914. * Now fill in the real DMA data. If...
  915. * - there is a valid output segment to append to
  916. * - and this segment starts on an IOVA page boundary
  917. * - but doesn't fall at a segment boundary
  918. * - and wouldn't make the resulting output segment too long
  919. */
  920. if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
  921. (max_len - cur_len >= s_length)) {
  922. /* ...then concatenate it with the previous one */
  923. cur_len += s_length;
  924. } else {
  925. /* Otherwise start the next output segment */
  926. if (i > 0)
  927. cur = sg_next(cur);
  928. cur_len = s_length;
  929. count++;
  930. sg_dma_address(cur) = dma_addr + s_iova_off;
  931. }
  932. sg_dma_len(cur) = cur_len;
  933. dma_addr += s_iova_len;
  934. if (s_length + s_iova_off < s_iova_len)
  935. cur_len = 0;
  936. }
  937. return count;
  938. }
  939. /*
  940. * If mapping failed, then just restore the original list,
  941. * but making sure the DMA fields are invalidated.
  942. */
  943. static void __invalidate_sg(struct scatterlist *sg, int nents)
  944. {
  945. struct scatterlist *s;
  946. int i;
  947. for_each_sg(sg, s, nents, i) {
  948. if (sg_is_dma_bus_address(s)) {
  949. sg_dma_unmark_bus_address(s);
  950. } else {
  951. if (sg_dma_address(s) != DMA_MAPPING_ERROR)
  952. s->offset += sg_dma_address(s);
  953. if (sg_dma_len(s))
  954. s->length = sg_dma_len(s);
  955. }
  956. sg_dma_address(s) = DMA_MAPPING_ERROR;
  957. sg_dma_len(s) = 0;
  958. }
  959. }
  960. static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
  961. int nents, enum dma_data_direction dir, unsigned long attrs)
  962. {
  963. struct scatterlist *s;
  964. int i;
  965. for_each_sg(sg, s, nents, i)
  966. iommu_dma_unmap_page(dev, sg_dma_address(s),
  967. sg_dma_len(s), dir, attrs);
  968. }
  969. static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
  970. int nents, enum dma_data_direction dir, unsigned long attrs)
  971. {
  972. struct scatterlist *s;
  973. int i;
  974. for_each_sg(sg, s, nents, i) {
  975. sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
  976. s->offset, s->length, dir, attrs);
  977. if (sg_dma_address(s) == DMA_MAPPING_ERROR)
  978. goto out_unmap;
  979. sg_dma_len(s) = s->length;
  980. }
  981. return nents;
  982. out_unmap:
  983. iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
  984. return -EIO;
  985. }
  986. /*
  987. * The DMA API client is passing in a scatterlist which could describe
  988. * any old buffer layout, but the IOMMU API requires everything to be
  989. * aligned to IOMMU pages. Hence the need for this complicated bit of
  990. * impedance-matching, to be able to hand off a suitably-aligned list,
  991. * but still preserve the original offsets and sizes for the caller.
  992. */
  993. static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
  994. int nents, enum dma_data_direction dir, unsigned long attrs)
  995. {
  996. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  997. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  998. struct iova_domain *iovad = &cookie->iovad;
  999. struct scatterlist *s, *prev = NULL;
  1000. int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
  1001. struct pci_p2pdma_map_state p2pdma_state = {};
  1002. enum pci_p2pdma_map_type map;
  1003. dma_addr_t iova;
  1004. size_t iova_len = 0;
  1005. unsigned long mask = dma_get_seg_boundary(dev);
  1006. ssize_t ret;
  1007. int i;
  1008. if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
  1009. ret = iommu_deferred_attach(dev, domain);
  1010. if (ret)
  1011. goto out;
  1012. }
  1013. if (dev_use_swiotlb(dev))
  1014. return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
  1015. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
  1016. iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
  1017. /*
  1018. * Work out how much IOVA space we need, and align the segments to
  1019. * IOVA granules for the IOMMU driver to handle. With some clever
  1020. * trickery we can modify the list in-place, but reversibly, by
  1021. * stashing the unaligned parts in the as-yet-unused DMA fields.
  1022. */
  1023. for_each_sg(sg, s, nents, i) {
  1024. size_t s_iova_off = iova_offset(iovad, s->offset);
  1025. size_t s_length = s->length;
  1026. size_t pad_len = (mask - iova_len + 1) & mask;
  1027. if (is_pci_p2pdma_page(sg_page(s))) {
  1028. map = pci_p2pdma_map_segment(&p2pdma_state, dev, s);
  1029. switch (map) {
  1030. case PCI_P2PDMA_MAP_BUS_ADDR:
  1031. /*
  1032. * iommu_map_sg() will skip this segment as
  1033. * it is marked as a bus address,
  1034. * __finalise_sg() will copy the dma address
  1035. * into the output segment.
  1036. */
  1037. continue;
  1038. case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
  1039. /*
  1040. * Mapping through host bridge should be
  1041. * mapped with regular IOVAs, thus we
  1042. * do nothing here and continue below.
  1043. */
  1044. break;
  1045. default:
  1046. ret = -EREMOTEIO;
  1047. goto out_restore_sg;
  1048. }
  1049. }
  1050. sg_dma_address(s) = s_iova_off;
  1051. sg_dma_len(s) = s_length;
  1052. s->offset -= s_iova_off;
  1053. s_length = iova_align(iovad, s_length + s_iova_off);
  1054. s->length = s_length;
  1055. /*
  1056. * Due to the alignment of our single IOVA allocation, we can
  1057. * depend on these assumptions about the segment boundary mask:
  1058. * - If mask size >= IOVA size, then the IOVA range cannot
  1059. * possibly fall across a boundary, so we don't care.
  1060. * - If mask size < IOVA size, then the IOVA range must start
  1061. * exactly on a boundary, therefore we can lay things out
  1062. * based purely on segment lengths without needing to know
  1063. * the actual addresses beforehand.
  1064. * - The mask must be a power of 2, so pad_len == 0 if
  1065. * iova_len == 0, thus we cannot dereference prev the first
  1066. * time through here (i.e. before it has a meaningful value).
  1067. */
  1068. if (pad_len && pad_len < s_length - 1) {
  1069. prev->length += pad_len;
  1070. iova_len += pad_len;
  1071. }
  1072. iova_len += s_length;
  1073. prev = s;
  1074. }
  1075. if (!iova_len)
  1076. return __finalise_sg(dev, sg, nents, 0);
  1077. iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
  1078. if (!iova) {
  1079. ret = -ENOMEM;
  1080. goto out_restore_sg;
  1081. }
  1082. /*
  1083. * We'll leave any physical concatenation to the IOMMU driver's
  1084. * implementation - it knows better than we do.
  1085. */
  1086. ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
  1087. if (ret < 0 || ret < iova_len)
  1088. goto out_free_iova;
  1089. return __finalise_sg(dev, sg, nents, iova);
  1090. out_free_iova:
  1091. iommu_dma_free_iova(cookie, iova, iova_len, NULL);
  1092. out_restore_sg:
  1093. __invalidate_sg(sg, nents);
  1094. out:
  1095. if (ret != -ENOMEM && ret != -EREMOTEIO)
  1096. return -EINVAL;
  1097. return ret;
  1098. }
  1099. static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
  1100. int nents, enum dma_data_direction dir, unsigned long attrs)
  1101. {
  1102. dma_addr_t end = 0, start;
  1103. struct scatterlist *tmp;
  1104. int i;
  1105. if (dev_use_swiotlb(dev)) {
  1106. iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
  1107. return;
  1108. }
  1109. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
  1110. iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
  1111. /*
  1112. * The scatterlist segments are mapped into a single
  1113. * contiguous IOVA allocation, the start and end points
  1114. * just have to be determined.
  1115. */
  1116. for_each_sg(sg, tmp, nents, i) {
  1117. if (sg_is_dma_bus_address(tmp)) {
  1118. sg_dma_unmark_bus_address(tmp);
  1119. continue;
  1120. }
  1121. if (sg_dma_len(tmp) == 0)
  1122. break;
  1123. start = sg_dma_address(tmp);
  1124. break;
  1125. }
  1126. nents -= i;
  1127. for_each_sg(tmp, tmp, nents, i) {
  1128. if (sg_is_dma_bus_address(tmp)) {
  1129. sg_dma_unmark_bus_address(tmp);
  1130. continue;
  1131. }
  1132. if (sg_dma_len(tmp) == 0)
  1133. break;
  1134. end = sg_dma_address(tmp) + sg_dma_len(tmp);
  1135. }
  1136. if (end)
  1137. __iommu_dma_unmap(dev, start, end - start);
  1138. }
  1139. static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
  1140. size_t size, enum dma_data_direction dir, unsigned long attrs)
  1141. {
  1142. return __iommu_dma_map(dev, phys, size,
  1143. dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
  1144. dma_get_mask(dev));
  1145. }
  1146. static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
  1147. size_t size, enum dma_data_direction dir, unsigned long attrs)
  1148. {
  1149. __iommu_dma_unmap(dev, handle, size);
  1150. }
  1151. static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
  1152. {
  1153. size_t alloc_size = PAGE_ALIGN(size);
  1154. int count = alloc_size >> PAGE_SHIFT;
  1155. struct page *page = NULL, **pages = NULL;
  1156. /* Non-coherent atomic allocation? Easy */
  1157. if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
  1158. dma_free_from_pool(dev, cpu_addr, alloc_size))
  1159. return;
  1160. if (is_vmalloc_addr(cpu_addr)) {
  1161. /*
  1162. * If the address is remapped, then it's either non-coherent
  1163. * or highmem CMA, or an iommu_dma_alloc_remap() construction.
  1164. */
  1165. pages = dma_common_find_pages(cpu_addr);
  1166. if (!pages)
  1167. page = vmalloc_to_page(cpu_addr);
  1168. dma_common_free_remap(cpu_addr, alloc_size);
  1169. } else {
  1170. /* Lowmem means a coherent atomic or CMA allocation */
  1171. page = virt_to_page(cpu_addr);
  1172. }
  1173. if (pages)
  1174. __iommu_dma_free_pages(pages, count);
  1175. if (page)
  1176. dma_free_contiguous(dev, page, alloc_size);
  1177. }
  1178. static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
  1179. dma_addr_t handle, unsigned long attrs)
  1180. {
  1181. __iommu_dma_unmap(dev, handle, size);
  1182. __iommu_dma_free(dev, size, cpu_addr);
  1183. }
  1184. static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
  1185. struct page **pagep, gfp_t gfp, unsigned long attrs)
  1186. {
  1187. bool coherent = dev_is_dma_coherent(dev);
  1188. size_t alloc_size = PAGE_ALIGN(size);
  1189. int node = dev_to_node(dev);
  1190. struct page *page = NULL;
  1191. void *cpu_addr;
  1192. page = dma_alloc_contiguous(dev, alloc_size, gfp);
  1193. if (!page)
  1194. page = alloc_pages_node(node, gfp, get_order(alloc_size));
  1195. if (!page)
  1196. return NULL;
  1197. if (!coherent || PageHighMem(page)) {
  1198. pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
  1199. cpu_addr = dma_common_contiguous_remap(page, alloc_size,
  1200. prot, __builtin_return_address(0));
  1201. if (!cpu_addr)
  1202. goto out_free_pages;
  1203. if (!coherent)
  1204. arch_dma_prep_coherent(page, size);
  1205. } else {
  1206. cpu_addr = page_address(page);
  1207. }
  1208. *pagep = page;
  1209. memset(cpu_addr, 0, alloc_size);
  1210. return cpu_addr;
  1211. out_free_pages:
  1212. dma_free_contiguous(dev, page, alloc_size);
  1213. return NULL;
  1214. }
  1215. static void *iommu_dma_alloc(struct device *dev, size_t size,
  1216. dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
  1217. {
  1218. bool coherent = dev_is_dma_coherent(dev);
  1219. int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
  1220. struct page *page = NULL;
  1221. void *cpu_addr;
  1222. gfp |= __GFP_ZERO;
  1223. if (gfpflags_allow_blocking(gfp) &&
  1224. !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
  1225. return iommu_dma_alloc_remap(dev, size, handle, gfp,
  1226. dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
  1227. }
  1228. if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
  1229. !gfpflags_allow_blocking(gfp) && !coherent)
  1230. page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
  1231. gfp, NULL);
  1232. else
  1233. cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
  1234. if (!cpu_addr)
  1235. return NULL;
  1236. *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
  1237. dev->coherent_dma_mask);
  1238. if (*handle == DMA_MAPPING_ERROR) {
  1239. __iommu_dma_free(dev, size, cpu_addr);
  1240. return NULL;
  1241. }
  1242. return cpu_addr;
  1243. }
  1244. static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
  1245. void *cpu_addr, dma_addr_t dma_addr, size_t size,
  1246. unsigned long attrs)
  1247. {
  1248. unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
  1249. unsigned long pfn, off = vma->vm_pgoff;
  1250. int ret;
  1251. vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
  1252. if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
  1253. return ret;
  1254. if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
  1255. return -ENXIO;
  1256. if (is_vmalloc_addr(cpu_addr)) {
  1257. struct page **pages = dma_common_find_pages(cpu_addr);
  1258. if (pages)
  1259. return vm_map_pages(vma, pages, nr_pages);
  1260. pfn = vmalloc_to_pfn(cpu_addr);
  1261. } else {
  1262. pfn = page_to_pfn(virt_to_page(cpu_addr));
  1263. }
  1264. return remap_pfn_range(vma, vma->vm_start, pfn + off,
  1265. vma->vm_end - vma->vm_start,
  1266. vma->vm_page_prot);
  1267. }
  1268. static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
  1269. void *cpu_addr, dma_addr_t dma_addr, size_t size,
  1270. unsigned long attrs)
  1271. {
  1272. struct page *page;
  1273. int ret;
  1274. if (is_vmalloc_addr(cpu_addr)) {
  1275. struct page **pages = dma_common_find_pages(cpu_addr);
  1276. if (pages) {
  1277. return sg_alloc_table_from_pages(sgt, pages,
  1278. PAGE_ALIGN(size) >> PAGE_SHIFT,
  1279. 0, size, GFP_KERNEL);
  1280. }
  1281. page = vmalloc_to_page(cpu_addr);
  1282. } else {
  1283. page = virt_to_page(cpu_addr);
  1284. }
  1285. ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
  1286. if (!ret)
  1287. sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
  1288. return ret;
  1289. }
  1290. static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
  1291. {
  1292. struct iommu_domain *domain = iommu_get_dma_domain(dev);
  1293. return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
  1294. }
  1295. static size_t iommu_dma_opt_mapping_size(void)
  1296. {
  1297. return iova_rcache_range();
  1298. }
  1299. static const struct dma_map_ops iommu_dma_ops = {
  1300. .flags = DMA_F_PCI_P2PDMA_SUPPORTED,
  1301. .alloc = iommu_dma_alloc,
  1302. .free = iommu_dma_free,
  1303. .alloc_pages = dma_common_alloc_pages,
  1304. .free_pages = dma_common_free_pages,
  1305. .alloc_noncontiguous = iommu_dma_alloc_noncontiguous,
  1306. .free_noncontiguous = iommu_dma_free_noncontiguous,
  1307. .mmap = iommu_dma_mmap,
  1308. .get_sgtable = iommu_dma_get_sgtable,
  1309. .map_page = iommu_dma_map_page,
  1310. .unmap_page = iommu_dma_unmap_page,
  1311. .map_sg = iommu_dma_map_sg,
  1312. .unmap_sg = iommu_dma_unmap_sg,
  1313. .sync_single_for_cpu = iommu_dma_sync_single_for_cpu,
  1314. .sync_single_for_device = iommu_dma_sync_single_for_device,
  1315. .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu,
  1316. .sync_sg_for_device = iommu_dma_sync_sg_for_device,
  1317. .map_resource = iommu_dma_map_resource,
  1318. .unmap_resource = iommu_dma_unmap_resource,
  1319. .get_merge_boundary = iommu_dma_get_merge_boundary,
  1320. .opt_mapping_size = iommu_dma_opt_mapping_size,
  1321. };
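/*
 * Illustrative sketch (not compiled): once iommu_setup_dma_ops() below
 * installs this table as dev->dma_ops, ordinary DMA API calls from a driver
 * are routed through it; dma_map_single() ends up in iommu_dma_map_page(),
 * for instance. example_do_transfer(), @buf and @len are placeholders.
 */
#if 0
static int example_do_transfer(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma;

	dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);	/* -> iommu_dma_map_page() */
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... hand @dma to the hardware and wait for completion ... */

	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);		/* -> iommu_dma_unmap_page() */
	return 0;
}
#endif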
  1322. /*
  1323. * The IOMMU core code allocates the default DMA domain, which the underlying
  1324. * IOMMU driver needs to support via the dma-iommu layer.
  1325. */
  1326. void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
  1327. {
  1328. struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
  1329. if (!domain)
  1330. goto out_err;
  1331. /*
  1332. * The IOMMU core code allocates the default DMA domain, which the
  1333. * underlying IOMMU driver needs to support via the dma-iommu layer.
  1334. */
  1335. if (iommu_is_dma_domain(domain)) {
  1336. if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
  1337. goto out_err;
  1338. dev->dma_ops = &iommu_dma_ops;
  1339. }
  1340. return;
  1341. out_err:
  1342. pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
  1343. dev_name(dev));
  1344. }
  1345. EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
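/*
 * Typically called from architecture code once the device's IOMMU is known,
 * e.g. arm64's arch_setup_dma_ops() invokes
 * iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1) for devices behind
 * an IOMMU.
 */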
  1346. static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
  1347. phys_addr_t msi_addr, struct iommu_domain *domain)
  1348. {
  1349. struct iommu_dma_cookie *cookie = domain->iova_cookie;
  1350. struct iommu_dma_msi_page *msi_page;
  1351. dma_addr_t iova;
  1352. int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
  1353. size_t size = cookie_msi_granule(cookie);
  1354. msi_addr &= ~(phys_addr_t)(size - 1);
  1355. list_for_each_entry(msi_page, &cookie->msi_page_list, list)
  1356. if (msi_page->phys == msi_addr)
  1357. return msi_page;
  1358. msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
  1359. if (!msi_page)
  1360. return NULL;
  1361. iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
  1362. if (!iova)
  1363. goto out_free_page;
  1364. if (iommu_map(domain, iova, msi_addr, size, prot))
  1365. goto out_free_iova;
  1366. INIT_LIST_HEAD(&msi_page->list);
  1367. msi_page->phys = msi_addr;
  1368. msi_page->iova = iova;
  1369. list_add(&msi_page->list, &cookie->msi_page_list);
  1370. return msi_page;
  1371. out_free_iova:
  1372. iommu_dma_free_iova(cookie, iova, size, NULL);
  1373. out_free_page:
  1374. kfree(msi_page);
  1375. return NULL;
  1376. }
  1377. /**
  1378. * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
  1379. * @desc: MSI descriptor, will store the MSI page
  1380. * @msi_addr: MSI target address to be mapped
  1381. *
  1382. * Return: 0 on success or negative error code if the mapping failed.
  1383. */
  1384. int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
  1385. {
  1386. struct device *dev = msi_desc_to_dev(desc);
  1387. struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
  1388. struct iommu_dma_msi_page *msi_page;
  1389. static DEFINE_MUTEX(msi_prepare_lock); /* see below */
  1390. if (!domain || !domain->iova_cookie) {
  1391. desc->iommu_cookie = NULL;
  1392. return 0;
  1393. }
  1394. /*
  1395. * In fact the whole prepare operation should already be serialised by
  1396. * irq_domain_mutex further up the callchain, but that's pretty subtle
  1397. * on its own, so consider this locking as failsafe documentation...
  1398. */
  1399. mutex_lock(&msi_prepare_lock);
  1400. msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
  1401. mutex_unlock(&msi_prepare_lock);
  1402. msi_desc_set_iommu_cookie(desc, msi_page);
  1403. if (!msi_page)
  1404. return -ENOMEM;
  1405. return 0;
  1406. }
  1407. /**
  1408. * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
  1409. * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
  1410. * @msg: MSI message containing target physical address
  1411. */
  1412. void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
  1413. {
  1414. struct device *dev = msi_desc_to_dev(desc);
  1415. const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
  1416. const struct iommu_dma_msi_page *msi_page;
  1417. msi_page = msi_desc_get_iommu_cookie(desc);
  1418. if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
  1419. return;
  1420. msg->address_hi = upper_32_bits(msi_page->iova);
  1421. msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
  1422. msg->address_lo += lower_32_bits(msi_page->iova);
  1423. }
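/*
 * Illustrative sketch (not compiled) of how an MSI irqchip driver pairs the
 * two helpers above, roughly in the way the GICv3 ITS does: prepare (and map)
 * the doorbell once per descriptor, then rewrite each composed message with
 * the per-domain IOVA. EXAMPLE_DOORBELL_PHYS and the example_* functions are
 * hypothetical.
 */
#if 0
static int example_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
				    unsigned int nr_irqs, void *args)
{
	msi_alloc_info_t *info = args;
	int err;

	/* Map the doorbell into the device's IOMMU domain (no-op without a cookie) */
	err = iommu_dma_prepare_msi(info->desc, EXAMPLE_DOORBELL_PHYS);
	if (err)
		return err;

	/* ... hwirq allocation elided ... */
	return 0;
}

static void example_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
	msg->address_lo = lower_32_bits(EXAMPLE_DOORBELL_PHYS);
	msg->address_hi = upper_32_bits(EXAMPLE_DOORBELL_PHYS);
	msg->data = d->hwirq;

	/* Replace the physical doorbell address with its mapped IOVA, if any */
	iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
}
#endif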
  1424. static int iommu_dma_init(void)
  1425. {
  1426. if (is_kdump_kernel())
  1427. static_branch_enable(&iommu_deferred_attach_enabled);
  1428. return iova_cache_get();
  1429. }
  1430. arch_initcall(iommu_dma_init);