swiotlb-xen.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2010
 * by Konrad Rzeszutek Wilk <[email protected]>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run in a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) to virtual and vice-versa and also providing a
 * mechanism to have contiguous pages for device driver operations (say DMA
 * operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion. It
 * assumes that pages start at zero and go up to the available memory. To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice-versa. The MFNs are the "real" frame numbers. Furthermore
 * memory is not contiguous. The Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>
#include <asm/dma-mapping.h>
#include <trace/events/swiotlb.h>

#define MAX_DMA_BITS 32
/*
 * Helpers to translate between guest physical addresses, Xen bus
 * (machine) addresses and DMA addresses.
 */
static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
	unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
	phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

	baddr |= paddr & ~XEN_PAGE_MASK;
	return baddr;
}

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
					  phys_addr_t baddr)
{
	unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
	phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
			    (baddr & ~XEN_PAGE_MASK);

	return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
					  dma_addr_t dma_addr)
{
	return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}
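
/*
 * Worked example (editor's illustration, made-up numbers): assume
 * XEN_PAGE_SHIFT == 12 and that guest frame 0x1000 is backed by machine
 * frame 0x9abcd, i.e. pfn_to_bfn(0x1000) == 0x9abcd.  Then
 *
 *	xen_phys_to_bus(dev, 0x1000123)
 *		= (0x9abcd << 12) | (0x1000123 & 0xfff)
 *		= 0x9abcd123
 *
 * and xen_bus_to_phys() reverses the lookup via bfn_to_pfn().  Only the
 * frame number is translated; the offset within the Xen page is kept.
 */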
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
	unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
	unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

	next_bfn = pfn_to_bfn(xen_pfn);
	for (i = 1; i < nr_pages; i++)
		if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
			return 1;

	return 0;
}
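
/*
 * Example (editor's illustration, continuing the made-up mapping above):
 * a 0x200-byte buffer at guest physical 0x1000f00 covers guest frames
 * 0x1000 and 0x1001.  It is machine-contiguous only if
 * pfn_to_bfn(0x1001) == pfn_to_bfn(0x1000) + 1; otherwise
 * range_straddles_page_boundary() returns 1 and the buffer has to be
 * bounced (or remapped) before a device may DMA to it.
 */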
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

	/* If the address is outside our domain, it CAN
	 * have the same virtual address as another address
	 * in our domain. Therefore _only_ check address within our domain.
	 */
	if (pfn_valid(PFN_DOWN(paddr)))
		return is_swiotlb_buffer(dev, paddr);
	return 0;
}
#ifdef CONFIG_X86
int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
	int rc;
	unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
	unsigned int i, dma_bits = order + PAGE_SHIFT;
	dma_addr_t dma_handle;
	phys_addr_t p = virt_to_phys(buf);

	BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
	BUG_ON(nslabs % IO_TLB_SEGSIZE);

	i = 0;
	do {
		do {
			rc = xen_create_contiguous_region(
				p + (i << IO_TLB_SHIFT), order,
				dma_bits, &dma_handle);
		} while (rc && dma_bits++ < MAX_DMA_BITS);
		if (rc)
			return rc;

		i += IO_TLB_SEGSIZE;
	} while (i < nslabs);
	return 0;
}
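
/*
 * Editor's note (illustrative, assuming the default IO_TLB_SHIFT == 11
 * and IO_TLB_SEGSIZE == 128): each pass of the outer loop above covers
 * 128 slabs of 2 KiB, i.e. one 256 KiB segment (order 6 with 4 KiB
 * pages), and asks Xen to back that segment with machine-contiguous
 * memory.  The inner loop starts with the tightest address constraint
 * the segment allows (order + PAGE_SHIFT bits) and relaxes it up to
 * MAX_DMA_BITS (32) until the exchange succeeds.
 */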
static void *
xen_swiotlb_alloc_coherent(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
	u64 dma_mask = dev->coherent_dma_mask;
	int order = get_order(size);
	phys_addr_t phys;
	void *ret;

	/* Align the allocation to the Xen page size */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	ret = (void *)__get_free_pages(flags, get_order(size));
	if (!ret)
		return ret;
	phys = virt_to_phys(ret);

	*dma_handle = xen_phys_to_dma(dev, phys);
	if (*dma_handle + size - 1 > dma_mask ||
	    range_straddles_page_boundary(phys, size)) {
		if (xen_create_contiguous_region(phys, order, fls64(dma_mask),
				dma_handle) != 0)
			goto out_free_pages;
		SetPageXenRemapped(virt_to_page(ret));
	}

	memset(ret, 0, size);
	return ret;

out_free_pages:
	free_pages((unsigned long)ret, get_order(size));
	return NULL;
}

static void
xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr,
		dma_addr_t dma_handle, unsigned long attrs)
{
	phys_addr_t phys = virt_to_phys(vaddr);
	int order = get_order(size);

	/* Convert the size to actually allocated. */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) ||
	    WARN_ON_ONCE(range_straddles_page_boundary(phys, size)))
		return;

	if (TestClearPageXenRemapped(virt_to_page(vaddr)))
		xen_destroy_contiguous_region(phys, order);

	free_pages((unsigned long)vaddr, get_order(size));
}
#endif /* CONFIG_X86 */
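
/*
 * Illustrative driver-side sketch (editor's example, not part of this
 * file): consumers never call xen_swiotlb_alloc_coherent() directly.
 * They allocate through the generic DMA API, which dispatches to the
 * .alloc/.free methods of xen_swiotlb_dma_ops when those ops are
 * installed for the device.  Assumes <linux/dma-mapping.h>; the ring
 * names are hypothetical.
 */
static void *example_alloc_desc_ring(struct device *dev, size_t ring_bytes,
				     dma_addr_t *dma)
{
	/* Ends up in xen_swiotlb_alloc_coherent() when these ops are in use. */
	return dma_alloc_coherent(dev, ring_bytes, dma, GFP_KERNEL);
}

static void example_free_desc_ring(struct device *dev, size_t ring_bytes,
				   void *ring, dma_addr_t dma)
{
	dma_free_coherent(dev, ring_bytes, ring, dma);
}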
/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * DMA address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or one of the xen_swiotlb_sync_single_for_*
 * calls is performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
				unsigned long offset, size_t size,
				enum dma_data_direction dir,
				unsigned long attrs)
{
	phys_addr_t map, phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size, true) &&
	    !range_straddles_page_boundary(phys, size) &&
	    !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
	    !is_swiotlb_force_bounce(dev))
		goto done;

	/*
	 * Oh well, have to allocate and map a bounce buffer.
	 */
	trace_swiotlb_bounced(dev, dev_addr, size);

	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
		return DMA_MAPPING_ERROR;

	phys = map;
	dev_addr = xen_phys_to_dma(dev, map);

	/*
	 * Ensure that the address returned is DMA'ble
	 */
	if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
		swiotlb_tbl_unmap_single(dev, map, size, dir,
				attrs | DMA_ATTR_SKIP_CPU_SYNC);
		return DMA_MAPPING_ERROR;
	}

done:
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
			arch_sync_dma_for_device(phys, size, dir);
		else
			xen_dma_sync_for_device(dev, dev_addr, size, dir);
	}
	return dev_addr;
}
/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided in a previous xen_swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
	}

	/* NOTE: We use dev_addr here, not paddr! */
	if (is_xen_swiotlb_buffer(hwdev, dev_addr))
		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}
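
/*
 * Illustrative driver-side sketch (editor's example, not part of this
 * file): a streaming mapping as a driver would issue it.  dma_map_page()
 * and dma_unmap_page() dispatch to the .map_page/.unmap_page methods
 * above, so bouncing through the Xen swiotlb happens transparently when
 * the buffer is not machine-contiguous or not addressable by the device.
 * Assumes <linux/dma-mapping.h>.
 */
static int example_dma_from_device(struct device *dev, struct page *page,
				   size_t len)
{
	dma_addr_t addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	/* ... program the device with 'addr' and wait for completion ... */

	dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
	return 0;
}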
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
	}

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}

static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_sync_single_for_device(dev, paddr, size, dir);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_device(paddr, size, dir);
		else
			xen_dma_sync_for_device(dev, dma_addr, size, dir);
	}
}
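
/*
 * Illustrative sketch (editor's example, not part of this file): when
 * the CPU touches a buffer that stays mapped for the device, the access
 * is bracketed with the sync calls, which dispatch to the
 * .sync_single_* methods above (and copy to/from the bounce buffer when
 * one is in use).
 */
static void example_peek_rx_buffer(struct device *dev, dma_addr_t addr,
				   size_t len)
{
	dma_sync_single_for_cpu(dev, addr, len, DMA_FROM_DEVICE);
	/* ... the CPU may now read the buffer contents ... */
	dma_sync_single_for_device(dev, addr, len, DMA_FROM_DEVICE);
}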
/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i)
		xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
				dir, attrs);
}

static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i) {
		sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nelems;
out_unmap:
	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	sg_dma_len(sgl) = 0;
	return -EIO;
}
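
/*
 * Illustrative driver-side sketch (editor's example, not part of this
 * file): scatter-gather mapping as a driver would issue it.
 * dma_map_sg()/dma_unmap_sg() dispatch to the .map_sg/.unmap_sg methods
 * above, which simply run every entry through xen_swiotlb_map_page().
 * Assumes <linux/scatterlist.h> and <linux/dma-mapping.h>.
 */
static int example_map_two_pages(struct device *dev, struct page *p0,
				 struct page *p1)
{
	struct scatterlist sgl[2];
	int nents;

	sg_init_table(sgl, 2);
	sg_set_page(&sgl[0], p0, PAGE_SIZE, 0);
	sg_set_page(&sgl[1], p1, PAGE_SIZE, 0);

	nents = dma_map_sg(dev, sgl, 2, DMA_TO_DEVICE);
	if (nents <= 0)
		return -EIO;

	/* ... hand sg_dma_address()/sg_dma_len() of each entry to the HW ... */

	dma_unmap_sg(dev, sgl, 2, DMA_TO_DEVICE);
	return 0;
}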
static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
			    int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
						sg->length, dir);
	}
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
			       int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
						   sg->length, dir);
	}
}
/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
}
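
/*
 * Illustrative driver-side sketch (editor's example, not part of this
 * file): a driver negotiating its DMA mask.  dma_set_mask_and_coherent()
 * consults .dma_supported above, so on Xen the check is made against the
 * machine address of the end of the swiotlb pool rather than against
 * guest-physical addresses.
 */
static int example_set_32bit_mask(struct device *dev)
{
	return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
}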
const struct dma_map_ops xen_swiotlb_dma_ops = {
#ifdef CONFIG_X86
	.alloc = xen_swiotlb_alloc_coherent,
	.free = xen_swiotlb_free_coherent,
#else
	.alloc = dma_direct_alloc,
	.free = dma_direct_free,
#endif
	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
	.map_sg = xen_swiotlb_map_sg,
	.unmap_sg = xen_swiotlb_unmap_sg,
	.map_page = xen_swiotlb_map_page,
	.unmap_page = xen_swiotlb_unmap_page,
	.dma_supported = xen_swiotlb_dma_supported,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
	.max_mapping_size = swiotlb_max_mapping_size,
};