Merge tag 'dma-mapping-4.16' of git://git.infradead.org/users/hch/dma-mapping

Pull dma mapping updates from Christoph Hellwig:
 "Except for a runtime warning fix from Christian this is all about
  consolidation of the generic no-IOMMU code, a well as the glue code
  for swiotlb.

  All the code is based on the x86 implementation with hooks to allow
  all architectures that aren't cache coherent to use it.

  The x86 conversion itself has been deferred because the x86
  maintainers were a little busy in the last months"

* tag 'dma-mapping-4.16' of git://git.infradead.org/users/hch/dma-mapping: (57 commits)
  MAINTAINERS: add the iommu list for swiotlb and xen-swiotlb
  arm64: use swiotlb_alloc and swiotlb_free
  arm64: replace ZONE_DMA with ZONE_DMA32
  mips: use swiotlb_{alloc,free}
  mips/netlogic: remove swiotlb support
  tile: use generic swiotlb_ops
  tile: replace ZONE_DMA with ZONE_DMA32
  unicore32: use generic swiotlb_ops
  ia64: remove an ifdef around the content of pci-dma.c
  ia64: clean up swiotlb support
  ia64: use generic swiotlb_ops
  ia64: replace ZONE_DMA with ZONE_DMA32
  swiotlb: remove various exports
  swiotlb: refactor coherent buffer allocation
  swiotlb: refactor coherent buffer freeing
  swiotlb: wire up ->dma_supported in swiotlb_dma_ops
  swiotlb: add common swiotlb_map_ops
  swiotlb: rename swiotlb_free to swiotlb_exit
  x86: rename swiotlb_dma_ops
  powerpc: rename swiotlb_dma_ops
  ...
This commit is contained in:
Linus Torvalds
2018-01-31 11:32:27 -08:00
119 changed files with 731 additions and 1269 deletions

View File

@@ -413,7 +413,7 @@ config SGL_ALLOC
bool
default n
config DMA_NOOP_OPS
config DMA_DIRECT_OPS
bool
depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
default n

View File

@@ -28,7 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
lib-y += kobject.o klist.o

156
lib/dma-direct.c Normal file
View File

@@ -0,0 +1,156 @@
// SPDX-License-Identifier: GPL-2.0
/*
* DMA operations that map physical memory directly without using an IOMMU or
* flushing caches.
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/pfn.h>
#define DIRECT_MAPPING_ERROR 0
/*
* Most architectures use ZONE_DMA for the first 16 Megabytes, but
* some use it for entirely different regions:
*/
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif
static bool
check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
const char *caller)
{
if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
dev_err(dev,
"%s: overflow %pad+%zu of device mask %llx\n",
caller, &dma_addr, size, *dev->dma_mask);
}
return false;
}
return true;
}
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
}
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
gfp |= GFP_DMA;
if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
gfp |= GFP_DMA32;
again:
/* CMA can be used only in the context which permits sleeping */
if (gfpflags_allow_blocking(gfp)) {
page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
}
}
if (!page)
page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
__free_pages(page, page_order);
page = NULL;
if (dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
!(gfp & GFP_DMA)) {
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
goto again;
}
}
if (!page)
return NULL;
*dma_handle = phys_to_dma(dev, page_to_phys(page));
memset(page_address(page), 0, size);
return page_address(page);
}
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
free_pages((unsigned long)cpu_addr, get_order(size));
}
static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
if (!check_addr(dev, dma_addr, size, __func__))
return DIRECT_MAPPING_ERROR;
return dma_addr;
}
static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
int i;
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
BUG_ON(!sg_page(sg));
sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
return 0;
sg_dma_len(sg) = sg->length;
}
return nents;
}
int dma_direct_supported(struct device *dev, u64 mask)
{
#ifdef CONFIG_ZONE_DMA
if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
return 0;
#else
/*
* Because 32-bit DMA masks are so common we expect every architecture
* to be able to satisfy them - either by not supporting more physical
* memory, or by providing a ZONE_DMA32. If neither is the case, the
* architecture needs to use an IOMMU instead of the direct mapping.
*/
if (mask < DMA_BIT_MASK(32))
return 0;
#endif
return 1;
}
static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == DIRECT_MAPPING_ERROR;
}
const struct dma_map_ops dma_direct_ops = {
.alloc = dma_direct_alloc,
.free = dma_direct_free,
.map_page = dma_direct_map_page,
.map_sg = dma_direct_map_sg,
.dma_supported = dma_direct_supported,
.mapping_error = dma_direct_mapping_error,
};
EXPORT_SYMBOL(dma_direct_ops);

View File

@@ -1,68 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* lib/dma-noop.c
*
* DMA operations that map to physical addresses without flushing memory.
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/pfn.h>
static void *dma_noop_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
unsigned long attrs)
{
void *ret;
ret = (void *)__get_free_pages(gfp, get_order(size));
if (ret)
*dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
return ret;
}
static void dma_noop_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr,
unsigned long attrs)
{
free_pages((unsigned long)cpu_addr, get_order(size));
}
static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset);
}
static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir,
unsigned long attrs)
{
int i;
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
void *va;
BUG_ON(!sg_page(sg));
va = sg_virt(sg);
sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset;
sg_dma_len(sg) = sg->length;
}
return nents;
}
const struct dma_map_ops dma_noop_ops = {
.alloc = dma_noop_alloc,
.free = dma_noop_free,
.map_page = dma_noop_map_page,
.map_sg = dma_noop_map_sg,
};
EXPORT_SYMBOL(dma_noop_ops);

View File

@@ -18,7 +18,7 @@
*/
#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -417,7 +417,7 @@ cleanup2:
return -ENOMEM;
}
void __init swiotlb_free(void)
void __init swiotlb_exit(void)
{
if (!io_tlb_orig_addr)
return;
@@ -586,7 +586,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
not_found:
spin_unlock_irqrestore(&io_tlb_lock, flags);
if (printk_ratelimit())
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
return SWIOTLB_MAP_ERROR;
found:
@@ -605,7 +605,6 @@ found:
return tlb_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
/*
* Allocates bounce buffer and returns its kernel virtual address.
@@ -675,7 +674,6 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
}
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
@@ -707,92 +705,107 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
BUG();
}
}
EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
size_t size)
{
u64 mask = DMA_BIT_MASK(32);
if (dev && dev->coherent_dma_mask)
mask = dev->coherent_dma_mask;
return addr + size - 1 <= mask;
}
static void *
swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
unsigned long attrs)
{
phys_addr_t phys_addr;
if (swiotlb_force == SWIOTLB_NO_FORCE)
goto out_warn;
phys_addr = swiotlb_tbl_map_single(dev,
swiotlb_phys_to_dma(dev, io_tlb_start),
0, size, DMA_FROM_DEVICE, 0);
if (phys_addr == SWIOTLB_MAP_ERROR)
goto out_warn;
*dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
if (dma_coherent_ok(dev, *dma_handle, size))
goto out_unmap;
memset(phys_to_virt(phys_addr), 0, size);
return phys_to_virt(phys_addr);
out_unmap:
dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
(unsigned long long)(dev ? dev->coherent_dma_mask : 0),
(unsigned long long)*dma_handle);
/*
* DMA_TO_DEVICE to avoid memcpy in unmap_single.
* DMA_ATTR_SKIP_CPU_SYNC is optional.
*/
swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
out_warn:
if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
dev_warn(dev,
"swiotlb: coherent allocation failed, size=%zu\n",
size);
dump_stack();
}
return NULL;
}
void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{
dma_addr_t dev_addr;
void *ret;
int order = get_order(size);
u64 dma_mask = DMA_BIT_MASK(32);
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
void *ret;
ret = (void *)__get_free_pages(flags, order);
if (ret) {
dev_addr = swiotlb_virt_to_bus(hwdev, ret);
if (dev_addr + size - 1 > dma_mask) {
/*
* The allocated memory isn't reachable by the device.
*/
free_pages((unsigned long) ret, order);
ret = NULL;
}
}
if (!ret) {
/*
* We are either out of memory or the device can't DMA to
* GFP_DMA memory; fall back on map_single(), which
* will grab memory from the lowest available address range.
*/
phys_addr_t paddr = map_single(hwdev, 0, size,
DMA_FROM_DEVICE, 0);
if (paddr == SWIOTLB_MAP_ERROR)
goto err_warn;
ret = phys_to_virt(paddr);
dev_addr = swiotlb_phys_to_dma(hwdev, paddr);
/* Confirm address can be DMA'd by device */
if (dev_addr + size - 1 > dma_mask) {
printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
(unsigned long long)dma_mask,
(unsigned long long)dev_addr);
/*
* DMA_TO_DEVICE to avoid memcpy in unmap_single.
* The DMA_ATTR_SKIP_CPU_SYNC is optional.
*/
swiotlb_tbl_unmap_single(hwdev, paddr,
size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
goto err_warn;
*dma_handle = swiotlb_virt_to_bus(hwdev, ret);
if (dma_coherent_ok(hwdev, *dma_handle, size)) {
memset(ret, 0, size);
return ret;
}
free_pages((unsigned long)ret, order);
}
*dma_handle = dev_addr;
memset(ret, 0, size);
return ret;
err_warn:
pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n",
dev_name(hwdev), size);
dump_stack();
return NULL;
return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);
static bool swiotlb_free_buffer(struct device *dev, size_t size,
dma_addr_t dma_addr)
{
phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
WARN_ON_ONCE(irqs_disabled());
if (!is_swiotlb_buffer(phys_addr))
return false;
/*
* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
* DMA_ATTR_SKIP_CPU_SYNC is optional.
*/
swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
return true;
}
void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
dma_addr_t dev_addr)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
WARN_ON(irqs_disabled());
if (!is_swiotlb_buffer(paddr))
if (!swiotlb_free_buffer(hwdev, size, dev_addr))
free_pages((unsigned long)vaddr, get_order(size));
else
/*
* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
* DMA_ATTR_SKIP_CPU_SYNC is optional.
*/
swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC);
}
EXPORT_SYMBOL(swiotlb_free_coherent);
@@ -868,7 +881,6 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);
/*
* Unmap a single streaming mode DMA translation. The dma_addr and size must
@@ -909,7 +921,6 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
{
unmap_single(hwdev, dev_addr, size, dir, attrs);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
/*
* Make physical memory consistent for a single streaming mode DMA translation
@@ -947,7 +958,6 @@ swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
@@ -955,7 +965,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
{
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
/*
* Map a set of buffers described by scatterlist in streaming mode for DMA.
@@ -1007,7 +1016,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
}
return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);
/*
* Unmap a set of streaming mode DMA translations. Again, cpu read rules
@@ -1027,7 +1035,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
attrs);
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
/*
* Make physical memory consistent for a set of streaming mode DMA translations
@@ -1055,7 +1062,6 @@ swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
@@ -1063,14 +1069,12 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
{
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
/*
* Return whether the given device DMA address mask can be supported
@@ -1083,4 +1087,49 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);
#ifdef CONFIG_DMA_DIRECT_OPS
void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
void *vaddr;
/* temporary workaround: */
if (gfp & __GFP_NOWARN)
attrs |= DMA_ATTR_NO_WARN;
/*
* Don't print a warning when the first allocation attempt fails.
* swiotlb_alloc_coherent() will print a warning when the DMA memory
* allocation ultimately failed.
*/
gfp |= __GFP_NOWARN;
vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
if (!vaddr)
vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
return vaddr;
}
void swiotlb_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
if (!swiotlb_free_buffer(dev, size, dma_addr))
dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
const struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc = swiotlb_alloc,
.free = swiotlb_free,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.map_sg = swiotlb_map_sg_attrs,
.unmap_sg = swiotlb_unmap_sg_attrs,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.dma_supported = swiotlb_dma_supported,
};
#endif /* CONFIG_DMA_DIRECT_OPS */