dma-mapping: bypass indirect calls for dma-direct
Avoid expensive indirect calls in the fast path DMA mapping operations by directly calling the dma_direct_* ops if we are using the directly mapped DMA operations. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Tested-by: Jesper Dangaard Brouer <brouer@redhat.com> Tested-by: Tony Luck <tony.luck@intel.com>
此提交包含在:
@@ -223,6 +223,7 @@ void dma_direct_sync_single_for_device(struct device *dev,
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_device(dev, paddr, size, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_sync_single_for_device);
|
||||
|
||||
void dma_direct_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
@@ -240,6 +241,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
|
||||
dir);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
|
||||
@@ -258,6 +260,7 @@ void dma_direct_sync_single_for_cpu(struct device *dev,
|
||||
if (unlikely(is_swiotlb_buffer(paddr)))
|
||||
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);
|
||||
|
||||
void dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
|
||||
@@ -277,6 +280,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
|
||||
if (!dev_is_dma_coherent(dev))
|
||||
arch_sync_dma_for_cpu_all(dev);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);
|
||||
|
||||
void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
@@ -289,6 +293,7 @@ void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
|
||||
if (unlikely(is_swiotlb_buffer(phys)))
|
||||
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_unmap_page);
|
||||
|
||||
void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
@@ -300,11 +305,7 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
|
||||
attrs);
|
||||
}
|
||||
#else
|
||||
void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_unmap_sg);
|
||||
#endif
|
||||
|
||||
static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
|
||||
@@ -331,6 +332,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
|
||||
arch_sync_dma_for_device(dev, phys, size, dir);
|
||||
return dma_addr;
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_map_page);
|
||||
|
||||
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
@@ -352,6 +354,7 @@ out_unmap:
|
||||
dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(dma_direct_map_sg);
|
||||
|
||||
/*
|
||||
* Because 32-bit DMA masks are so common we expect every architecture to be
|
||||
@@ -372,27 +375,3 @@ int dma_direct_supported(struct device *dev, u64 mask)
|
||||
|
||||
return mask >= phys_to_dma(dev, min_mask);
|
||||
}
|
||||
|
||||
const struct dma_map_ops dma_direct_ops = {
|
||||
.alloc = dma_direct_alloc,
|
||||
.free = dma_direct_free,
|
||||
.map_page = dma_direct_map_page,
|
||||
.map_sg = dma_direct_map_sg,
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
|
||||
defined(CONFIG_SWIOTLB)
|
||||
.sync_single_for_device = dma_direct_sync_single_for_device,
|
||||
.sync_sg_for_device = dma_direct_sync_sg_for_device,
|
||||
#endif
|
||||
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
|
||||
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
|
||||
defined(CONFIG_SWIOTLB)
|
||||
.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
|
||||
.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
|
||||
.unmap_page = dma_direct_unmap_page,
|
||||
.unmap_sg = dma_direct_unmap_sg,
|
||||
#endif
|
||||
.get_required_mask = dma_direct_get_required_mask,
|
||||
.dma_supported = dma_direct_supported,
|
||||
.cache_sync = arch_dma_cache_sync,
|
||||
};
|
||||
EXPORT_SYMBOL(dma_direct_ops);
|
||||
|
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
#include <linux/memblock.h> /* for max_pfn */
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/dma-direct.h>
|
||||
#include <linux/dma-noncoherent.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/gfp.h>
|
||||
@@ -229,8 +230,8 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
|
||||
unsigned long attrs)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
BUG_ON(!ops);
|
||||
if (ops->get_sgtable)
|
||||
|
||||
if (!dma_is_direct(ops) && ops->get_sgtable)
|
||||
return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
|
||||
attrs);
|
||||
return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
|
||||
@@ -293,8 +294,8 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
|
||||
unsigned long attrs)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
BUG_ON(!ops);
|
||||
if (ops->mmap)
|
||||
|
||||
if (!dma_is_direct(ops) && ops->mmap)
|
||||
return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
|
||||
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
|
||||
}
|
||||
@@ -324,6 +325,8 @@ u64 dma_get_required_mask(struct device *dev)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
if (dma_is_direct(ops))
|
||||
return dma_direct_get_required_mask(dev);
|
||||
if (ops->get_required_mask)
|
||||
return ops->get_required_mask(dev);
|
||||
return dma_default_get_required_mask(dev);
|
||||
@@ -341,7 +344,6 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
void *cpu_addr;
|
||||
|
||||
BUG_ON(!ops);
|
||||
WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
|
||||
|
||||
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
|
||||
@@ -352,10 +354,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
||||
|
||||
if (!arch_dma_alloc_attrs(&dev))
|
||||
return NULL;
|
||||
if (!ops->alloc)
|
||||
|
||||
if (dma_is_direct(ops))
|
||||
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
|
||||
else if (ops->alloc)
|
||||
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
|
||||
else
|
||||
return NULL;
|
||||
|
||||
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
|
||||
debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
|
||||
return cpu_addr;
|
||||
}
|
||||
@@ -366,8 +372,6 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
BUG_ON(!ops);
|
||||
|
||||
if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
|
||||
return;
|
||||
/*
|
||||
@@ -379,11 +383,14 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
|
||||
*/
|
||||
WARN_ON(irqs_disabled());
|
||||
|
||||
if (!ops->free || !cpu_addr)
|
||||
if (!cpu_addr)
|
||||
return;
|
||||
|
||||
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
|
||||
ops->free(dev, size, cpu_addr, dma_handle, attrs);
|
||||
if (dma_is_direct(ops))
|
||||
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
|
||||
else if (ops->free)
|
||||
ops->free(dev, size, cpu_addr, dma_handle, attrs);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_free_attrs);
|
||||
|
||||
@@ -397,9 +404,9 @@ int dma_supported(struct device *dev, u64 mask)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
if (!ops)
|
||||
return 0;
|
||||
if (!ops->dma_supported)
|
||||
if (dma_is_direct(ops))
|
||||
return dma_direct_supported(dev, mask);
|
||||
if (ops->dma_supported)
|
||||
return 1;
|
||||
return ops->dma_supported(dev, mask);
|
||||
}
|
||||
@@ -437,7 +444,10 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
BUG_ON(!valid_dma_direction(dir));
|
||||
if (ops->cache_sync)
|
||||
|
||||
if (dma_is_direct(ops))
|
||||
arch_dma_cache_sync(dev, vaddr, size, dir);
|
||||
else if (ops->cache_sync)
|
||||
ops->cache_sync(dev, vaddr, size, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_cache_sync);
|
||||
|
新增問題並參考
封鎖使用者