Merge tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - add dma-mapping and block layer helpers to take care of IOMMU merging
   for mmc plus subsequent fixups (Yoshihiro Shimoda)

 - rework handling of the pgprot bits for remapping (me)

 - take care of the dma direct infrastructure for swiotlb-xen (me)

 - improve the dma noncoherent remapping infrastructure (me)

 - better defaults for ->mmap, ->get_sgtable and ->get_required_mask
   (me)

 - cleanup mmaping of coherent DMA allocations (me)

 - various misc cleanups (Andy Shevchenko, me)

* tag 'dma-mapping-5.4' of git://git.infradead.org/users/hch/dma-mapping: (41 commits)
  mmc: renesas_sdhi_internal_dmac: Add MMC_CAP2_MERGE_CAPABLE
  mmc: queue: Fix bigger segments usage
  arm64: use asm-generic/dma-mapping.h
  swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page
  swiotlb-xen: simplify cache maintainance
  swiotlb-xen: use the same foreign page check everywhere
  swiotlb-xen: remove xen_swiotlb_dma_mmap and xen_swiotlb_dma_get_sgtable
  xen: remove the exports for xen_{create,destroy}_contiguous_region
  xen/arm: remove xen_dma_ops
  xen/arm: simplify dma_cache_maint
  xen/arm: use dev_is_dma_coherent
  xen/arm: consolidate page-coherent.h
  xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance
  arm: remove wrappers for the generic dma remap helpers
  dma-mapping: introduce a dma_common_find_pages helper
  dma-mapping: always use VM_DMA_COHERENT for generic DMA remap
  vmalloc: lift the arm flag for coherent mappings to common code
  dma-mapping: provide a better default ->get_required_mask
  dma-mapping: remove the dma_declare_coherent_memory export
  remoteproc: don't allow modular build
  ...
This commit is contained in:
Linus Torvalds
2019-09-19 13:27:23 -07:00
當前提交 671df18953
共有 73 個文件被更改,包括 397 次插入674 次删除

查看文件

@@ -8,7 +8,7 @@ config ARM
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD

查看文件

@@ -14,9 +14,6 @@ struct dev_archdata {
#endif
#ifdef CONFIG_ARM_DMA_USE_IOMMU
struct dma_iommu_mapping *mapping;
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
unsigned int dma_coherent:1;
unsigned int dma_ops_setup:1;

查看文件

@@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
}
#endif
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
return dev->archdata.dma_coherent;
}
/**
* arm_dma_alloc - allocate consistent memory for DMA
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices

查看文件

@@ -62,7 +62,6 @@ typedef pte_t *pte_addr_t;
*/
#define pgprot_noncached(prot) (prot)
#define pgprot_writecombine(prot) (prot)
#define pgprot_dmacoherent(prot) (prot)
#define pgprot_device(prot) (prot)

查看文件

@@ -1,95 +1,2 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
#define _ASM_ARM_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dev_dma_ops)
return dev->archdata.dev_dma_ops;
return get_arch_dma_ops(NULL);
}
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
/*
* Dom0 is mapped 1:1, while the Linux page can span across
* multiple Xen pages, it's not possible for it to contain a
* mix of local and foreign Xen pages. So if the first xen_pfn
* == mfn the page is local otherwise it's a foreign page
* grant-mapped in dom0. If the page is local we can safely
* call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (local)
xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
* multiple Xen page, it's not possible to have a mix of local and
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->unmap_page)
xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_device)
xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */

查看文件

@@ -68,8 +68,9 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
return ret;
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
return -ENXIO;
}

查看文件

@@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
@@ -35,6 +36,7 @@
#include <asm/mach/map.h>
#include <asm/system_info.h>
#include <asm/dma-contiguous.h>
#include <xen/swiotlb-xen.h>
#include "dma.h"
#include "mm.h"
@@ -192,6 +194,7 @@ const struct dma_map_ops arm_dma_ops = {
.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
.sync_sg_for_device = arm_dma_sync_sg_for_device,
.dma_supported = arm_dma_supported,
.get_required_mask = dma_direct_get_required_mask,
};
EXPORT_SYMBOL(arm_dma_ops);
@@ -212,6 +215,7 @@ const struct dma_map_ops arm_coherent_dma_ops = {
.map_sg = arm_dma_map_sg,
.map_resource = dma_direct_map_resource,
.dma_supported = arm_dma_supported,
.get_required_mask = dma_direct_get_required_mask,
};
EXPORT_SYMBOL(arm_coherent_dma_ops);
@@ -336,25 +340,6 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr);
static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
{
/*
* DMA allocation can be mapped to user space, so lets
* set VM_USERMAP flags too.
*/
return dma_common_contiguous_remap(page, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP,
prot, caller);
}
static void __dma_free_remap(void *cpu_addr, size_t size)
{
dma_common_free_remap(cpu_addr, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP);
}
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static struct gen_pool *atomic_pool __ro_after_init;
@@ -510,7 +495,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
if (!want_vaddr)
goto out;
ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
__dma_free_buffer(page, size);
return NULL;
@@ -577,7 +562,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
goto out;
if (PageHighMem(page)) {
ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
dma_release_from_contiguous(dev, page, count);
return NULL;
@@ -597,7 +582,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
{
if (want_vaddr) {
if (PageHighMem(page))
__dma_free_remap(cpu_addr, size);
dma_common_free_remap(cpu_addr, size);
else
__dma_remap(page, size, PAGE_KERNEL);
}
@@ -689,7 +674,7 @@ static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
static void remap_allocator_free(struct arm_dma_free_args *args)
{
if (args->want_vaddr)
__dma_free_remap(args->cpu_addr, args->size);
dma_common_free_remap(args->cpu_addr, args->size);
__dma_free_buffer(args->page, args->size);
}
@@ -877,17 +862,6 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
}
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
* that the intention is to allow exporting memory allocated via the
* coherent DMA APIs through the dma_buf API, which only accepts a
* scattertable. This presents a couple of problems:
* 1. Not all memory allocated via the coherent DMA APIs is backed by
* a struct page
* 2. Passing coherent DMA memory into the streaming APIs is not allowed
* as we will try to flush the memory through a different alias to that
* actually being used (and the flushes are redundant.)
*/
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
unsigned long attrs)
@@ -1132,10 +1106,6 @@ static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
* 32-bit DMA.
* Use the generic dma-direct / swiotlb ops code in that case, as that
* handles bounce buffering for us.
*
* Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the
* latter is also selected by the Xen code, but that code for now relies
* on non-NULL dev_dma_ops. To be cleaned up later.
*/
if (IS_ENABLED(CONFIG_ARM_LPAE))
return NULL;
@@ -1372,17 +1342,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
return 0;
}
/*
* Create a CPU mapping for a specified pages
*/
static void *
__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
{
return dma_common_pages_remap(pages, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
}
/*
* Create a mapping in device IO address space for specified pages
*/
@@ -1455,18 +1414,13 @@ static struct page **__atomic_get_pages(void *addr)
static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
{
struct vm_struct *area;
if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
return __atomic_get_pages(cpu_addr);
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return cpu_addr;
area = find_vm_area(cpu_addr);
if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
return area->pages;
return NULL;
return dma_common_find_pages(cpu_addr);
}
static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
@@ -1539,7 +1493,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return pages;
addr = __iommu_alloc_remap(pages, size, gfp, prot,
addr = dma_common_pages_remap(pages, size, prot,
__builtin_return_address(0));
if (!addr)
goto err_mapping;
@@ -1622,10 +1576,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
}
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
dma_common_free_remap(cpu_addr, size,
VM_ARM_DMA_CONSISTENT | VM_USERMAP);
}
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
dma_common_free_remap(cpu_addr, size);
__iommu_remove_mapping(dev, handle, size);
__iommu_free_buffer(dev, pages, size, attrs);
@@ -2363,10 +2315,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
set_dma_ops(dev, dma_ops);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
dev->dma_ops = xen_dma_ops;
}
if (xen_initial_domain())
dev->dma_ops = &xen_swiotlb_dma_ops;
#endif
dev->archdata.dma_ops_setup = true;
}
@@ -2402,12 +2352,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
return dma_to_pfn(dev, dma_addr);
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
return __get_dma_pgprot(attrs, prot);
}
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{

查看文件

@@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
#define VM_ARM_MTYPE(mt) ((mt) << 20)
#define VM_ARM_MTYPE_MASK (0x1f << 20)
/* consistent regions used by dma_alloc_attrs() */
#define VM_ARM_DMA_CONSISTENT 0x20000000
struct static_vm {
struct vm_struct vm;

查看文件

@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpu.h>
#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/export.h>
@@ -35,105 +35,56 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order)
return __get_free_pages(flags, order);
}
enum dma_cache_op {
DMA_UNMAP,
DMA_MAP,
};
static bool hypercall_cflush = false;
/* functions called by SWIOTLB */
static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
size_t size, enum dma_data_direction dir, enum dma_cache_op op)
/* buffers in highmem or foreign pages cannot cross page boundaries */
static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
{
struct gnttab_cache_flush cflush;
unsigned long xen_pfn;
size_t left = size;
xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
offset %= XEN_PAGE_SIZE;
cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
cflush.offset = xen_offset_in_page(handle);
cflush.op = op;
do {
size_t len = left;
/* buffers in highmem or foreign pages cannot cross page
* boundaries */
if (len + offset > XEN_PAGE_SIZE)
len = XEN_PAGE_SIZE - offset;
if (size + cflush.offset > XEN_PAGE_SIZE)
cflush.length = XEN_PAGE_SIZE - cflush.offset;
else
cflush.length = size;
cflush.op = 0;
cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
cflush.offset = offset;
cflush.length = len;
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
if (op == DMA_MAP) {
if (dir == DMA_FROM_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
else
cflush.op = GNTTAB_CACHE_CLEAN;
}
if (cflush.op)
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
offset = 0;
xen_pfn++;
left -= len;
} while (left);
cflush.offset = 0;
cflush.a.dev_bus_addr += cflush.length;
size -= cflush.length;
} while (size);
}
static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
/*
* Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
* pages, it is not possible for it to contain a mix of local and foreign Xen
* pages. Calling pfn_valid on a foreign mfn will always return false, so if
* pfn_valid returns true the pages is local and we can use the native
* dma-direct functions, otherwise we call the Xen specific version.
*/
void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
if (pfn_valid(PFN_DOWN(handle)))
arch_sync_dma_for_cpu(dev, paddr, size, dir);
else if (dir != DMA_TO_DEVICE)
dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
}
static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
phys_addr_t paddr, size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
}
void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
if (is_device_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
}
void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
if (is_device_dma_coherent(hwdev))
return;
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
if (pfn_valid(PFN_DOWN(handle)))
arch_sync_dma_for_device(dev, paddr, size, dir);
else if (dir == DMA_FROM_DEVICE)
dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
else
dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
}
bool xen_arch_need_swiotlb(struct device *dev,
@@ -159,7 +110,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
* memory and we are not able to flush the cache.
*/
return (!hypercall_cflush && (xen_pfn != bfn) &&
!is_device_dma_coherent(dev));
!dev_is_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
@@ -173,16 +124,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
*dma_handle = pstart;
return 0;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
return;
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
const struct dma_map_ops *xen_dma_ops;
EXPORT_SYMBOL(xen_dma_ops);
int __init xen_mm_init(void)
{
@@ -190,7 +136,6 @@ int __init xen_mm_init(void)
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
xen_dma_ops = &xen_swiotlb_dma_ops;
cflush.op = 0;
cflush.a.dev_bus_addr = 0;