sparc: Make sparc64 use scalable lib/iommu-common.c functions
In iperf experiments, running Linux as the Tx side (TCP client) with 10 threads results in a severe performance drop when TSO is disabled, indicating a weakness in the software that can be avoided by using the scalable IOMMU arena DMA allocation. Baseline numbers before this patch: with default settings (TSO enabled): 9-9.5 Gbps; with TSO disabled using ethtool, throughput drops badly: 2-3 Gbps. After this patch, an iperf client with 10 threads can give a throughput of at least 8.5 Gbps, even when TSO is disabled. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

parent
ff7d37a502
commit
bb620c3d39
@@ -13,6 +13,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/iommu-helper.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/iommu-common.h>
|
||||
|
||||
#ifdef CONFIG_PCI
|
||||
#include <linux/pci.h>
|
||||
@@ -45,8 +46,9 @@
|
||||
"i" (ASI_PHYS_BYPASS_EC_E))
|
||||
|
||||
/* Must be invoked under the IOMMU lock. */
|
||||
static void iommu_flushall(struct iommu *iommu)
|
||||
static void iommu_flushall(struct iommu_map_table *iommu_map_table)
|
||||
{
|
||||
struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
|
||||
if (iommu->iommu_flushinv) {
|
||||
iommu_write(iommu->iommu_flushinv, ~(u64)0);
|
||||
} else {
|
||||
@@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
|
||||
iopte_val(*iopte) = val;
|
||||
}
|
||||
|
||||
/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
|
||||
* facility it must all be done in one pass while under the iommu lock.
|
||||
*
|
||||
* On sun4u platforms, we only flush the IOMMU once every time we've passed
|
||||
* over the entire page table doing allocations. Therefore we only ever advance
|
||||
* the hint and cannot backtrack it.
|
||||
*/
|
||||
/* Allocate 'npages' consecutive entries from iommu->arena.
 *
 * dev:    used only to obtain the DMA segment boundary; may be NULL.
 * iommu:  supplies the arena (map, hint, limit), page_table_map_base and
 *         the optional flush_all callback.
 * npages: number of entries requested; zero is rejected.
 * handle: optional in/out search position; a non-zero *handle is used as
 *         the starting point and it is updated to the end of the new range.
 *
 * Returns the index of the first allocated entry, or DMA_ERROR_CODE when
 * npages is zero or no range was found after two passes.
 */
unsigned long iommu_range_alloc(struct device *dev,
|
||||
struct iommu *iommu,
|
||||
unsigned long npages,
|
||||
unsigned long *handle)
|
||||
{
|
||||
unsigned long n, end, start, limit, boundary_size;
|
||||
struct iommu_arena *arena = &iommu->arena;
|
||||
int pass = 0;
|
||||
|
||||
/* This allocator was derived from x86_64's bit string search */
|
||||
|
||||
/* Sanity check */
|
||||
if (unlikely(npages == 0)) {
|
||||
if (printk_ratelimit())
|
||||
WARN_ON(1);
|
||||
return DMA_ERROR_CODE;
|
||||
}
|
||||
|
||||
/* Start at the caller's handle when one is given and non-zero,
 * otherwise at the arena hint.
 */
if (handle && *handle)
|
||||
start = *handle;
|
||||
else
|
||||
start = arena->hint;
|
||||
|
||||
limit = arena->limit;
|
||||
|
||||
/* The case below can happen if we have a small segment appended
|
||||
* to a large, or when the previous alloc was at the very end of
|
||||
* the available space. If so, go back to the beginning and flush.
|
||||
*/
|
||||
if (start >= limit) {
|
||||
start = 0;
|
||||
if (iommu->flush_all)
|
||||
iommu->flush_all(iommu);
|
||||
}
|
||||
|
||||
again:
|
||||
|
||||
/* Boundary is the device's segment boundary + 1, rounded up to the
 * IO page size; without a device, 1UL << 32 is used instead.
 */
if (dev)
|
||||
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
|
||||
1 << IO_PAGE_SHIFT);
|
||||
else
|
||||
boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
|
||||
|
||||
/* Base and boundary are passed to iommu_area_alloc() in units of
 * IO pages (both shifted right by IO_PAGE_SHIFT); last argument is 0.
 */
n = iommu_area_alloc(arena->map, limit, start, npages,
|
||||
iommu->page_table_map_base >> IO_PAGE_SHIFT,
|
||||
boundary_size >> IO_PAGE_SHIFT, 0);
|
||||
/* -1 from iommu_area_alloc() is treated as "no range found". */
if (n == -1) {
|
||||
if (likely(pass < 1)) {
|
||||
/* First failure, rescan from the beginning. */
|
||||
start = 0;
|
||||
if (iommu->flush_all)
|
||||
iommu->flush_all(iommu);
|
||||
pass++;
|
||||
goto again;
|
||||
} else {
|
||||
/* Second failure, give up */
|
||||
return DMA_ERROR_CODE;
|
||||
}
|
||||
}
|
||||
|
||||
end = n + npages;
|
||||
|
||||
/* The next search without a handle starts just past this range. */
arena->hint = end;
|
||||
|
||||
/* Update handle for SG allocations */
|
||||
if (handle)
|
||||
*handle = end;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Release 'npages' arena entries for the range that begins at 'dma_addr'.
 *
 * The DMA address is converted to an entry index by subtracting
 * iommu->page_table_map_base and shifting right by IO_PAGE_SHIFT; the
 * corresponding bits are then cleared in arena->map.  No locking is done
 * here (callers visible in this change take iommu->lock around the call).
 */
void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
|
||||
{
|
||||
struct iommu_arena *arena = &iommu->arena;
|
||||
unsigned long entry;
|
||||
|
||||
/* Index of the first entry, relative to the start of the mapped range. */
entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
|
||||
|
||||
bitmap_clear(arena->map, entry, npages);
|
||||
}
|
||||
|
||||
int iommu_table_init(struct iommu *iommu, int tsbsize,
|
||||
u32 dma_offset, u32 dma_addr_mask,
|
||||
int numa_node)
|
||||
@@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
|
||||
/* Setup initial software IOMMU state. */
|
||||
spin_lock_init(&iommu->lock);
|
||||
iommu->ctx_lowest_free = 1;
|
||||
iommu->page_table_map_base = dma_offset;
|
||||
iommu->tbl.table_map_base = dma_offset;
|
||||
iommu->dma_addr_mask = dma_addr_mask;
|
||||
|
||||
/* Allocate and initialize the free area map. */
|
||||
sz = num_tsb_entries / 8;
|
||||
sz = (sz + 7UL) & ~7UL;
|
||||
iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
|
||||
if (!iommu->arena.map) {
|
||||
printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
|
||||
iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
|
||||
if (!iommu->tbl.map)
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(iommu->arena.map, 0, sz);
|
||||
iommu->arena.limit = num_tsb_entries;
|
||||
memset(iommu->tbl.map, 0, sz);
|
||||
|
||||
if (tlb_type != hypervisor)
|
||||
iommu->flush_all = iommu_flushall;
|
||||
iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
|
||||
(tlb_type != hypervisor ? iommu_flushall : NULL),
|
||||
false, 1, false);
|
||||
|
||||
/* Allocate and initialize the dummy page which we
|
||||
* set inactive IO PTEs to point to.
|
||||
@@ -235,18 +147,20 @@ out_free_dummy_page:
|
||||
iommu->dummy_page = 0UL;
|
||||
|
||||
out_free_map:
|
||||
kfree(iommu->arena.map);
|
||||
iommu->arena.map = NULL;
|
||||
kfree(iommu->tbl.map);
|
||||
iommu->tbl.map = NULL;
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
|
||||
static inline iopte_t *alloc_npages(struct device *dev,
|
||||
struct iommu *iommu,
|
||||
unsigned long npages)
|
||||
{
|
||||
unsigned long entry;
|
||||
|
||||
entry = iommu_range_alloc(dev, iommu, npages, NULL);
|
||||
entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
|
||||
(unsigned long)(-1), 0);
|
||||
if (unlikely(entry == DMA_ERROR_CODE))
|
||||
return NULL;
|
||||
|
||||
@@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_addrp, gfp_t gfp,
|
||||
struct dma_attrs *attrs)
|
||||
{
|
||||
unsigned long flags, order, first_page;
|
||||
unsigned long order, first_page;
|
||||
struct iommu *iommu;
|
||||
struct page *page;
|
||||
int npages, nid;
|
||||
@@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
|
||||
|
||||
iommu = dev->archdata.iommu;
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
|
||||
if (unlikely(iopte == NULL)) {
|
||||
free_pages(first_page, order);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*dma_addrp = (iommu->page_table_map_base +
|
||||
*dma_addrp = (iommu->tbl.table_map_base +
|
||||
((iopte - iommu->page_table) << IO_PAGE_SHIFT));
|
||||
ret = (void *) first_page;
|
||||
npages = size >> IO_PAGE_SHIFT;
|
||||
@@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
|
||||
struct dma_attrs *attrs)
|
||||
{
|
||||
struct iommu *iommu;
|
||||
unsigned long flags, order, npages;
|
||||
unsigned long order, npages;
|
||||
|
||||
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
|
||||
iommu = dev->archdata.iommu;
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
|
||||
iommu_range_free(iommu, dvma, npages);
|
||||
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
|
||||
|
||||
order = get_order(size);
|
||||
if (order < 10)
|
||||
@@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
|
||||
npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
|
||||
npages >>= IO_PAGE_SHIFT;
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
base = alloc_npages(dev, iommu, npages);
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
ctx = 0;
|
||||
if (iommu->iommu_ctxflush)
|
||||
ctx = iommu_alloc_ctx(iommu);
|
||||
@@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
|
||||
if (unlikely(!base))
|
||||
goto bad;
|
||||
|
||||
bus_addr = (iommu->page_table_map_base +
|
||||
bus_addr = (iommu->tbl.table_map_base +
|
||||
((base - iommu->page_table) << IO_PAGE_SHIFT));
|
||||
ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
|
||||
base_paddr = __pa(oaddr & IO_PAGE_MASK);
|
||||
@@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
|
||||
npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
|
||||
npages >>= IO_PAGE_SHIFT;
|
||||
base = iommu->page_table +
|
||||
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
|
||||
((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
|
||||
bus_addr &= IO_PAGE_MASK;
|
||||
|
||||
spin_lock_irqsave(&iommu->lock, flags);
|
||||
@@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
|
||||
for (i = 0; i < npages; i++)
|
||||
iopte_make_dummy(iommu, base + i);
|
||||
|
||||
iommu_range_free(iommu, bus_addr, npages);
|
||||
|
||||
iommu_free_ctx(iommu, ctx);
|
||||
|
||||
spin_unlock_irqrestore(&iommu->lock, flags);
|
||||
|
||||
iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
|
||||
}
|
||||
|
||||
static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
@@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
max_seg_size = dma_get_max_seg_size(dev);
|
||||
seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
|
||||
IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
|
||||
base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
|
||||
base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
|
||||
for_each_sg(sglist, s, nelems, i) {
|
||||
unsigned long paddr, npages, entry, out_entry = 0, slen;
|
||||
iopte_t *base;
|
||||
@@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
/* Allocate iommu entries for that segment */
|
||||
paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
|
||||
npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
|
||||
entry = iommu_range_alloc(dev, iommu, npages, &handle);
|
||||
entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
|
||||
&handle, (unsigned long)(-1), 0);
|
||||
|
||||
/* Handle failure */
|
||||
if (unlikely(entry == DMA_ERROR_CODE)) {
|
||||
@@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
|
||||
base = iommu->page_table + entry;
|
||||
|
||||
/* Convert entry to a dma_addr_t */
|
||||
dma_addr = iommu->page_table_map_base +
|
||||
dma_addr = iommu->tbl.table_map_base +
|
||||
(entry << IO_PAGE_SHIFT);
|
||||
dma_addr |= (s->offset & ~IO_PAGE_MASK);
|
||||
|
||||
@@ -654,15 +562,17 @@ iommu_map_failed:
|
||||
vaddr = s->dma_address & IO_PAGE_MASK;
|
||||
npages = iommu_num_pages(s->dma_address, s->dma_length,
|
||||
IO_PAGE_SIZE);
|
||||
iommu_range_free(iommu, vaddr, npages);
|
||||
|
||||
entry = (vaddr - iommu->page_table_map_base)
|
||||
entry = (vaddr - iommu->tbl.table_map_base)
|
||||
>> IO_PAGE_SHIFT;
|
||||
base = iommu->page_table + entry;
|
||||
|
||||
for (j = 0; j < npages; j++)
|
||||
iopte_make_dummy(iommu, base + j);
|
||||
|
||||
iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
|
||||
DMA_ERROR_CODE);
|
||||
|
||||
s->dma_address = DMA_ERROR_CODE;
|
||||
s->dma_length = 0;
|
||||
}
|
||||
@@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
|
||||
if (iommu->iommu_ctxflush) {
|
||||
iopte_t *base;
|
||||
u32 bus_addr;
|
||||
struct iommu_map_table *tbl = &iommu->tbl;
|
||||
|
||||
bus_addr = sg->dma_address & IO_PAGE_MASK;
|
||||
base = iommu->page_table +
|
||||
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
|
||||
((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
|
||||
|
||||
ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
|
||||
}
|
||||
@@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
|
||||
if (!len)
|
||||
break;
|
||||
npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
|
||||
iommu_range_free(iommu, dma_handle, npages);
|
||||
|
||||
entry = ((dma_handle - iommu->page_table_map_base)
|
||||
entry = ((dma_handle - iommu->tbl.table_map_base)
|
||||
>> IO_PAGE_SHIFT);
|
||||
base = iommu->page_table + entry;
|
||||
|
||||
@@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
|
||||
for (i = 0; i < npages; i++)
|
||||
iopte_make_dummy(iommu, base + i);
|
||||
|
||||
iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
|
||||
DMA_ERROR_CODE);
|
||||
sg = sg_next(sg);
|
||||
}
|
||||
|
||||
@@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
|
||||
if (iommu->iommu_ctxflush &&
|
||||
strbuf->strbuf_ctxflush) {
|
||||
iopte_t *iopte;
|
||||
struct iommu_map_table *tbl = &iommu->tbl;
|
||||
|
||||
iopte = iommu->page_table +
|
||||
((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
|
||||
((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
|
||||
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
|
||||
}
|
||||
|
||||
@@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
|
||||
if (iommu->iommu_ctxflush &&
|
||||
strbuf->strbuf_ctxflush) {
|
||||
iopte_t *iopte;
|
||||
struct iommu_map_table *tbl = &iommu->tbl;
|
||||
|
||||
iopte = iommu->page_table +
|
||||
((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
|
||||
iopte = iommu->page_table + ((sglist[0].dma_address -
|
||||
tbl->table_map_base) >> IO_PAGE_SHIFT);
|
||||
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user