Merge branch 'next' of git://git.infradead.org/users/vkoul/slave-dma

Pull slave-dmaengine changes from Vinod Koul:
 "This brings for slave dmaengine:

   - Change the dma completion status to DMA_COMPLETE from DMA_SUCCESS, as
     dmaengine can only transfer and not verify the validity of dma
     transfers (see the status-check sketch after this summary)

   - Bunch of fixes across drivers:

      - cppi41 driver fixes from Daniel

      - 8 channel freescale dma engine support and updated bindings from
        Hongbo

      - mxs-dma fixes and cleanup by Markus

   - DMAengine updates from Dan:

      - Bartlomiej and Dan finalized a rework of the dma address unmap
        implementation.

      - In the course of testing the unmap rework, a collection of
        enhancements to dmatest fell out: notably basic performance
        statistics, and fixed / enhanced test control through the new
        module parameters 'run', 'wait', 'noverify', and 'verbose'.
        Thanks to Andriy and Linus [Walleij] for their review.

      - Testing the raid-related corner cases of the unmap rework
        triggered bugs in the recently added 16-source operation support
        in the ioatdma driver.

      - Some minor fixes / cleanups to mv_xor and ioatdma"
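
A minimal sketch of what the status rename means for a dmaengine client,
assuming a channel and cookie obtained in the usual way (the names below are
illustrative, not taken from this pull):

	/* Poll a previously issued descriptor for completion. */
	enum dma_status status;

	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
	if (status == DMA_COMPLETE)	/* formerly DMA_SUCCESS */
		pr_debug("descriptor transferred (contents not verified)\n");
	else if (status == DMA_ERROR)
		pr_debug("transfer failed\n");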

* 'next' of git://git.infradead.org/users/vkoul/slave-dma: (99 commits)
  dma: mv_xor: Fix mis-usage of mmio 'base' and 'high_base' registers
  dma: mv_xor: Remove unneeded NULL address check
  ioat: fix ioat3_irq_reinit
  ioat: kill msix_single_vector support
  raid6test: add new corner case for ioatdma driver
  ioatdma: clean up sed pool kmem_cache
  ioatdma: fix selection of 16 vs 8 source path
  ioatdma: fix sed pool selection
  ioatdma: Fix bug in selftest after removal of DMA_MEMSET.
  dmatest: verbose mode
  dmatest: convert to dmaengine_unmap_data
  dmatest: add a 'wait' parameter
  dmatest: add basic performance metrics
  dmatest: add support for skipping verification and random data setup
  dmatest: use pseudo random numbers
  dmatest: support xor-only, or pq-only channels in tests
  dmatest: restore ability to start test at module load and init
  dmatest: cleanup redundant "dmatest: " prefixes
  dmatest: replace stored results mechanism, with uniform messages
  Revert "dmatest: append verify result to results"
  ...
Merged by Linus Torvalds on 2013-11-20 13:20:24 -08:00
71 changed files with 1994 additions and 2287 deletions

@@ -46,14 +46,21 @@
#define EDMA_CHANS 64
#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
/* Max of 16 segments per channel to conserve PaRAM slots */
#define MAX_NR_SG 16
/*
* Max of 20 segments per channel to conserve PaRAM slots
* Also note that MAX_NR_SG should be at least the number of periods
* that are required for ASoC, otherwise DMA prep calls will
* fail. Today davinci-pcm is the only user of this driver and
* requires at least 17 slots, so we set the default to 20.
*/
#define MAX_NR_SG 20
#define EDMA_MAX_SLOTS MAX_NR_SG
#define EDMA_DESCRIPTORS 16
struct edma_desc {
struct virt_dma_desc vdesc;
struct list_head node;
int cyclic;
int absync;
int pset_nr;
int processed;
@@ -167,8 +174,13 @@ static void edma_execute(struct edma_chan *echan)
* then setup a link to the dummy slot, this results in all future
* events being absorbed and that's OK because we're done
*/
if (edesc->processed == edesc->pset_nr)
edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
if (edesc->processed == edesc->pset_nr) {
if (edesc->cyclic)
edma_link(echan->slot[nslots-1], echan->slot[1]);
else
edma_link(echan->slot[nslots-1],
echan->ecc->dummy_slot);
}
edma_resume(echan->ch_num);
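/*
 * For illustration only (hypothetical two-period buffer, so nslots = 3 and
 * the last PaRAM set is a copy of the first): the cyclic branch above turns
 * the link chain into a ring,
 *
 *   slot[0] -> slot[1] -> slot[2] -> slot[1] -> slot[2] -> ...
 *
 * whereas the non-cyclic branch parks the chain on the dummy slot so any
 * further events are absorbed once the transfer is done.
 */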
@@ -250,6 +262,117 @@ static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
return ret;
}
/*
* A PaRAM set configuration abstraction used by other modes
* @chan: Channel whose PaRAM set we're configuring
* @pset: PaRAM set to initialize and setup.
* @src_addr: Source address of the DMA
* @dst_addr: Destination address of the DMA
* @burst: Burst length to send, in units of dev_width
* @dev_width: Width of one device data word (slave bus width)
* @dma_length: Total length of the DMA transfer
* @direction: Direction of the transfer
*/
static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
enum dma_slave_buswidth dev_width, unsigned int dma_length,
enum dma_transfer_direction direction)
{
struct edma_chan *echan = to_edma_chan(chan);
struct device *dev = chan->device->dev;
int acnt, bcnt, ccnt, cidx;
int src_bidx, dst_bidx, src_cidx, dst_cidx;
int absync;
acnt = dev_width;
/*
* If the maxburst is equal to the fifo width, use
* A-synced transfers. This allows for large contiguous
* buffer transfers using only one PaRAM set.
*/
if (burst == 1) {
/*
* For the A-sync case, bcnt and ccnt are the remainder
* and quotient respectively of the division of
* (dma_length / acnt) by (SZ_64K - 1). This is so
* that in case bcnt overflows, we have ccnt to use.
* Note: bcntrld is used in A-sync transfers only, but it
* only applies for sg_dma_len(sg) >= SZ_64K.
* In this case, the approach adopted is: bcnt for the
* first frame will be the remainder below, and for
* every successive frame bcnt will be SZ_64K - 1. This
* is assured as bcntrld is set to 0xffff at the end of the function.
*/
absync = false;
ccnt = dma_length / acnt / (SZ_64K - 1);
bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
/*
* If bcnt is non-zero, we have a remainder and hence an
* extra frame to transfer, so increment ccnt.
*/
if (bcnt)
ccnt++;
else
bcnt = SZ_64K - 1;
cidx = acnt;
} else {
/*
* If maxburst is greater than the fifo address_width,
* use AB-synced transfers where A count is the fifo
* address_width and B count is the maxburst. In this
* case, we are limited to transfers of C count frames
* of (address_width * maxburst) where C count is limited
* to SZ_64K-1. This places an upper bound on the length
* of an SG segment that can be handled.
*/
absync = true;
bcnt = burst;
ccnt = dma_length / (acnt * bcnt);
if (ccnt > (SZ_64K - 1)) {
dev_err(dev, "Exceeded max SG segment size\n");
return -EINVAL;
}
cidx = acnt * bcnt;
}
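/*
 * Illustrative numbers, not from the patch: with dev_width = 4 bytes and
 * maxburst = 8, acnt = 4 and bcnt = 8, so one AB-synced frame moves
 * 32 bytes and cidx = 32; ccnt = dma_length / 32 must stay within
 * SZ_64K - 1, capping a single SG segment at just under 2 MiB here.
 */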
if (direction == DMA_MEM_TO_DEV) {
src_bidx = acnt;
src_cidx = cidx;
dst_bidx = 0;
dst_cidx = 0;
} else if (direction == DMA_DEV_TO_MEM) {
src_bidx = 0;
src_cidx = 0;
dst_bidx = acnt;
dst_cidx = cidx;
} else {
dev_err(dev, "%s: direction not implemented yet\n", __func__);
return -EINVAL;
}
pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
/* Configure A or AB synchronized transfers */
if (absync)
pset->opt |= SYNCDIM;
pset->src = src_addr;
pset->dst = dst_addr;
pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
pset->a_b_cnt = bcnt << 16 | acnt;
pset->ccnt = ccnt;
/*
* A (bcntrld) auto reload is required only in the A-sync
* case, and there the only reload value ever needed is
* SZ_64K - 1. 'link' is initially set to NULL
* and will be populated later by edma_execute.
*/
pset->link_bcntrld = 0xffffffff;
return absync;
}
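/*
 * Worked A-sync example for the helper above (illustrative numbers, not
 * from the patch): with acnt = 4 and dma_length = 1 MiB there are 262144
 * elements, so ccnt = 262144 / 65535 = 4 and bcnt = 262144 - 4 * 65535 = 4;
 * bcnt is non-zero, so ccnt becomes 5.  BCNTRLD (0xffff) then reloads BCNT
 * to 65535 for every frame after the first: 4 + 4 * 65535 = 262144 elements
 * in total.
 */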
static struct dma_async_tx_descriptor *edma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
struct edma_chan *echan = to_edma_chan(chan);
struct device *dev = chan->device->dev;
struct edma_desc *edesc;
dma_addr_t dev_addr;
dma_addr_t src_addr = 0, dst_addr = 0;
enum dma_slave_buswidth dev_width;
u32 burst;
struct scatterlist *sg;
int acnt, bcnt, ccnt, src, dst, cidx;
int src_bidx, dst_bidx, src_cidx, dst_cidx;
int i, nslots;
int i, nslots, ret;
if (unlikely(!echan || !sgl || !sg_len))
return NULL;
if (direction == DMA_DEV_TO_MEM) {
dev_addr = echan->cfg.src_addr;
src_addr = echan->cfg.src_addr;
dev_width = echan->cfg.src_addr_width;
burst = echan->cfg.src_maxburst;
} else if (direction == DMA_MEM_TO_DEV) {
dev_addr = echan->cfg.dst_addr;
dst_addr = echan->cfg.dst_addr;
dev_width = echan->cfg.dst_addr_width;
burst = echan->cfg.dst_maxburst;
} else {
@@ -307,7 +428,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
if (echan->slot[i] < 0) {
kfree(edesc);
dev_err(dev, "Failed to allocate slot\n");
kfree(edesc);
return NULL;
}
}
@@ -315,64 +435,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
/* Configure PaRAM sets for each SG */
for_each_sg(sgl, sg, sg_len, i) {
/* Get address for each SG */
if (direction == DMA_DEV_TO_MEM)
dst_addr = sg_dma_address(sg);
else
src_addr = sg_dma_address(sg);
acnt = dev_width;
/*
* If the maxburst is equal to the fifo width, use
* A-synced transfers. This allows for large contiguous
* buffer transfers using only one PaRAM set.
*/
if (burst == 1) {
edesc->absync = false;
ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
if (bcnt)
ccnt++;
else
bcnt = SZ_64K - 1;
cidx = acnt;
/*
* If maxburst is greater than the fifo address_width,
* use AB-synced transfers where A count is the fifo
* address_width and B count is the maxburst. In this
* case, we are limited to transfers of C count frames
* of (address_width * maxburst) where C count is limited
* to SZ_64K-1. This places an upper bound on the length
* of an SG segment that can be handled.
*/
} else {
edesc->absync = true;
bcnt = burst;
ccnt = sg_dma_len(sg) / (acnt * bcnt);
if (ccnt > (SZ_64K - 1)) {
dev_err(dev, "Exceeded max SG segment size\n");
kfree(edesc);
return NULL;
}
cidx = acnt * bcnt;
ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
dst_addr, burst, dev_width,
sg_dma_len(sg), direction);
if (ret < 0) {
kfree(edesc);
return NULL;
}
if (direction == DMA_MEM_TO_DEV) {
src = sg_dma_address(sg);
dst = dev_addr;
src_bidx = acnt;
src_cidx = cidx;
dst_bidx = 0;
dst_cidx = 0;
} else {
src = dev_addr;
dst = sg_dma_address(sg);
src_bidx = 0;
src_cidx = 0;
dst_bidx = acnt;
dst_cidx = cidx;
}
edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
/* Configure A or AB synchronized transfers */
if (edesc->absync)
edesc->pset[i].opt |= SYNCDIM;
edesc->absync = ret;
/* If this is the last in a current SG set of transactions,
enable interrupts so that next set is processed */
@@ -382,17 +459,138 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
/* If this is the last set, enable completion interrupt flag */
if (i == sg_len - 1)
edesc->pset[i].opt |= TCINTEN;
edesc->pset[i].src = src;
edesc->pset[i].dst = dst;
edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
edesc->pset[i].ccnt = ccnt;
edesc->pset[i].link_bcntrld = 0xffffffff;
}
return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
}
static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
size_t period_len, enum dma_transfer_direction direction,
unsigned long tx_flags, void *context)
{
struct edma_chan *echan = to_edma_chan(chan);
struct device *dev = chan->device->dev;
struct edma_desc *edesc;
dma_addr_t src_addr, dst_addr;
enum dma_slave_buswidth dev_width;
u32 burst;
int i, ret, nslots;
if (unlikely(!echan || !buf_len || !period_len))
return NULL;
if (direction == DMA_DEV_TO_MEM) {
src_addr = echan->cfg.src_addr;
dst_addr = buf_addr;
dev_width = echan->cfg.src_addr_width;
burst = echan->cfg.src_maxburst;
} else if (direction == DMA_MEM_TO_DEV) {
src_addr = buf_addr;
dst_addr = echan->cfg.dst_addr;
dev_width = echan->cfg.dst_addr_width;
burst = echan->cfg.dst_maxburst;
} else {
dev_err(dev, "%s: bad direction?\n", __func__);
return NULL;
}
if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
dev_err(dev, "Undefined slave buswidth\n");
return NULL;
}
if (unlikely(buf_len % period_len)) {
dev_err(dev, "Buffer length should be a multiple of period length\n");
return NULL;
}
nslots = (buf_len / period_len) + 1;
/*
* Cyclic DMA users such as audio cannot tolerate delays introduced
* by cases where the number of periods is more than the maximum
* number of SGs the EDMA driver can handle at a time. For DMA types
* such as Slave SGs, such delays are tolerable and synchronized,
* but the synchronization is difficult to achieve with Cyclic and
* cannot be guaranteed, so we error out early.
*/
if (nslots > MAX_NR_SG)
return NULL;
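/*
 * Illustrative numbers, not from the patch: an 8 KiB buffer with 2 KiB
 * periods gives buf_len / period_len = 4, so nslots = 5; the extra set is
 * filled below with a copy of the first period so the link chain can wrap.
 * With MAX_NR_SG = 20 this comfortably fits the davinci-pcm case noted at
 * the top of the file.
 */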
edesc = kzalloc(sizeof(*edesc) + nslots *
sizeof(edesc->pset[0]), GFP_ATOMIC);
if (!edesc) {
dev_dbg(dev, "Failed to allocate a descriptor\n");
return NULL;
}
edesc->cyclic = 1;
edesc->pset_nr = nslots;
dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
dev_dbg(dev, "%s: period_len=%zu\n", __func__, period_len);
dev_dbg(dev, "%s: buf_len=%zu\n", __func__, buf_len);
for (i = 0; i < nslots; i++) {
/* Allocate a PaRAM slot, if needed */
if (echan->slot[i] < 0) {
echan->slot[i] =
edma_alloc_slot(EDMA_CTLR(echan->ch_num),
EDMA_SLOT_ANY);
if (echan->slot[i] < 0) {
kfree(edesc);
dev_err(dev, "Failed to allocate slot\n");
return NULL;
}
}
if (i == nslots - 1) {
memcpy(&edesc->pset[i], &edesc->pset[0],
sizeof(edesc->pset[0]));
break;
}
ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
dst_addr, burst, dev_width, period_len,
direction);
if (ret < 0) {
kfree(edesc);
return NULL;
}
if (direction == DMA_DEV_TO_MEM)
dst_addr += period_len;
else
src_addr += period_len;
dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
dev_dbg(dev,
"\n pset[%d]:\n"
" chnum\t%d\n"
" slot\t%d\n"
" opt\t%08x\n"
" src\t%08x\n"
" dst\t%08x\n"
" abcnt\t%08x\n"
" ccnt\t%08x\n"
" bidx\t%08x\n"
" cidx\t%08x\n"
" lkrld\t%08x\n",
i, echan->ch_num, echan->slot[i],
edesc->pset[i].opt,
edesc->pset[i].src,
edesc->pset[i].dst,
edesc->pset[i].a_b_cnt,
edesc->pset[i].ccnt,
edesc->pset[i].src_dst_bidx,
edesc->pset[i].src_dst_cidx,
edesc->pset[i].link_bcntrld);
edesc->absync = ret;
/*
* Enable interrupts for every period because callback
* has to be called for every period.
*/
edesc->pset[i].opt |= TCINTEN;
}
return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
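/*
 * A hedged sketch, not part of the patch, of how a slave client (e.g. an
 * ALSA PCM driver) could use the new cyclic support; the function name and
 * parameters below are illustrative only.
 */
static int example_start_cyclic(struct dma_chan *chan, dma_addr_t buf,
				size_t buf_len, size_t period_len,
				void (*period_cb)(void *), void *cb_data)
{
	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
					 DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
	if (!desc)
		return -ENOMEM;

	/* vchan_cyclic_callback() above ends up invoking this per period */
	desc->callback = period_cb;
	desc->callback_param = cb_data;

	dmaengine_submit(desc);
	dma_async_issue_pending(chan);
	return 0;
}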
@@ -406,30 +604,34 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
unsigned long flags;
struct edmacc_param p;
/* Pause the channel */
edma_pause(echan->ch_num);
edesc = echan->edesc;
/* Pause the channel for non-cyclic */
if (!edesc || (edesc && !edesc->cyclic))
edma_pause(echan->ch_num);
switch (ch_status) {
case DMA_COMPLETE:
case EDMA_DMA_COMPLETE:
spin_lock_irqsave(&echan->vchan.lock, flags);
edesc = echan->edesc;
if (edesc) {
if (edesc->processed == edesc->pset_nr) {
if (edesc->cyclic) {
vchan_cyclic_callback(&edesc->vdesc);
} else if (edesc->processed == edesc->pset_nr) {
dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
edma_stop(echan->ch_num);
vchan_cookie_complete(&edesc->vdesc);
edma_execute(echan);
} else {
dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
edma_execute(echan);
}
edma_execute(echan);
}
spin_unlock_irqrestore(&echan->vchan.lock, flags);
break;
case DMA_CC_ERROR:
case EDMA_DMA_CC_ERROR:
spin_lock_irqsave(&echan->vchan.lock, flags);
edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
unsigned long flags;
ret = dma_cookie_status(chan, cookie, txstate);
if (ret == DMA_SUCCESS || !txstate)
if (ret == DMA_COMPLETE || !txstate)
return ret;
spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
struct device *dev)
{
dma->device_prep_slave_sg = edma_prep_slave_sg;
dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
dma->device_alloc_chan_resources = edma_alloc_chan_resources;
dma->device_free_chan_resources = edma_free_chan_resources;
dma->device_issue_pending = edma_issue_pending;