drm/nouveau: port all engines to new engine module format

This is a HUGE commit, but it's not nearly as bad as it looks - any problems
can be isolated to a particular chipset and engine combination.  It was
simply too difficult to port each one at a time; the compat layers are
*already* ridiculous.

Most of the changes here are simply to the glue; the process for each of the
engine modules was to start with a standard skeleton and copy+paste the old
code into the appropriate places, fixing up variable names etc. as needed.

v2: Marcin Slusarz <marcin.slusarz@gmail.com>
- fix find/replace bug in license header

v3: Ben Skeggs <bskeggs@redhat.com>
- bump indirect pushbuf size to 8KiB; 4KiB was barely enough for userspace and
  left no space for the kernel's requirements during GEM pushbuf submission
  (see the worked example below).
- fix duplicate assignments noticed by clang
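
To put numbers on that change: nouveau_dma_init() (removed from this file in
the first hunk below, though the math itself is unchanged) splits the push
buffer in half, one half for command data and one half for 8-byte
indirect-buffer (IB) entries.  A minimal userspace-style sketch of that
arithmetic; the helper name here is made up for illustration:

	#include <stdio.h>

	/* Mirror the NV50+ split from nouveau_dma_init(): half the buffer
	 * holds command data, half holds 8-byte IB entries. */
	static void show_split(unsigned int size)
	{
		unsigned int ib_size = size / 2;              /* bytes for IB ring   */
		unsigned int ib_max  = (ib_size / 8) - 1;     /* usable IB entries   */
		unsigned int dma_max = (size - ib_size) >> 2; /* data size in dwords */

		printf("%5u bytes: %3u IB entries, %4u data dwords\n",
		       size, ib_max, dma_max);
	}

	int main(void)
	{
		show_split(4096); /* old: 255 entries,  512 dwords */
		show_split(8192); /* new: 511 entries, 1024 dwords */
		return 0;
	}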

v4: Marcin Slusarz <marcin.slusarz@gmail.com>
- add sparse annotations to nv04_fifo_pause/nv04_fifo_start
- use ioread32_native/iowrite32_native for fifo control registers
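
The sparse annotations let the checker verify that the pause/start pair
acquires and releases the fifo lock symmetrically, and the *_native accessors
keep the fifo control registers in the GPU's native byte order on big-endian
hosts.  A generic sketch of the annotation pattern only; the type, lock and
bodies below are placeholders, not the actual nv04_fifo code:

	#include <linux/spinlock.h>

	struct demo_fifo {
		spinlock_t lock;
	};

	/* Tell sparse that this function returns with the lock held... */
	static void demo_fifo_pause(struct demo_fifo *fifo, unsigned long *flags)
	__acquires(fifo->lock)
	{
		spin_lock_irqsave(&fifo->lock, *flags);
		/* ... freeze the puller, wait for it to go idle ... */
	}

	/* ...and that this one releases it again. */
	static void demo_fifo_start(struct demo_fifo *fifo, unsigned long *flags)
	__releases(fifo->lock)
	{
		/* ... unfreeze the puller ... */
		spin_unlock_irqrestore(&fifo->lock, *flags);
	}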

v5: Ben Skeggs <bskeggs@redhat.com>
- rebase on v3.6-rc4, modified to keep copy engine fix intact
- nv10/fence: unmap fence bo before destroying
- fixed fermi regression when using nvidia gr fuc
- fixed typo in supported dma_mask checking

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Author: Ben Skeggs
Date:   2012-07-20 08:17:34 +10:00
Parent: ac1499d957
Commit: ebb945a94b
146 changed files with 14480 additions and 11360 deletions

drivers/gpu/drm/nouveau/nouveau_dma.c

@@ -24,41 +24,16 @@
  *
  */
-#include "drmP.h"
-#include "drm.h"
-#include "nouveau_drv.h"
+#include <core/client.h>
+
+#include "nouveau_drm.h"
 #include "nouveau_dma.h"
-
-#include <core/ramht.h>
-
-void
-nouveau_dma_init(struct nouveau_channel *chan)
-{
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nouveau_bo *pushbuf = chan->pushbuf_bo;
-
-	if (dev_priv->card_type >= NV_50) {
-		const int ib_size = pushbuf->bo.mem.size / 2;
-
-		chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2;
-		chan->dma.ib_max = (ib_size / 8) - 1;
-		chan->dma.ib_put = 0;
-		chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put;
-
-		chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2;
-	} else {
-		chan->dma.max = (pushbuf->bo.mem.size >> 2) - 2;
-	}
-
-	chan->dma.put = 0;
-	chan->dma.cur = chan->dma.put;
-	chan->dma.free = chan->dma.max - chan->dma.cur;
-}

 void
 OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
 {
 	bool is_iomem;
-	u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
+	u32 *mem = ttm_kmap_obj_virtual(&chan->push.buffer->kmap, &is_iomem);
 	mem = &mem[chan->dma.cur];
 	if (is_iomem)
 		memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
@@ -79,9 +54,9 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 {
 	uint64_t val;

-	val = nvchan_rd32(chan, chan->user_get);
+	val = nv_ro32(chan->object, chan->user_get);
 	if (chan->user_get_hi)
-		val |= (uint64_t)nvchan_rd32(chan, chan->user_get_hi) << 32;
+		val |= (uint64_t)nv_ro32(chan->object, chan->user_get_hi) << 32;

 	/* reset counter as long as GET is still advancing, this is
 	 * to avoid misdetecting a GPU lockup if the GPU happens to
@@ -93,32 +68,33 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 	}

 	if ((++*timeout & 0xff) == 0) {
-		DRM_UDELAY(1);
+		udelay(1);
 		if (*timeout > 100000)
 			return -EBUSY;
 	}

-	if (val < chan->pushbuf_base ||
-	    val > chan->pushbuf_base + (chan->dma.max << 2))
+	if (val < chan->push.vma.offset ||
+	    val > chan->push.vma.offset + (chan->dma.max << 2))
 		return -EINVAL;

-	return (val - chan->pushbuf_base) >> 2;
+	return (val - chan->push.vma.offset) >> 2;
 }

 void
 nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
 	      int delta, int length)
 {
-	struct nouveau_bo *pb = chan->pushbuf_bo;
+	struct nouveau_bo *pb = chan->push.buffer;
 	struct nouveau_vma *vma;
 	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
 	u64 offset;

-	vma = nouveau_bo_vma_find(bo, chan->vm);
+	vma = nouveau_bo_vma_find(bo, nv_client(chan->cli)->vm);
 	BUG_ON(!vma);
 	offset = vma->offset + delta;

 	BUG_ON(chan->dma.ib_free < 1);
+
 	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
 	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
@@ -128,7 +104,7 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
 	/* Flush writes. */
 	nouveau_bo_rd32(pb, 0);

-	nvchan_wr32(chan, 0x8c, chan->dma.ib_put);
+	nv_wo32(chan->object, 0x8c, chan->dma.ib_put);
 	chan->dma.ib_free--;
 }
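
The two nouveau_bo_wr32() calls above form one IB entry: word 0 is the low 32
bits of the buffer's GPU virtual address, word 1 packs the high address bits
with the submission length in bytes shifted left by 8.  A standalone sketch of
that encoding; the offset and length values are made up for illustration:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t offset = 0x120100000ull; /* hypothetical vma->offset + delta */
		int      length = 0x400;          /* bytes of commands to fetch       */

		uint32_t w0 = (uint32_t)offset;               /* lower_32_bits(offset) */
		uint32_t w1 = (uint32_t)(offset >> 32) | (uint32_t)length << 8;

		printf("IB entry: 0x%08x 0x%08x\n", w0, w1);  /* 0x20100000 0x00040001 */
		return 0;
	}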
@@ -138,7 +114,7 @@ nv50_dma_push_wait(struct nouveau_channel *chan, int count)
 	uint32_t cnt = 0, prev_get = 0;

 	while (chan->dma.ib_free < count) {
-		uint32_t get = nvchan_rd32(chan, 0x88);
+		uint32_t get = nv_ro32(chan->object, 0x88);
 		if (get != prev_get) {
 			prev_get = get;
 			cnt = 0;
@@ -249,7 +225,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
 		 * instruct the GPU to jump back to the start right
 		 * after processing the currently pending commands.
 		 */
-		OUT_RING(chan, chan->pushbuf_base | 0x20000000);
+		OUT_RING(chan, chan->push.vma.offset | 0x20000000);

 		/* wait for GET to depart from the skips area.
 		 * prevents writing GET==PUT and causing a race