drm/nv40: implement ctxprog/state generation

The context programs are *very* simple compared to the ones used by
the binary driver.  There's notes in nv40_grctx.c explaining most of
the things we don't implement.  If we discover if/why any of it is
required further down the track, we'll handle it then.

The PGRAPH state generated for each chipset should match what NVIDIA
do almost exactly (there's a couple of exceptions).  If someone has
a lot of time on their hands, they could figure out the mapping of
object/method to PGRAPH register and demagic the initial state a little,
it's not terribly important however.

At time of commit, confirmed to be working at least well enough for
accelerated X (and where tested, for 3D apps) on NV40, NV43, NV44, NV46,
NV49, NV4A, NV4B and NV4E.

A module option has been added to force the use of external firmware
blobs if it becomes required.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
This commit is contained in:
Ben Skeggs
2009-12-15 22:02:47 +10:00
parent 15bee69ee1
commit 054b93e444
8 changed files with 1031 additions and 182 deletions

View File

@@ -24,36 +24,10 @@
*
*/
#include <linux/firmware.h>
#include "drmP.h"
#include "drm.h"
#include "nouveau_drv.h"
MODULE_FIRMWARE("nouveau/nv40.ctxprog");
MODULE_FIRMWARE("nouveau/nv40.ctxvals");
MODULE_FIRMWARE("nouveau/nv41.ctxprog");
MODULE_FIRMWARE("nouveau/nv41.ctxvals");
MODULE_FIRMWARE("nouveau/nv42.ctxprog");
MODULE_FIRMWARE("nouveau/nv42.ctxvals");
MODULE_FIRMWARE("nouveau/nv43.ctxprog");
MODULE_FIRMWARE("nouveau/nv43.ctxvals");
MODULE_FIRMWARE("nouveau/nv44.ctxprog");
MODULE_FIRMWARE("nouveau/nv44.ctxvals");
MODULE_FIRMWARE("nouveau/nv46.ctxprog");
MODULE_FIRMWARE("nouveau/nv46.ctxvals");
MODULE_FIRMWARE("nouveau/nv47.ctxprog");
MODULE_FIRMWARE("nouveau/nv47.ctxvals");
MODULE_FIRMWARE("nouveau/nv49.ctxprog");
MODULE_FIRMWARE("nouveau/nv49.ctxvals");
MODULE_FIRMWARE("nouveau/nv4a.ctxprog");
MODULE_FIRMWARE("nouveau/nv4a.ctxvals");
MODULE_FIRMWARE("nouveau/nv4b.ctxprog");
MODULE_FIRMWARE("nouveau/nv4b.ctxvals");
MODULE_FIRMWARE("nouveau/nv4c.ctxprog");
MODULE_FIRMWARE("nouveau/nv4c.ctxvals");
MODULE_FIRMWARE("nouveau/nv4e.ctxprog");
MODULE_FIRMWARE("nouveau/nv4e.ctxvals");
#include "nouveau_grctx.h"
struct nouveau_channel *
nv40_graph_channel(struct drm_device *dev)
@@ -83,27 +57,30 @@ nv40_graph_create_context(struct nouveau_channel *chan)
{
struct drm_device *dev = chan->dev;
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_gpuobj *ctx;
struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
int ret;
/* Allocate a 175KiB block of PRAMIN to store the context. This
* is massive overkill for a lot of chipsets, but it should be safe
* until we're able to implement this properly (will happen at more
* or less the same time we're able to write our own context programs.
*/
ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 175*1024, 16,
NVOBJ_FLAG_ZERO_ALLOC,
&chan->ramin_grctx);
ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
16, NVOBJ_FLAG_ZERO_ALLOC,
&chan->ramin_grctx);
if (ret)
return ret;
ctx = chan->ramin_grctx->gpuobj;
/* Initialise default context values */
dev_priv->engine.instmem.prepare_access(dev, true);
nv40_grctx_vals_load(dev, ctx);
nv_wo32(dev, ctx, 0, ctx->im_pramin->start);
dev_priv->engine.instmem.finish_access(dev);
if (!pgraph->ctxprog) {
struct nouveau_grctx ctx = {};
ctx.dev = chan->dev;
ctx.mode = NOUVEAU_GRCTX_VALS;
ctx.data = chan->ramin_grctx->gpuobj;
nv40_grctx_init(&ctx);
} else {
nouveau_grctx_vals_load(dev, chan->ramin_grctx->gpuobj);
}
nv_wo32(dev, chan->ramin_grctx->gpuobj, 0,
chan->ramin_grctx->gpuobj->im_pramin->start);
dev_priv->engine.instmem.finish_access(dev);
return 0;
}
@@ -204,139 +181,6 @@ nv40_graph_unload_context(struct drm_device *dev)
return ret;
}
struct nouveau_ctxprog {
uint32_t signature;
uint8_t version;
uint16_t length;
uint32_t data[];
} __attribute__ ((packed));
struct nouveau_ctxvals {
uint32_t signature;
uint8_t version;
uint32_t length;
struct {
uint32_t offset;
uint32_t value;
} data[];
} __attribute__ ((packed));
int
nv40_grctx_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
const int chipset = dev_priv->chipset;
const struct firmware *fw;
const struct nouveau_ctxprog *cp;
const struct nouveau_ctxvals *cv;
char name[32];
int ret, i;
pgraph->accel_blocked = true;
if (!pgraph->ctxprog) {
sprintf(name, "nouveau/nv%02x.ctxprog", chipset);
ret = request_firmware(&fw, name, &dev->pdev->dev);
if (ret) {
NV_ERROR(dev, "No ctxprog for NV%02x\n", chipset);
return ret;
}
pgraph->ctxprog = kmalloc(fw->size, GFP_KERNEL);
if (!pgraph->ctxprog) {
NV_ERROR(dev, "OOM copying ctxprog\n");
release_firmware(fw);
return -ENOMEM;
}
memcpy(pgraph->ctxprog, fw->data, fw->size);
cp = pgraph->ctxprog;
if (le32_to_cpu(cp->signature) != 0x5043564e ||
cp->version != 0 ||
le16_to_cpu(cp->length) != ((fw->size - 7) / 4)) {
NV_ERROR(dev, "ctxprog invalid\n");
release_firmware(fw);
nv40_grctx_fini(dev);
return -EINVAL;
}
release_firmware(fw);
}
if (!pgraph->ctxvals) {
sprintf(name, "nouveau/nv%02x.ctxvals", chipset);
ret = request_firmware(&fw, name, &dev->pdev->dev);
if (ret) {
NV_ERROR(dev, "No ctxvals for NV%02x\n", chipset);
nv40_grctx_fini(dev);
return ret;
}
pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL);
if (!pgraph->ctxprog) {
NV_ERROR(dev, "OOM copying ctxprog\n");
release_firmware(fw);
nv40_grctx_fini(dev);
return -ENOMEM;
}
memcpy(pgraph->ctxvals, fw->data, fw->size);
cv = (void *)pgraph->ctxvals;
if (le32_to_cpu(cv->signature) != 0x5643564e ||
cv->version != 0 ||
le32_to_cpu(cv->length) != ((fw->size - 9) / 8)) {
NV_ERROR(dev, "ctxvals invalid\n");
release_firmware(fw);
nv40_grctx_fini(dev);
return -EINVAL;
}
release_firmware(fw);
}
cp = pgraph->ctxprog;
nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
for (i = 0; i < le16_to_cpu(cp->length); i++)
nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA,
le32_to_cpu(cp->data[i]));
pgraph->accel_blocked = false;
return 0;
}
void
nv40_grctx_fini(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
if (pgraph->ctxprog) {
kfree(pgraph->ctxprog);
pgraph->ctxprog = NULL;
}
if (pgraph->ctxvals) {
kfree(pgraph->ctxprog);
pgraph->ctxvals = NULL;
}
}
void
nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
struct nouveau_ctxvals *cv = pgraph->ctxvals;
int i;
if (!cv)
return;
for (i = 0; i < le32_to_cpu(cv->length); i++)
nv_wo32(dev, ctx, le32_to_cpu(cv->data[i].offset),
le32_to_cpu(cv->data[i].value));
}
/*
* G70 0x47
* G71 0x49
@@ -359,7 +203,26 @@ nv40_graph_init(struct drm_device *dev)
nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
NV_PMC_ENABLE_PGRAPH);
nv40_grctx_init(dev);
if (nouveau_ctxfw) {
nouveau_grctx_prog_load(dev);
dev_priv->engine.graph.grctx_size = 175 * 1024;
}
if (!dev_priv->engine.graph.ctxprog) {
struct nouveau_grctx ctx = {};
uint32_t cp[256];
ctx.dev = dev;
ctx.mode = NOUVEAU_GRCTX_PROG;
ctx.data = cp;
ctx.ctxprog_max = 256;
nv40_grctx_init(&ctx);
dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
for (i = 0; i < ctx.ctxprog_len; i++)
nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
}
/* No context present currently */
nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
@@ -539,6 +402,7 @@ nv40_graph_init(struct drm_device *dev)
void nv40_graph_takedown(struct drm_device *dev)
{
nouveau_grctx_fini(dev);
}
struct nouveau_pgraph_object_class nv40_graph_grclass[] = {