From a7cf01809bf23b95413d8047bd91cdc3cedd1ca1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: [PATCH] drm/nouveau/fifo/gk104-: require explicit runlist selection for channel allocation We didn't use to be aware that runlist/engine IDs weren't the same thing, or that there was such variability in configuration between GPUs. By exposing this information to a client, and giving it explicit control of which runlist it's allocating a channel on, we're able to make better choices. The immediate effect of this is that on GPUs where CE0 is the "GRCE", we will now be allocating a copy engine running asynchronously to GR for BO migrations - as intended. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/cla06f.h | 18 +--- drivers/gpu/drm/nouveau/nouveau_abi16.c | 35 ++++---- drivers/gpu/drm/nouveau/nouveau_chan.c | 4 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 8 +- .../nouveau/nvkm/engine/fifo/gpfifogk104.c | 83 ++++--------------- 5 files changed, 43 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h index 56f5bd81e480..fbfcffc5feb2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h @@ -4,25 +4,11 @@ struct kepler_channel_gpfifo_a_v0 { __u8 version; - __u8 pad01[5]; + __u8 pad01[1]; __u16 chid; -#define NVA06F_V0_ENGINE_SW 0x00000001 -#define NVA06F_V0_ENGINE_GR 0x00000002 -#define NVA06F_V0_ENGINE_SEC 0x00000004 -#define NVA06F_V0_ENGINE_MSVLD 0x00000010 -#define NVA06F_V0_ENGINE_MSPDEC 0x00000020 -#define NVA06F_V0_ENGINE_MSPPP 0x00000040 -#define NVA06F_V0_ENGINE_MSENC 0x00000080 -#define NVA06F_V0_ENGINE_VIC 0x00000100 -#define NVA06F_V0_ENGINE_NVDEC 0x00000200 -#define NVA06F_V0_ENGINE_NVENC0 0x00000400 -#define NVA06F_V0_ENGINE_NVENC1 0x00000800 -#define NVA06F_V0_ENGINE_CE0 0x00010000 -#define NVA06F_V0_ENGINE_CE1 0x00020000 -#define NVA06F_V0_ENGINE_CE2 0x00040000 - 
__u32 engines; __u32 ilength; __u64 ioffset; + __u64 runlist; __u64 vmm; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ece650a0c5f9..ea2472770b21 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -256,6 +257,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv); struct nouveau_abi16_chan *chan; struct nvif_device *device; + u64 engine; int ret; if (unlikely(!abi16)) @@ -268,25 +270,26 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) /* hack to allow channel engine type specification on kepler */ if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { - if (init->fb_ctxdma_handle != ~0) - init->fb_ctxdma_handle = NVA06F_V0_ENGINE_GR; - else { - init->fb_ctxdma_handle = 0; -#define _(A,B) if (init->tt_ctxdma_handle & (A)) init->fb_ctxdma_handle |= (B) - _(0x01, NVA06F_V0_ENGINE_GR); - _(0x02, NVA06F_V0_ENGINE_MSPDEC); - _(0x04, NVA06F_V0_ENGINE_MSPPP); - _(0x08, NVA06F_V0_ENGINE_MSVLD); - _(0x10, NVA06F_V0_ENGINE_CE0); - _(0x20, NVA06F_V0_ENGINE_CE1); - _(0x40, NVA06F_V0_ENGINE_MSENC); -#undef _ + if (init->fb_ctxdma_handle == ~0) { + switch (init->tt_ctxdma_handle) { + case 0x01: engine = NV_DEVICE_INFO_ENGINE_GR ; break; + case 0x02: engine = NV_DEVICE_INFO_ENGINE_MSPDEC; break; + case 0x04: engine = NV_DEVICE_INFO_ENGINE_MSPPP ; break; + case 0x08: engine = NV_DEVICE_INFO_ENGINE_MSVLD ; break; + case 0x30: engine = NV_DEVICE_INFO_ENGINE_CE ; break; + default: + return nouveau_abi16_put(abi16, -ENOSYS); + } + } else { + engine = NV_DEVICE_INFO_ENGINE_GR; } - /* allow flips to be executed if this is a graphics channel */ + if (engine != NV_DEVICE_INFO_ENGINE_CE) + engine = nvif_fifo_runlist(device, engine); + else + engine = nvif_fifo_runlist_ce(device); + init->fb_ctxdma_handle = engine; init->tt_ctxdma_handle = 0; - if 
(init->fb_ctxdma_handle == NVA06F_V0_ENGINE_GR) - init->tt_ctxdma_handle = 1; } if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index db69d13f32a7..67950a5c56ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -214,7 +214,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, - u32 engine, struct nouveau_channel **pchan) + u64 runlist, struct nouveau_channel **pchan) { struct nouveau_cli *cli = (void *)device->object.client; static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A, @@ -245,9 +245,9 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, do { if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) { args.kepler.version = 0; - args.kepler.engines = engine; args.kepler.ilength = 0x02000; args.kepler.ioffset = 0x10000 + chan->push.addr; + args.kepler.runlist = runlist; args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.kepler); } else diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index dddd42592472..6caece4f2f5f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -38,6 +38,7 @@ #include #include +#include #include #include @@ -358,13 +359,12 @@ nouveau_accel_init(struct nouveau_drm *drm) if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { ret = nouveau_channel_new(drm, &drm->client.device, - NVA06F_V0_ENGINE_CE0 | - NVA06F_V0_ENGINE_CE1, - 0, &drm->cechan); + nvif_fifo_runlist_ce(device), 0, + &drm->cechan); if (ret) NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - arg0 = NVA06F_V0_ENGINE_GR; + arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); arg1 = 1; } else if (device->info.chipset >= 0xa3 && diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 68461993394f..e331ab1b702b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -222,62 +222,30 @@ gk104_fifo_gpfifo_func = { .engine_fini = gk104_fifo_gpfifo_engine_fini, }; -struct gk104_fifo_chan_func { - u32 engine; - u64 subdev; -}; - static int -gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, - struct gk104_fifo *fifo, u32 *engmask, u16 *chid, +gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid, u64 vmm, u64 ioffset, u64 ilength, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { struct gk104_fifo_chan *chan; - int runlist = -1, ret = -ENOSYS, i, j; - u32 engines = 0, present = 0; + int runlist = ffs(*runlists) -1, ret, i; + unsigned long engm; u64 subdevs = 0; u64 usermem; - if (!vmm) + if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr) return -EINVAL; + *runlists = BIT_ULL(runlist); - /* Determine which downstream engines are present */ - for (i = 0; i < fifo->engine_nr; i++) { - struct nvkm_engine *engine = fifo->engine[i].engine; - if (engine) { - u64 submask = BIT_ULL(engine->subdev.index); - for (j = 0; func[j].subdev; j++) { - if (func[j].subdev & submask) { - present |= func[j].engine; - break; - } - } - - if (!func[j].subdev) - continue; - - if (runlist < 0 && (*engmask & present)) - runlist = fifo->engine[i].runl; - if (runlist == fifo->engine[i].runl) { - engines |= func[j].engine; - subdevs |= func[j].subdev; - } - } + engm = fifo->runlist[runlist].engm; + for_each_set_bit(i, &engm, fifo->engine_nr) { + if (fifo->engine[i].engine) + subdevs |= BIT_ULL(fifo->engine[i].engine->subdev.index); } - /* Just an engine mask query? All done here! */ - if (!*engmask) { - *engmask = present; - return nvkm_object_new(oclass, NULL, 0, pobject); - } - - /* No runlist? No supported engines. 
*/ - *engmask = present; - if (runlist < 0) - return -ENODEV; - *engmask = engines; + if (subdevs & BIT_ULL(NVKM_ENGINE_GR)) + subdevs |= BIT_ULL(NVKM_ENGINE_SW); /* Allocate the channel. */ if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) @@ -327,26 +295,6 @@ gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, return 0; } -static const struct gk104_fifo_chan_func -gk104_fifo_gpfifo[] = { - { NVA06F_V0_ENGINE_SW | NVA06F_V0_ENGINE_GR, - BIT_ULL(NVKM_ENGINE_SW) | BIT_ULL(NVKM_ENGINE_GR) - }, - { NVA06F_V0_ENGINE_SEC , BIT_ULL(NVKM_ENGINE_SEC ) }, - { NVA06F_V0_ENGINE_MSVLD , BIT_ULL(NVKM_ENGINE_MSVLD ) }, - { NVA06F_V0_ENGINE_MSPDEC, BIT_ULL(NVKM_ENGINE_MSPDEC) }, - { NVA06F_V0_ENGINE_MSPPP , BIT_ULL(NVKM_ENGINE_MSPPP ) }, - { NVA06F_V0_ENGINE_MSENC , BIT_ULL(NVKM_ENGINE_MSENC ) }, - { NVA06F_V0_ENGINE_VIC , BIT_ULL(NVKM_ENGINE_VIC ) }, - { NVA06F_V0_ENGINE_NVDEC , BIT_ULL(NVKM_ENGINE_NVDEC ) }, - { NVA06F_V0_ENGINE_NVENC0, BIT_ULL(NVKM_ENGINE_NVENC0) }, - { NVA06F_V0_ENGINE_NVENC1, BIT_ULL(NVKM_ENGINE_NVENC1) }, - { NVA06F_V0_ENGINE_CE0 , BIT_ULL(NVKM_ENGINE_CE0 ) }, - { NVA06F_V0_ENGINE_CE1 , BIT_ULL(NVKM_ENGINE_CE1 ) }, - { NVA06F_V0_ENGINE_CE2 , BIT_ULL(NVKM_ENGINE_CE2 ) }, - {} -}; - int gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) @@ -361,11 +309,12 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x engine %08x\n", + "ioffset %016llx ilength %08x " + "runlist %016llx\n", args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength, args->v0.engines); - return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo, - &args->v0.engines, + args->v0.ilength, args->v0.runlist); + return gk104_fifo_gpfifo_new_(fifo, 
+ &args->v0.runlist, &args->v0.chid, args->v0.vmm, args->v0.ioffset, -- 2.45.2