From 7c9cf4e33a72c36a62471709d85d096eaac86dc6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 2 Aug 2016 22:50:25 +0100
Subject: [PATCH] drm/i915: Reduce engine->emit_flush() to a single mode
 parameter

Rather than passing a complete set of GPU cache domains for either
invalidation or for flushing, or even both, just pass a single parameter
to the engine->emit_flush to determine the required operations.

  engine->emit_flush(GPU, 0) -> engine->emit_flush(EMIT_INVALIDATE)
  engine->emit_flush(0, GPU) -> engine->emit_flush(EMIT_FLUSH)
  engine->emit_flush(GPU, GPU) -> engine->emit_flush(EMIT_FLUSH | EMIT_INVALIDATE)

This allows us to extend the behaviour easily in future, for example
if we want just a command barrier without the overhead of flushing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon
Cc: Joonas Lahtinen
Reviewed-by: Joonas Lahtinen
Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-8-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 10 ++--
 drivers/gpu/drm/i915/i915_gem_request.c    |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 23 +++------
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 57 ++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  6 ++-
 7 files changed, 38 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index beece8feb8fe..edde8411c478 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(dev_priv)) {
-		ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+		ret = engine->emit_flush(req, EMIT_INVALIDATE);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 35c4c595e5ba..e49776e34eed 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,7 +999,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		wmb();
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+	return req->engine->emit_flush(req, EMIT_INVALIDATE);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 39fa9eb10514..671b1cab5e54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1666,8 +1666,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->emit_flush(req,
-				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 	if (ret)
 		return ret;
 
@@ -1694,8 +1693,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->emit_flush(req,
-				 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 	if (ret)
 		return ret;
 
@@ -1713,9 +1711,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (engine->id != RCS) {
-		ret = engine->emit_flush(req,
-					 I915_GEM_GPU_DOMAINS,
-					 I915_GEM_GPU_DOMAINS);
+		ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 7e3206051ced..67f16feb0552 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -451,7 +451,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 * what.
 	 */
 	if (flush_caches) {
-		ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS);
+		ret = engine->emit_flush(request, EMIT_FLUSH);
 
 		/* Not allowed to fail! */
 		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8d971e81491..af7d7e07748e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -672,7 +672,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
+	return req->engine->emit_flush(req, EMIT_INVALIDATE);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
@@ -998,9 +998,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1017,9 +1015,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1598,9 +1594,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
 	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
 }
 
-static int gen8_emit_flush(struct drm_i915_gem_request *request,
-			   u32 invalidate_domains,
-			   u32 unused)
+static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
 {
 	struct intel_ring *ring = request->ring;
 	u32 cmd;
@@ -1619,7 +1613,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request,
 	 */
 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
+	if (mode & EMIT_INVALIDATE) {
 		cmd |= MI_INVALIDATE_TLB;
 		if (request->engine->id == VCS)
 			cmd |= MI_INVALIDATE_BSD;
@@ -1637,8 +1631,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request,
 }
 
 static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
-				  u32 invalidate_domains,
-				  u32 flush_domains)
+				  u32 mode)
 {
 	struct intel_ring *ring = request->ring;
 	struct intel_engine_cs *engine = request->engine;
@@ -1650,14 +1643,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9e4b49644553..1f876e7ce582 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -67,19 +67,15 @@ static void __intel_engine_submit(struct intel_engine_cs *engine)
 }
 
 static int
-gen2_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains,
-		       u32 flush_domains)
+gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 cmd;
 	int ret;
 
 	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
-		cmd |= MI_NO_WRITE_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_READ_FLUSH;
 
 	ret = intel_ring_begin(req, 2);
@@ -94,9 +90,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req,
 }
 
 static int
-gen4_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains,
-		       u32 flush_domains)
+gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 cmd;
@@ -131,7 +125,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
 	 */
 
 	cmd = MI_FLUSH;
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		cmd |= MI_EXE_FLUSH;
 		if (IS_G4X(req->i915) || IS_GEN5(req->i915))
 			cmd |= MI_INVALIDATE_ISP;
@@ -222,8 +216,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 }
 
 static int
-gen6_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
@@ -240,7 +233,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
@@ -249,7 +242,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
 		 */
 		flags |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -297,8 +290,7 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 }
 
 static int
-gen7_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
@@ -320,13 +312,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -384,8 +376,7 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
 }
 
 static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req,
-		       u32 invalidate_domains, u32 flush_domains)
+gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	u32 flags = 0;
@@ -393,13 +384,13 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
 	flags |= PIPE_CONTROL_CS_STALL;
 
-	if (flush_domains) {
+	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 
-	if (invalidate_domains) {
+	if (mode & EMIT_INVALIDATE) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -688,9 +679,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -707,9 +696,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	ret = req->engine->emit_flush(req,
-				      I915_GEM_GPU_DOMAINS,
-				      I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
 		return ret;
 
@@ -1700,9 +1687,7 @@ i8xx_irq_disable(struct intel_engine_cs *engine)
 }
 
 static int
-bsd_ring_flush(struct drm_i915_gem_request *req,
-	       u32 invalidate_domains,
-	       u32 flush_domains)
+bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	int ret;
@@ -2533,8 +2518,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	uint32_t cmd;
@@ -2561,7 +2545,7 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
 	intel_ring_emit(ring, cmd);
@@ -2653,8 +2637,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct drm_i915_gem_request *req,
-			   u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
 	struct intel_ring *ring = req->ring;
 	uint32_t cmd;
@@ -2681,7 +2664,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
+	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 00723401f98c..76d0495943c3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -292,8 +292,10 @@ struct intel_engine_cs {
 	u32		ctx_desc_template;
 	int		(*emit_request)(struct drm_i915_gem_request *request);
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
-				      u32 invalidate_domains,
-				      u32 flush_domains);
+				      u32 mode);
+#define EMIT_INVALIDATE	BIT(0)
+#define EMIT_FLUSH	BIT(1)
+#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
 	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
 					 u64 offset, unsigned dispatch_flags);
-- 
2.45.2
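The interface change is easiest to see in isolation from the driver. Below is
a minimal, self-contained C sketch (not part of the patch) of the new
single-parameter scheme: BIT() is re-defined here because the kernel headers
are unavailable in userspace, and decode_flush() is a hypothetical stand-in
for the per-generation emit_flush() backends, which test each EMIT_* bit
independently just as gen8_emit_flush_render() does when accumulating its
PIPE_CONTROL flags.

/*
 * Illustrative only -- a userspace mock-up of the EMIT_* mode bits
 * introduced above. decode_flush() is hypothetical; the real backends
 * emit MI_FLUSH_DW or PIPE_CONTROL commands instead of printing.
 */
#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))

#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)

static void decode_flush(uint32_t mode)
{
	/* Each bit is tested on its own, as the real backends do. */
	if (mode & EMIT_FLUSH)
		printf("\tflush: write caches -> memory\n");
	if (mode & EMIT_INVALIDATE)
		printf("\tinvalidate: TLBs and read caches\n");
}

int main(void)
{
	/* The three mappings from the commit message: */
	printf("emit_flush(GPU, 0)   -> EMIT_INVALIDATE\n");
	decode_flush(EMIT_INVALIDATE);

	printf("emit_flush(0, GPU)   -> EMIT_FLUSH\n");
	decode_flush(EMIT_FLUSH);

	printf("emit_flush(GPU, GPU) -> EMIT_BARRIER\n");
	decode_flush(EMIT_BARRIER);

	return 0;
}

Because the bits are tested independently, EMIT_BARRIER needs no
special-casing in any backend: a combined flush-and-invalidate falls out of
the two existing branches, and a future mode (such as the bare command
barrier the commit message mentions) only needs a new bit.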