asedeno.scripts.mit.edu Git - linux.git/commitdiff
drm/i915/gt: Split intel_ring_submission
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 24 Oct 2019 10:03:44 +0000 (11:03 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 24 Oct 2019 11:14:21 +0000 (12:14 +0100)
Split the legacy submission backend from the common CS ring buffer
handling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191024100344.5041-1-chris@chris-wilson.co.uk
30 files changed:
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_overlay.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_context.h
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_pm.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_mocs.c
drivers/gpu/drm/i915/gt/intel_renderstate.c
drivers/gpu/drm/i915/gt/intel_ring.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_ring.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_ring_submission.c [moved from drivers/gpu/drm/i915/gt/intel_ringbuffer.c with 88% similarity]
drivers/gpu/drm/i915/gt/intel_ring_types.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_timeline.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_timeline.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_active.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_request.c

index 21601bb27b22f9e9b86e70ae35b4589cff1b7dcd..9bf1c0b2037055f33dc4e3264150a5749c19702a 100644 (file)
@@ -89,11 +89,12 @@ gt-y += \
        gt/intel_gt_requests.o \
        gt/intel_llc.o \
        gt/intel_lrc.o \
+       gt/intel_mocs.o \
        gt/intel_rc6.o \
        gt/intel_renderstate.o \
        gt/intel_reset.o \
-       gt/intel_ringbuffer.o \
-       gt/intel_mocs.o \
+       gt/intel_ring.o \
+       gt/intel_ring_submission.o \
        gt/intel_sseu.o \
        gt/intel_timeline.o \
        gt/intel_workarounds.o
index 2360f19f96945f79952f5d59b9361d4245273745..848ce07a8ec2e505a4d561d2d27cf96785e359e6 100644 (file)
@@ -30,6 +30,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
index b2f042d87be06827bac3c37c23af3425c8c1bb37..55f1f93c0925c100d51a235280087369b205c657 100644 (file)
 
 #include <drm/i915_drm.h>
 
-#include "gt/intel_lrc_reg.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
index e96901888323535818a60fdd1d22e3e503010e80..e4f5c269150ab26789a0d30814e9aa48bee9db92 100644 (file)
@@ -19,6 +19,7 @@
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
index 5bd8de124d74db6d8167c139e57440c0d61e6bc6..516e61e992122f0303834392fa678d287fc313e3 100644 (file)
@@ -8,6 +8,7 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_object_blt.h"
 
index 549810f70aeb6f4b1ffd95435b168b940077481a..0877ef4dff6366bba1082ec3ee5c6c8964c6813a 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
index 59c3083c1ec17bbd91b0f204fd5891f62a2483a2..ee9d2bcd2c13d5b2641a2a2668f70dfd8bf66f3a 100644 (file)
@@ -13,6 +13,7 @@
 #include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_ring.h"
 
 static struct i915_global_context {
        struct i915_global base;
index dd742ac2fbdb743db6ef5b0ad776bc0b2137ca6d..68b3d317d959eefb341961bb8c64df174ff314be 100644 (file)
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_ring_types.h"
 #include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
index ee47444a6ad4b7a50f9ad1636b037bfaee7780d3..97bbdd9773c9c49426f58670b1660d6bcdfe5195 100644 (file)
@@ -19,7 +19,6 @@
 #include "intel_workarounds.h"
 
 struct drm_printer;
-
 struct intel_gt;
 
 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
@@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_HWS_CSB_WRITE_INDEX       0x1f
 #define CNL_HWS_CSB_WRITE_INDEX                0x2f
 
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
-       kref_get(&ring->ref);
-       return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
-       kref_put(&ring->ref, intel_ring_free);
-}
-
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
-       /* Dummy function.
-        *
-        * This serves as a placeholder in the code so that the reader
-        * can compare against the preceding intel_ring_begin() and
-        * check that the number of dwords emitted matches the space
-        * reserved for the command packet (i.e. the value passed to
-        * intel_ring_begin()).
-        */
-       GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
-       return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
-                       unsigned int pos)
-{
-       if (pos & -ring->size) /* must be strictly within the ring */
-               return false;
-
-       if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
-               return false;
-
-       return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
-       /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-       u32 offset = addr - rq->ring->vaddr;
-       GEM_BUG_ON(offset > rq->ring->size);
-       return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
-       GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
-       /*
-        * "Ring Buffer Use"
-        *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
-        *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
-        *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
-        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-        * same cacheline, the Head Pointer must not be greater than the Tail
-        * Pointer."
-        *
-        * We use ring->head as the last known location of the actual RING_HEAD,
-        * it may have advanced but in the worst case it is equally the same
-        * as ring->head and so we should never program RING_TAIL to advance
-        * into the same cacheline as ring->head.
-        */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
-       GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
-                  tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
-       /* Whilst writes to the tail are strictly order, there is no
-        * serialisation between readers and the writers. The tail may be
-        * read by i915_request_retire() just as it is being updated
-        * by execlists, as although the breadcrumb is complete, the context
-        * switch hasn't been seen.
-        */
-       assert_ring_tail_valid(ring, tail);
-       ring->tail = tail;
-       return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
-       /*
-        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-        * same cacheline, the Head Pointer must not be greater than the Tail
-        * Pointer."
-        */
-       GEM_BUG_ON(!is_power_of_2(size));
-       return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
 int intel_engines_init_mmio(struct intel_gt *gt);
 int intel_engines_setup(struct intel_gt *gt);
 int intel_engines_init(struct intel_gt *gt);
index 8b5129e0b49d0b1db3712a838cfccbb684d735c3..9cc1ea6519ecda70ea46bebc956b8deae608311d 100644 (file)
@@ -37,6 +37,7 @@
 #include "intel_context.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
index 6fbfa2162e54d261cc1584dcefaef3f113c95511..3c0f490ff2c7625adb42fbd6a603b86d41f88749 100644 (file)
@@ -13,6 +13,7 @@
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
+#include "intel_ring.h"
 
 static int __engine_unpark(struct intel_wakeref *wf)
 {
index 26998901feffe5de7594a26c02e62d88364c70f0..e8ea12b96755cbf34bb639db02f0896988b8765e 100644 (file)
@@ -59,6 +59,7 @@ struct i915_gem_context;
 struct i915_request;
 struct i915_sched_attr;
 struct intel_gt;
+struct intel_ring;
 struct intel_uncore;
 
 typedef u8 intel_engine_mask_t;
@@ -77,32 +78,6 @@ struct intel_instdone {
        u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_ring {
-       struct kref ref;
-       struct i915_vma *vma;
-       void *vaddr;
-
-       /*
-        * As we have two types of rings, one global to the engine used
-        * by ringbuffer submission and those that are exclusive to a
-        * context used by execlists, we have to play safe and allow
-        * atomic updates to the pin_count. However, the actual pinning
-        * of the context is either done during initialisation for
-        * ringbuffer submission or serialised as part of the context
-        * pinning for execlists, and so we do not need a mutex ourselves
-        * to serialise intel_ring_pin/intel_ring_unpin.
-        */
-       atomic_t pin_count;
-
-       u32 head;
-       u32 tail;
-       u32 emit;
-
-       u32 space;
-       u32 size;
-       u32 effective_size;
-};
-
 /*
  * we use a single page to load ctx workarounds so all of these
  * values are referred in terms of dwords
index 651d5dd39464f4a91e0817fc4006069bceb754b9..73eae85a2cc919acc723a7f9fef6dc1d78da864a 100644 (file)
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL            (1 << 0x2)
index 5bac3966906b00e3b3d2aa2407361ecb3847973c..932833e5b712c00664b1377c7dc385ff16ce2b09 100644 (file)
@@ -26,6 +26,7 @@
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_ring.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
index 6d05f9c64178fee70355bc8be4c528dd6440a38d..c4edc35e7d8909cab9b32a622ee44b2340333ba8 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "intel_ring.h"
 
 struct intel_renderstate {
        const struct intel_renderstate_rodata *rodata;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644 (file)
index 0000000..fa01c14
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+       unsigned int space;
+
+       space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+       ring->space = space;
+       return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+       struct i915_vma *vma = ring->vma;
+       unsigned int flags;
+       void *addr;
+       int ret;
+
+       if (atomic_fetch_inc(&ring->pin_count))
+               return 0;
+
+       flags = PIN_GLOBAL;
+
+       /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+       flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+       if (vma->obj->stolen)
+               flags |= PIN_MAPPABLE;
+       else
+               flags |= PIN_HIGH;
+
+       ret = i915_vma_pin(vma, 0, 0, flags);
+       if (unlikely(ret))
+               goto err_unpin;
+
+       if (i915_vma_is_map_and_fenceable(vma))
+               addr = (void __force *)i915_vma_pin_iomap(vma);
+       else
+               addr = i915_gem_object_pin_map(vma->obj,
+                                              i915_coherent_map_type(vma->vm->i915));
+       if (IS_ERR(addr)) {
+               ret = PTR_ERR(addr);
+               goto err_ring;
+       }
+
+       i915_vma_make_unshrinkable(vma);
+
+       GEM_BUG_ON(ring->vaddr);
+       ring->vaddr = addr;
+
+       return 0;
+
+err_ring:
+       i915_vma_unpin(vma);
+err_unpin:
+       atomic_dec(&ring->pin_count);
+       return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+       tail = intel_ring_wrap(ring, tail);
+       ring->tail = tail;
+       ring->head = tail;
+       ring->emit = tail;
+       intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+       struct i915_vma *vma = ring->vma;
+
+       if (!atomic_dec_and_test(&ring->pin_count))
+               return;
+
+       /* Discard any unused bytes beyond that submitted to hw. */
+       intel_ring_reset(ring, ring->emit);
+
+       i915_vma_unset_ggtt_write(vma);
+       if (i915_vma_is_map_and_fenceable(vma))
+               i915_vma_unpin_iomap(vma);
+       else
+               i915_gem_object_unpin_map(vma->obj);
+
+       GEM_BUG_ON(!ring->vaddr);
+       ring->vaddr = NULL;
+
+       i915_vma_unpin(vma);
+       i915_vma_make_purgeable(vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+       struct i915_address_space *vm = &ggtt->vm;
+       struct drm_i915_private *i915 = vm->i915;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+
+       obj = i915_gem_object_create_stolen(i915, size);
+       if (IS_ERR(obj))
+               obj = i915_gem_object_create_internal(i915, size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       /*
+        * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+        * if supported by the platform's GGTT.
+        */
+       if (vm->has_read_only)
+               i915_gem_object_set_readonly(obj);
+
+       vma = i915_vma_instance(obj, vm, NULL);
+       if (IS_ERR(vma))
+               goto err;
+
+       return vma;
+
+err:
+       i915_gem_object_put(obj);
+       return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+       struct drm_i915_private *i915 = engine->i915;
+       struct intel_ring *ring;
+       struct i915_vma *vma;
+
+       GEM_BUG_ON(!is_power_of_2(size));
+       GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+       if (!ring)
+               return ERR_PTR(-ENOMEM);
+
+       kref_init(&ring->ref);
+       ring->size = size;
+
+       /*
+        * Workaround an erratum on the i830 which causes a hang if
+        * the TAIL pointer points to within the last 2 cachelines
+        * of the buffer.
+        */
+       ring->effective_size = size;
+       if (IS_I830(i915) || IS_I845G(i915))
+               ring->effective_size -= 2 * CACHELINE_BYTES;
+
+       intel_ring_update_space(ring);
+
+       vma = create_ring_vma(engine->gt->ggtt, size);
+       if (IS_ERR(vma)) {
+               kfree(ring);
+               return ERR_CAST(vma);
+       }
+       ring->vma = vma;
+
+       return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+       struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+       i915_vma_put(ring->vma);
+       kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+              struct intel_timeline *tl,
+              unsigned int bytes)
+{
+       struct i915_request *target;
+       long timeout;
+
+       if (intel_ring_update_space(ring) >= bytes)
+               return 0;
+
+       GEM_BUG_ON(list_empty(&tl->requests));
+       list_for_each_entry(target, &tl->requests, link) {
+               if (target->ring != ring)
+                       continue;
+
+               /* Would completion of this request free enough space? */
+               if (bytes <= __intel_ring_space(target->postfix,
+                                               ring->emit, ring->size))
+                       break;
+       }
+
+       if (GEM_WARN_ON(&target->link == &tl->requests))
+               return -ENOSPC;
+
+       timeout = i915_request_wait(target,
+                                   I915_WAIT_INTERRUPTIBLE,
+                                   MAX_SCHEDULE_TIMEOUT);
+       if (timeout < 0)
+               return timeout;
+
+       i915_request_retire_upto(target);
+
+       intel_ring_update_space(ring);
+       GEM_BUG_ON(ring->space < bytes);
+       return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+       struct intel_ring *ring = rq->ring;
+       const unsigned int remain_usable = ring->effective_size - ring->emit;
+       const unsigned int bytes = num_dwords * sizeof(u32);
+       unsigned int need_wrap = 0;
+       unsigned int total_bytes;
+       u32 *cs;
+
+       /* Packets must be qword aligned. */
+       GEM_BUG_ON(num_dwords & 1);
+
+       total_bytes = bytes + rq->reserved_space;
+       GEM_BUG_ON(total_bytes > ring->effective_size);
+
+       if (unlikely(total_bytes > remain_usable)) {
+               const int remain_actual = ring->size - ring->emit;
+
+               if (bytes > remain_usable) {
+                       /*
+                        * Not enough space for the basic request. So need to
+                        * flush out the remainder and then wait for
+                        * base + reserved.
+                        */
+                       total_bytes += remain_actual;
+                       need_wrap = remain_actual | 1;
+               } else  {
+                       /*
+                        * The base request will fit but the reserved space
+                        * falls off the end. So we don't need an immediate
+                        * wrap and only need to effectively wait for the
+                        * reserved size from the start of ringbuffer.
+                        */
+                       total_bytes = rq->reserved_space + remain_actual;
+               }
+       }
+
+       if (unlikely(total_bytes > ring->space)) {
+               int ret;
+
+               /*
+                * Space is reserved in the ringbuffer for finalising the
+                * request, as that cannot be allowed to fail. During request
+                * finalisation, reserved_space is set to 0 to stop the
+                * overallocation and the assumption is that then we never need
+                * to wait (which has the risk of failing with EINTR).
+                *
+                * See also i915_request_alloc() and i915_request_add().
+                */
+               GEM_BUG_ON(!rq->reserved_space);
+
+               ret = wait_for_space(ring,
+                                    i915_request_timeline(rq),
+                                    total_bytes);
+               if (unlikely(ret))
+                       return ERR_PTR(ret);
+       }
+
+       if (unlikely(need_wrap)) {
+               need_wrap &= ~1;
+               GEM_BUG_ON(need_wrap > ring->space);
+               GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+               GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+               /* Fill the tail with MI_NOOP */
+               memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+               ring->space -= need_wrap;
+               ring->emit = 0;
+       }
+
+       GEM_BUG_ON(ring->emit > ring->size - bytes);
+       GEM_BUG_ON(ring->space < bytes);
+       cs = ring->vaddr + ring->emit;
+       GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+       ring->emit += bytes;
+       ring->space -= bytes;
+
+       return cs;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+       int num_dwords;
+       void *cs;
+
+       num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+       if (num_dwords == 0)
+               return 0;
+
+       num_dwords = CACHELINE_DWORDS - num_dwords;
+       GEM_BUG_ON(num_dwords & 1);
+
+       cs = intel_ring_begin(rq, num_dwords);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+       intel_ring_advance(rq, cs + num_dwords);
+
+       GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+       return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644 (file)
index 0000000..ea2839d
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+       kref_get(&ring->ref);
+       return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+       kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+       /* Dummy function.
+        *
+        * This serves as a placeholder in the code so that the reader
+        * can compare against the preceding intel_ring_begin() and
+        * check that the number of dwords emitted matches the space
+        * reserved for the command packet (i.e. the value passed to
+        * intel_ring_begin()).
+        */
+       GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+       return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+                       unsigned int pos)
+{
+       if (pos & -ring->size) /* must be strictly within the ring */
+               return false;
+
+       if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+               return false;
+
+       return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+       /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+       u32 offset = addr - rq->ring->vaddr;
+       GEM_BUG_ON(offset > rq->ring->size);
+       return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+       GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+       /*
+        * "Ring Buffer Use"
+        *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+        *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+        *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+        * same cacheline, the Head Pointer must not be greater than the Tail
+        * Pointer."
+        *
+        * We use ring->head as the last known location of the actual RING_HEAD,
+        * it may have advanced but in the worst case it is equally the same
+        * as ring->head and so we should never program RING_TAIL to advance
+        * into the same cacheline as ring->head.
+        */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+       GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+                  tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+       /* Whilst writes to the tail are strictly order, there is no
+        * serialisation between readers and the writers. The tail may be
+        * read by i915_request_retire() just as it is being updated
+        * by execlists, as although the breadcrumb is complete, the context
+        * switch hasn't been seen.
+        */
+       assert_ring_tail_valid(ring, tail);
+       ring->tail = tail;
+       return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+       /*
+        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+        * same cacheline, the Head Pointer must not be greater than the Tail
+        * Pointer."
+        */
+       GEM_BUG_ON(!is_power_of_2(size));
+       return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
similarity index 88%
rename from drivers/gpu/drm/i915/gt/intel_ringbuffer.c
rename to drivers/gpu/drm/i915/gt/intel_ring_submission.c
index bf631f15aa781a36de3a106444662c634b8f2903..a47d5a7c32c9633f5e1a839b1ee68a5107579055 100644 (file)
@@ -40,6 +40,7 @@
 #include "intel_gt_irq.h"
 #include "intel_gt_pm_irq.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
  */
 #define LEGACY_REQUEST_SIZE 200
 
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
-       unsigned int space;
-
-       space = __intel_ring_space(ring->head, ring->emit, ring->size);
-
-       ring->space = space;
-       return space;
-}
-
 static int
 gen2_render_ring_flush(struct i915_request *rq, u32 mode)
 {
@@ -1186,162 +1177,6 @@ i915_emit_bb_start(struct i915_request *rq,
        return 0;
 }
 
-int intel_ring_pin(struct intel_ring *ring)
-{
-       struct i915_vma *vma = ring->vma;
-       unsigned int flags;
-       void *addr;
-       int ret;
-
-       if (atomic_fetch_inc(&ring->pin_count))
-               return 0;
-
-       flags = PIN_GLOBAL;
-
-       /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-       flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-       if (vma->obj->stolen)
-               flags |= PIN_MAPPABLE;
-       else
-               flags |= PIN_HIGH;
-
-       ret = i915_vma_pin(vma, 0, 0, flags);
-       if (unlikely(ret))
-               goto err_unpin;
-
-       if (i915_vma_is_map_and_fenceable(vma))
-               addr = (void __force *)i915_vma_pin_iomap(vma);
-       else
-               addr = i915_gem_object_pin_map(vma->obj,
-                                              i915_coherent_map_type(vma->vm->i915));
-       if (IS_ERR(addr)) {
-               ret = PTR_ERR(addr);
-               goto err_ring;
-       }
-
-       i915_vma_make_unshrinkable(vma);
-
-       GEM_BUG_ON(ring->vaddr);
-       ring->vaddr = addr;
-
-       return 0;
-
-err_ring:
-       i915_vma_unpin(vma);
-err_unpin:
-       atomic_dec(&ring->pin_count);
-       return ret;
-}
-
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
-       tail = intel_ring_wrap(ring, tail);
-       ring->tail = tail;
-       ring->head = tail;
-       ring->emit = tail;
-       intel_ring_update_space(ring);
-}
-
-void intel_ring_unpin(struct intel_ring *ring)
-{
-       struct i915_vma *vma = ring->vma;
-
-       if (!atomic_dec_and_test(&ring->pin_count))
-               return;
-
-       /* Discard any unused bytes beyond that submitted to hw. */
-       intel_ring_reset(ring, ring->emit);
-
-       i915_vma_unset_ggtt_write(vma);
-       if (i915_vma_is_map_and_fenceable(vma))
-               i915_vma_unpin_iomap(vma);
-       else
-               i915_gem_object_unpin_map(vma->obj);
-
-       GEM_BUG_ON(!ring->vaddr);
-       ring->vaddr = NULL;
-
-       i915_vma_unpin(vma);
-       i915_vma_make_purgeable(vma);
-}
-
-static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
-{
-       struct i915_address_space *vm = &ggtt->vm;
-       struct drm_i915_private *i915 = vm->i915;
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-
-       obj = i915_gem_object_create_stolen(i915, size);
-       if (IS_ERR(obj))
-               obj = i915_gem_object_create_internal(i915, size);
-       if (IS_ERR(obj))
-               return ERR_CAST(obj);
-
-       /*
-        * Mark ring buffers as read-only from GPU side (so no stray overwrites)
-        * if supported by the platform's GGTT.
-        */
-       if (vm->has_read_only)
-               i915_gem_object_set_readonly(obj);
-
-       vma = i915_vma_instance(obj, vm, NULL);
-       if (IS_ERR(vma))
-               goto err;
-
-       return vma;
-
-err:
-       i915_gem_object_put(obj);
-       return vma;
-}
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
-{
-       struct drm_i915_private *i915 = engine->i915;
-       struct intel_ring *ring;
-       struct i915_vma *vma;
-
-       GEM_BUG_ON(!is_power_of_2(size));
-       GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-
-       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-       if (!ring)
-               return ERR_PTR(-ENOMEM);
-
-       kref_init(&ring->ref);
-
-       ring->size = size;
-       /* Workaround an erratum on the i830 which causes a hang if
-        * the TAIL pointer points to within the last 2 cachelines
-        * of the buffer.
-        */
-       ring->effective_size = size;
-       if (IS_I830(i915) || IS_I845G(i915))
-               ring->effective_size -= 2 * CACHELINE_BYTES;
-
-       intel_ring_update_space(ring);
-
-       vma = create_ring_vma(engine->gt->ggtt, size);
-       if (IS_ERR(vma)) {
-               kfree(ring);
-               return ERR_CAST(vma);
-       }
-       ring->vma = vma;
-
-       return ring;
-}
-
-void intel_ring_free(struct kref *ref)
-{
-       struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-
-       i915_vma_put(ring->vma);
-       kfree(ring);
-}
-
 static void __ring_context_fini(struct intel_context *ce)
 {
        i915_vma_put(ce->state);
@@ -1836,148 +1671,6 @@ static int ring_request_alloc(struct i915_request *request)
        return 0;
 }
 
-static noinline int
-wait_for_space(struct intel_ring *ring,
-              struct intel_timeline *tl,
-              unsigned int bytes)
-{
-       struct i915_request *target;
-       long timeout;
-
-       if (intel_ring_update_space(ring) >= bytes)
-               return 0;
-
-       GEM_BUG_ON(list_empty(&tl->requests));
-       list_for_each_entry(target, &tl->requests, link) {
-               if (target->ring != ring)
-                       continue;
-
-               /* Would completion of this request free enough space? */
-               if (bytes <= __intel_ring_space(target->postfix,
-                                               ring->emit, ring->size))
-                       break;
-       }
-
-       if (GEM_WARN_ON(&target->link == &tl->requests))
-               return -ENOSPC;
-
-       timeout = i915_request_wait(target,
-                                   I915_WAIT_INTERRUPTIBLE,
-                                   MAX_SCHEDULE_TIMEOUT);
-       if (timeout < 0)
-               return timeout;
-
-       i915_request_retire_upto(target);
-
-       intel_ring_update_space(ring);
-       GEM_BUG_ON(ring->space < bytes);
-       return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
-       struct intel_ring *ring = rq->ring;
-       const unsigned int remain_usable = ring->effective_size - ring->emit;
-       const unsigned int bytes = num_dwords * sizeof(u32);
-       unsigned int need_wrap = 0;
-       unsigned int total_bytes;
-       u32 *cs;
-
-       /* Packets must be qword aligned. */
-       GEM_BUG_ON(num_dwords & 1);
-
-       total_bytes = bytes + rq->reserved_space;
-       GEM_BUG_ON(total_bytes > ring->effective_size);
-
-       if (unlikely(total_bytes > remain_usable)) {
-               const int remain_actual = ring->size - ring->emit;
-
-               if (bytes > remain_usable) {
-                       /*
-                        * Not enough space for the basic request. So need to
-                        * flush out the remainder and then wait for
-                        * base + reserved.
-                        */
-                       total_bytes += remain_actual;
-                       need_wrap = remain_actual | 1;
-               } else  {
-                       /*
-                        * The base request will fit but the reserved space
-                        * falls off the end. So we don't need an immediate
-                        * wrap and only need to effectively wait for the
-                        * reserved size from the start of ringbuffer.
-                        */
-                       total_bytes = rq->reserved_space + remain_actual;
-               }
-       }
-
-       if (unlikely(total_bytes > ring->space)) {
-               int ret;
-
-               /*
-                * Space is reserved in the ringbuffer for finalising the
-                * request, as that cannot be allowed to fail. During request
-                * finalisation, reserved_space is set to 0 to stop the
-                * overallocation and the assumption is that then we never need
-                * to wait (which has the risk of failing with EINTR).
-                *
-                * See also i915_request_alloc() and i915_request_add().
-                */
-               GEM_BUG_ON(!rq->reserved_space);
-
-               ret = wait_for_space(ring,
-                                    i915_request_timeline(rq),
-                                    total_bytes);
-               if (unlikely(ret))
-                       return ERR_PTR(ret);
-       }
-
-       if (unlikely(need_wrap)) {
-               need_wrap &= ~1;
-               GEM_BUG_ON(need_wrap > ring->space);
-               GEM_BUG_ON(ring->emit + need_wrap > ring->size);
-               GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
-               /* Fill the tail with MI_NOOP */
-               memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
-               ring->space -= need_wrap;
-               ring->emit = 0;
-       }
-
-       GEM_BUG_ON(ring->emit > ring->size - bytes);
-       GEM_BUG_ON(ring->space < bytes);
-       cs = ring->vaddr + ring->emit;
-       GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
-       ring->emit += bytes;
-       ring->space -= bytes;
-
-       return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
-       int num_dwords;
-       void *cs;
-
-       num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
-       if (num_dwords == 0)
-               return 0;
-
-       num_dwords = CACHELINE_DWORDS - num_dwords;
-       GEM_BUG_ON(num_dwords & 1);
-
-       cs = intel_ring_begin(rq, num_dwords);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
-       intel_ring_advance(rq, cs);
-
-       GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
-       return 0;
-}
-
 static void gen6_bsd_submit_request(struct i915_request *request)
 {
        struct intel_uncore *uncore = request->engine->uncore;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
new file mode 100644 (file)
index 0000000..d9f17f3
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_TYPES_H
+#define INTEL_RING_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+/*
+ * Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+struct i915_vma;
+
+struct intel_ring {
+       struct kref ref;
+       struct i915_vma *vma;
+       void *vaddr;
+
+       /*
+        * As we have two types of rings, one global to the engine used
+        * by ringbuffer submission and those that are exclusive to a
+        * context used by execlists, we have to play safe and allow
+        * atomic updates to the pin_count. However, the actual pinning
+        * of the context is either done during initialisation for
+        * ringbuffer submission or serialised as part of the context
+        * pinning for execlists, and so we do not need a mutex ourselves
+        * to serialise intel_ring_pin/intel_ring_unpin.
+        */
+       atomic_t pin_count;
+
+       u32 head;
+       u32 tail;
+       u32 emit;
+
+       u32 space;
+       u32 size;
+       u32 effective_size;
+};
+
+#endif /* INTEL_RING_TYPES_H */
index 0f959694303c86bbf45713848e3e72778c73ed43..14ad10acd548819dcdcdeebc8a965af9decbae3c 100644 (file)
@@ -4,13 +4,13 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "gt/intel_gt_types.h"
-
 #include "i915_drv.h"
 
 #include "i915_active.h"
 #include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
 
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
index af8a8183154aedc992446357b612d720ca14bab3..7cb6dab4399d857a55465750047d58f7cd1770a1 100644 (file)
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 /**
index 123db2c3f9564ac2198d8fa221e3dcc487775b24..83f549d203a0d0ef75678edcdc2fc6ba85672f20 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "intel_context.h"
index dac86f699a4cf2061ba54de066ffce879d7536fd..f04a59fe5d2cdd632ea11afacd72c514eb5478ee 100644 (file)
@@ -9,6 +9,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
+#include "intel_ring.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
index 009e54a3764f9fc8114069d121791ebda8e25d26..1b1691aaed2881aabaa9e19b7c46574e9fd6f81e 100644 (file)
@@ -6,12 +6,13 @@
 #include <linux/circ_buf.h>
 
 #include "gem/i915_gem_context.h"
-
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
index e753b1e706e23543efb398587da51b8b489327ac..6a3ac8cde95d667ee20540044a351d88c5b83d88 100644 (file)
@@ -35,7 +35,9 @@
  */
 
 #include <linux/slab.h>
+
 #include "i915_drv.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
index 4208e40445b138fe2ec5b2bb0ec109315e32c394..aaf15916d29a6507c1ee416b3fc122a524d976d5 100644 (file)
@@ -35,6 +35,7 @@
 
 #include "i915_drv.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "trace.h"
 
index a5b942ee3ceb98f95ba8c27a9ead921de8b807c1..377811f8853fad918388a6705b85add787e6ac43 100644 (file)
@@ -38,6 +38,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "gvt.h"
index 07d39f22a2c3a4ced1d5cd40b81e3d321f86eaa0..207383dda84db329a2ba43b0edce6691858002fd 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/debugobjects.h>
 
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_active.h"
index 3130b0c7ed83ecf7e177123ccfa066d9e3318556..38d3de2dfaa6b37f74e4729a533d6fedb0ecd99d 100644 (file)
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_perf.h"
index 4575f368455d581589d594d6346efc6da6b7d83e..932c5cf190b559ce5801d1cb9fc02b8e7c4f2046 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_active.h"
 #include "i915_drv.h"