]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
drm/i915: Protect the request->global_seqno with the engine->timeline lock
authorChris Wilson <chris@chris-wilson.co.uk>
Thu, 23 Feb 2017 07:44:14 +0000 (07:44 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Thu, 23 Feb 2017 14:49:32 +0000 (14:49 +0000)
A request is assigned a global seqno only when it is on the hardware
execution queue. The global seqno can be used to maintain a list of
requests on the same engine in retirement order, for example for
constructing a priority queue for waiting. Prior to its execution, or
if it is subsequently removed in the event of preemption, its global
seqno is zero. As both insertion and removal from the execution queue
may operate in IRQ context, it is not guarded by the usual struct_mutex
BKL. Instead those relying on the global seqno must be prepared for its
value to change between reads. Only when the request is complete can
the global seqno be stable (due to the memory barriers on submitting
the commands to the hardware to write the breadcrumb, if the HWS shows
that it has passed the global seqno and the global seqno is unchanged
after the read, it is indeed complete).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-9-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_request.h
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c

index 9220a28fbaa87692816a0e95a01a16bf43dc3025..e5b34eaf41ca5a5fdc24e21e7f7f12ce810f7dea 100644 (file)
@@ -4009,9 +4009,10 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 }
 
 static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
 {
        struct intel_engine_cs *engine = req->engine;
+       u32 seqno;
 
        /* Note that the engine may have wrapped around the seqno, and
         * so our request->global_seqno will be ahead of the hardware,
@@ -4022,10 +4023,20 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags))
                return true;
 
+       /* The request was dequeued before we were awoken. We check after
+        * inspecting the hw to confirm that this was the same request
+        * that generated the HWS update. The memory barriers within
+        * the request execution are sufficient to ensure that a check
+        * after reading the value from hw matches this request.
+        */
+       seqno = i915_gem_request_global_seqno(req);
+       if (!seqno)
+               return false;
+
        /* Before we do the heavier coherent read of the seqno,
         * check the value (hopefully) in the CPU cacheline.
         */
-       if (__i915_gem_request_completed(req))
+       if (__i915_gem_request_completed(req, seqno))
                return true;
 
        /* Ensure our read of the seqno is coherent so that we
@@ -4076,7 +4087,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
                        wake_up_process(tsk);
                rcu_read_unlock();
 
-               if (__i915_gem_request_completed(req))
+               if (__i915_gem_request_completed(req, seqno))
                        return true;
        }
 
index e29b6f0dda29da4ded8e72ff73fb07aeb11b4811..a8167003c10b14caf7453f94e48afd2f13f11809 100644 (file)
@@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
        if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
                i915_gem_request_retire_upto(rq);
 
-       if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+       if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
                /* The GPU is now idle and this client has stalled.
                 * Since no other client has submitted a request in the
                 * meantime, assume that this client is the only one
@@ -2608,7 +2608,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
-       struct drm_i915_gem_request *request;
+       struct drm_i915_gem_request *request, *active = NULL;
+       unsigned long flags;
 
        /* We are called by the error capture and reset at a random
         * point in time. In particular, note that neither is crucially
@@ -2618,17 +2619,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
         * extra delay for a recent interrupt is pointless. Hence, we do
         * not need an engine->irq_seqno_barrier() before the seqno reads.
         */
+       spin_lock_irqsave(&engine->timeline->lock, flags);
        list_for_each_entry(request, &engine->timeline->requests, link) {
-               if (__i915_gem_request_completed(request))
+               if (__i915_gem_request_completed(request,
+                                                request->global_seqno))
                        continue;
 
                GEM_BUG_ON(request->engine != engine);
                GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                    &request->fence.flags));
-               return request;
+
+               active = request;
+               break;
        }
+       spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
-       return NULL;
+       return active;
 }
 
 static bool engine_stalled(struct intel_engine_cs *engine)
index 37ef7084fce5ee34933391053ef3d7d51a7cfb23..d394cbe8de11419c3fef3e5e1b464d9c050f6183 100644 (file)
@@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
                intel_engine_enable_signaling(request);
        spin_unlock(&request->lock);
 
-       GEM_BUG_ON(!request->global_seqno);
        engine->emit_breadcrumb(request,
                                request->ring->vaddr + request->postfix);
 
@@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
        /* Move the oldest request to the slab-cache (if not in use!) */
        req = list_first_entry_or_null(&engine->timeline->requests,
                                       typeof(*req), link);
-       if (req && __i915_gem_request_completed(req))
+       if (req && i915_gem_request_completed(req))
                i915_gem_request_retire(req);
 
        /* Beware: Dragons be flying overhead.
@@ -611,6 +610,7 @@ static int
 i915_gem_request_await_request(struct drm_i915_gem_request *to,
                               struct drm_i915_gem_request *from)
 {
+       u32 seqno;
        int ret;
 
        GEM_BUG_ON(to == from);
@@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
                return ret < 0 ? ret : 0;
        }
 
-       if (!from->global_seqno) {
+       seqno = i915_gem_request_global_seqno(from);
+       if (!seqno) {
                ret = i915_sw_fence_await_dma_fence(&to->submit,
                                                    &from->fence, 0,
                                                    GFP_KERNEL);
                return ret < 0 ? ret : 0;
        }
 
-       if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
+       if (seqno <= to->timeline->sync_seqno[from->engine->id])
                return 0;
 
        trace_i915_gem_ring_sync_to(to, from);
@@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
                        return ret;
        }
 
-       to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+       to->timeline->sync_seqno[from->engine->id] = seqno;
        return 0;
 }
 
@@ -938,7 +939,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *req,
-                        int state, unsigned long timeout_us)
+                        u32 seqno, int state, unsigned long timeout_us)
 {
        struct intel_engine_cs *engine = req->engine;
        unsigned int irq, cpu;
@@ -956,7 +957,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
        irq = atomic_read(&engine->irq_count);
        timeout_us += local_clock_us(&cpu);
        do {
-               if (__i915_gem_request_completed(req))
+               if (seqno != i915_gem_request_global_seqno(req))
+                       break;
+
+               if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
+                                     seqno))
                        return true;
 
                /* Seqno are meant to be ordered *before* the interrupt. If
@@ -1028,11 +1033,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
        if (flags & I915_WAIT_LOCKED)
                add_wait_queue(errq, &reset);
 
+       intel_wait_init(&wait);
+
        reset_wait_queue(&req->execute, &exec);
-       if (!req->global_seqno) {
+       if (!intel_wait_update_request(&wait, req)) {
                do {
                        set_current_state(state);
-                       if (req->global_seqno)
+
+                       if (intel_wait_update_request(&wait, req))
                                break;
 
                        if (flags & I915_WAIT_LOCKED &&
@@ -1060,7 +1068,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
                if (timeout < 0)
                        goto complete;
 
-               GEM_BUG_ON(!req->global_seqno);
+               GEM_BUG_ON(!intel_wait_has_seqno(&wait));
        }
        GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
@@ -1069,7 +1077,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
                goto complete;
 
        set_current_state(state);
-       intel_wait_init(&wait, req->global_seqno);
        if (intel_engine_add_wait(req->engine, &wait))
                /* In order to check that we haven't missed the interrupt
                 * as we enabled it, we need to kick ourselves to do a
@@ -1090,7 +1097,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
                timeout = io_schedule_timeout(timeout);
 
-               if (intel_wait_complete(&wait))
+               if (intel_wait_complete(&wait) &&
+                   intel_wait_check_request(&wait, req))
                        break;
 
                set_current_state(state);
@@ -1141,14 +1149,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 static void engine_retire_requests(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *request, *next;
+       u32 seqno = intel_engine_get_seqno(engine);
+       LIST_HEAD(retire);
 
+       spin_lock_irq(&engine->timeline->lock);
        list_for_each_entry_safe(request, next,
                                 &engine->timeline->requests, link) {
-               if (!__i915_gem_request_completed(request))
-                       return;
+               if (!i915_seqno_passed(seqno, request->global_seqno))
+                       break;
 
-               i915_gem_request_retire(request);
+               list_move_tail(&request->link, &retire);
        }
+       spin_unlock_irq(&engine->timeline->lock);
+
+       list_for_each_entry_safe(request, next, &retire, link)
+               i915_gem_request_retire(request);
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
index 467d3e13fce04066ebfe54269c1e7e1af371bf7f..b81f6709905c2646e1f92a0582eada3e39859d18 100644 (file)
@@ -135,6 +135,11 @@ struct drm_i915_gem_request {
        struct i915_priotree priotree;
        struct i915_dependency dep;
 
+       /** GEM sequence number associated with this request on the
+        * global execution timeline. It is zero when the request is not
+        * on the HW queue (i.e. not on the engine timeline list).
+        * Its value is guarded by the timeline spinlock.
+        */
        u32 global_seqno;
 
        /** Position in the ring of the start of the request */
@@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
        *pdst = src;
 }
 
+/**
+ * i915_gem_request_global_seqno - report the current global seqno
+ * @request - the request
+ *
+ * A request is assigned a global seqno only when it is on the hardware
+ * execution queue. The global seqno can be used to maintain a list of
+ * requests on the same engine in retirement order, for example for
+ * constructing a priority queue for waiting. Prior to its execution, or
+ * if it is subsequently removed in the event of preemption, its global
+ * seqno is zero. As both insertion and removal from the execution queue
+ * may operate in IRQ context, it is not guarded by the usual struct_mutex
+ * BKL. Instead those relying on the global seqno must be prepared for its
+ * value to change between reads. Only when the request is complete can
+ * the global seqno be stable (due to the memory barriers on submitting
+ * the commands to the hardware to write the breadcrumb, if the HWS shows
+ * that it has passed the global seqno and the global seqno is unchanged
+ * after the read, it is indeed complete).
+ */
+static u32
+i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
+{
+       return READ_ONCE(request->global_seqno);
+}
+
 int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
                              struct drm_i915_gem_object *obj,
@@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-__i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
 {
-       GEM_BUG_ON(!req->global_seqno);
+       GEM_BUG_ON(!seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-                                req->global_seqno - 1);
+                                seqno - 1);
 }
 
 static inline bool
 i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-       if (!req->global_seqno)
+       u32 seqno;
+
+       seqno = i915_gem_request_global_seqno(req);
+       if (!seqno)
                return false;
 
-       return __i915_gem_request_started(req);
+       return __i915_gem_request_started(req, seqno);
 }
 
 static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
 {
-       GEM_BUG_ON(!req->global_seqno);
-       return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-                                req->global_seqno);
+       GEM_BUG_ON(!seqno);
+       return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
+               seqno == i915_gem_request_global_seqno(req);
 }
 
 static inline bool
 i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-       if (!req->global_seqno)
+       u32 seqno;
+
+       seqno = i915_gem_request_global_seqno(req);
+       if (!seqno)
                return false;
 
-       return __i915_gem_request_completed(req);
+       return __i915_gem_request_completed(req, seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
-                        int state, unsigned long timeout_us);
+                        u32 seqno, int state, unsigned long timeout_us);
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
                                     int state, unsigned long timeout_us)
 {
-       return (__i915_gem_request_started(request) &&
-               __i915_spin_request(request, state, timeout_us));
+       u32 seqno;
+
+       seqno = i915_gem_request_global_seqno(request);
+       return (__i915_gem_request_started(request, seqno) &&
+               __i915_spin_request(request, seqno, state, timeout_us));
 }
 
 /* We treat requests as fences. This is not be to confused with our
index 4f4e703d1b147e7a0b7869e1cb5928da52deb645..d5bf4c0b2debb63750cdb0545724a67bb2af7fa6 100644 (file)
@@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        struct rb_node *parent, **p;
        bool first, wakeup;
+       u32 seqno;
 
        /* Note that we may be called from an interrupt handler on another
         * device (e.g. nouveau signaling a fence completion causing us
@@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
        /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
        assert_spin_locked(&request->lock);
-       if (!request->global_seqno)
+
+       seqno = i915_gem_request_global_seqno(request);
+       if (!seqno)
                return;
 
        request->signaling.wait.tsk = b->signaler;
-       request->signaling.wait.seqno = request->global_seqno;
+       request->signaling.wait.seqno = seqno;
        i915_gem_request_get(request);
 
        spin_lock(&b->lock);
@@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
        p = &b->signals.rb_node;
        while (*p) {
                parent = *p;
-               if (i915_seqno_passed(request->global_seqno,
-                                     to_signaler(parent)->global_seqno)) {
+               if (i915_seqno_passed(seqno,
+                                     to_signaler(parent)->signaling.wait.seqno)) {
                        p = &parent->rb_right;
                        first = false;
                } else {
index b91c2c7ef5a625a6d178c9f2a7b252818f9122df..04e7d5a9ee3ab791060e4755f746dcb4a3a0d2a6 100644 (file)
@@ -582,10 +582,47 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
+static inline void intel_wait_init(struct intel_wait *wait)
 {
        wait->tsk = current;
+}
+
+static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
+{
+       wait->tsk = current;
+       wait->seqno = seqno;
+}
+
+static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
+{
+       return wait->seqno;
+}
+
+static inline bool
+intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
+{
        wait->seqno = seqno;
+       return intel_wait_has_seqno(wait);
+}
+
+static inline bool
+intel_wait_update_request(struct intel_wait *wait,
+                         const struct drm_i915_gem_request *rq)
+{
+       return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
+}
+
+static inline bool
+intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
+{
+       return wait->seqno == seqno;
+}
+
+static inline bool
+intel_wait_check_request(const struct intel_wait *wait,
+                        const struct drm_i915_gem_request *rq)
+{
+       return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
 }
 
 static inline bool intel_wait_complete(const struct intel_wait *wait)
index 6426acc9fdca1062e8cff36061cc1296c5d1e3fe..621be1ca53d8f840ccc6dd2ad0ad664ef145f8f8 100644 (file)
@@ -131,7 +131,7 @@ static int igt_random_insert_remove(void *arg)
                goto out_bitmap;
 
        for (n = 0; n < count; n++)
-               intel_wait_init(&waiters[n], seqno_bias + n);
+               intel_wait_init_for_seqno(&waiters[n], seqno_bias + n);
 
        err = check_rbtree(engine, bitmap, waiters, count);
        if (err)
@@ -197,7 +197,7 @@ static int igt_insert_complete(void *arg)
                goto out_waiters;
 
        for (n = 0; n < count; n++) {
-               intel_wait_init(&waiters[n], n + seqno_bias);
+               intel_wait_init_for_seqno(&waiters[n], n + seqno_bias);
                intel_engine_add_wait(engine, &waiters[n]);
                __set_bit(n, bitmap);
        }
@@ -318,7 +318,7 @@ static int igt_wakeup_thread(void *arg)
        while (wait_for_ready(w)) {
                GEM_BUG_ON(kthread_should_stop());
 
-               intel_wait_init(&wait, w->seqno);
+               intel_wait_init_for_seqno(&wait, w->seqno);
                intel_engine_add_wait(w->engine, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);