1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before), we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use it. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time but is instead kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted, but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
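 * As a rough illustration of the pairing rule described above (a hedged
 * pseudo-code sketch only -- names such as same_context() and
 * submit_to_elsp() are placeholders, not functions in this file):
 *
 *	first = head(queue);
 *	second = next(first);
 *	while (second && same_context(first, second))
 *		second = next(second);	/* coalesce into first's ELSP entry */
 *	submit_to_elsp(first, second);	/* second may be NULL */
 *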
133  */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150
151 #define RING_EXECLIST_QFULL             (1 << 0x2)
152 #define RING_EXECLIST1_VALID            (1 << 0x3)
153 #define RING_EXECLIST0_VALID            (1 << 0x4)
154 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
155 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
156 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
157
158 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
159 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
160 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
161 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
162 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
163 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
164
165 #define GEN8_CTX_STATUS_COMPLETED_MASK \
166          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
167
168 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
169
170 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
171 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
172 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
173 #define GEN12_IDLE_CTX_ID               0x7FF
174 #define GEN12_CSB_CTX_VALID(csb_dw) \
175         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
176
177 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
178 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
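/*
 * The WA tail is the pair of MI_NOOPs appended after the end of each request
 * (see the WaIdleLiteRestore comment in execlists_update_context() below) so
 * that RING_TAIL can always advance on resubmission.
 */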
179 #define WA_TAIL_DWORDS 2
180 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
181
182 struct virtual_engine {
183         struct intel_engine_cs base;
184         struct intel_context context;
185
186         /*
187          * We allow only a single request through the virtual engine at a time
188          * (each request in the timeline waits for the completion fence of
189          * the previous before being submitted). By restricting ourselves to
190          * only submitting a single request, each request is placed on to a
191          * physical engine to maximise load spreading (by virtue of the late greedy
192          * scheduling -- each real engine takes the next available request
193          * upon idling).
194          */
195         struct i915_request *request;
196
197         /*
198          * We keep a rbtree of available virtual engines inside each physical
199          * engine, sorted by priority. Here we preallocate the nodes we need
200          * for the virtual engine, indexed by physical_engine->id.
201          */
202         struct ve_node {
203                 struct rb_node rb;
204                 int prio;
205         } nodes[I915_NUM_ENGINES];
206
207         /*
208          * Keep track of bonded pairs -- restrictions upon our selection
209          * of physical engines any particular request may be submitted to.
210          * If we receive a submit-fence from a master engine, we will only
211          * use one of the physical engines in sibling_mask.
212          */
213         struct ve_bond {
214                 const struct intel_engine_cs *master;
215                 intel_engine_mask_t sibling_mask;
216         } *bonds;
217         unsigned int num_bonds;
218
219         /* And finally, which physical engines this virtual engine maps onto. */
220         unsigned int num_siblings;
221         struct intel_engine_cs *siblings[0];
222 };
223
224 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
225 {
226         GEM_BUG_ON(!intel_engine_is_virtual(engine));
227         return container_of(engine, struct virtual_engine, base);
228 }
229
230 static int __execlists_context_alloc(struct intel_context *ce,
231                                      struct intel_engine_cs *engine);
232
233 static void execlists_init_reg_state(u32 *reg_state,
234                                      const struct intel_context *ce,
235                                      const struct intel_engine_cs *engine,
236                                      const struct intel_ring *ring,
237                                      bool close);
238 static void
239 __execlists_update_reg_state(const struct intel_context *ce,
240                              const struct intel_engine_cs *engine);
241
242 static void mark_eio(struct i915_request *rq)
243 {
244         if (i915_request_completed(rq))
245                 return;
246
247         GEM_BUG_ON(i915_request_signaled(rq));
248
249         dma_fence_set_error(&rq->fence, -EIO);
250         i915_request_mark_complete(rq);
251 }
252
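/*
 * Walk backwards along the timeline from @rq and return the oldest request
 * that has not yet completed. This is used when rewinding a context so that
 * we resume from the start of an incomplete request rather than part way
 * through its batch (see reset_active() below).
 */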
253 static struct i915_request *
254 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
255 {
256         struct i915_request *active = rq;
257
258         rcu_read_lock();
259         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
260                 if (i915_request_completed(rq))
261                         break;
262
263                 active = rq;
264         }
265         rcu_read_unlock();
266
267         return active;
268 }
269
270 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
271 {
272         return (i915_ggtt_offset(engine->status_page.vma) +
273                 I915_GEM_HWS_PREEMPT_ADDR);
274 }
275
276 static inline void
277 ring_set_paused(const struct intel_engine_cs *engine, int state)
278 {
279         /*
280          * We inspect HWS_PREEMPT with a semaphore inside
281          * engine->emit_fini_breadcrumb. If the dword is true,
282          * the ring is paused as the semaphore will busywait
283          * until the dword is false.
284          */
285         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
286         if (state)
287                 wmb();
288 }
289
290 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
291 {
292         return rb_entry(rb, struct i915_priolist, node);
293 }
294
295 static inline int rq_prio(const struct i915_request *rq)
296 {
297         return rq->sched.attr.priority;
298 }
299
300 static int effective_prio(const struct i915_request *rq)
301 {
302         int prio = rq_prio(rq);
303
304         /*
305          * If this request is special and must not be interrupted at any
306          * cost, so be it. Note we are only checking the most recent request
307          * in the context and so may be masking an earlier vip request. It
308          * is hoped that under the conditions where nopreempt is used, this
309          * will not matter (i.e. all requests to that context will be
310          * nopreempt for as long as desired).
311          */
312         if (i915_request_has_nopreempt(rq))
313                 prio = I915_PRIORITY_UNPREEMPTABLE;
314
315         /*
316          * On unwinding the active request, we give it a priority bump
317          * if it has completed waiting on any semaphore. If we know that
318          * the request has already started, we can prevent an unwanted
319          * preempt-to-idle cycle by taking that into account now.
320          */
321         if (__i915_request_has_started(rq))
322                 prio |= I915_PRIORITY_NOSEMAPHORE;
323
324         /* Restrict mere WAIT boosts from triggering preemption */
325         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
326         return prio | __NO_PREEMPTION;
327 }
328
329 static int queue_prio(const struct intel_engine_execlists *execlists)
330 {
331         struct i915_priolist *p;
332         struct rb_node *rb;
333
334         rb = rb_first_cached(&execlists->queue);
335         if (!rb)
336                 return INT_MIN;
337
338         /*
339          * As the priolist[] is inverted, with the highest priority in [0],
340          * we have to flip the index value back into a priority.
341          */
342         p = to_priolist(rb);
343         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
344 }
345
346 static inline bool need_preempt(const struct intel_engine_cs *engine,
347                                 const struct i915_request *rq,
348                                 struct rb_node *rb)
349 {
350         int last_prio;
351
352         if (!intel_engine_has_semaphores(engine))
353                 return false;
354
355         /*
356          * Check if the current priority hint merits a preemption attempt.
357          *
358          * We record the highest value priority we saw during rescheduling
359          * prior to this dequeue, therefore we know that if it is strictly
360          * less than the current tail of ELSP[0], we do not need to force
361          * a preempt-to-idle cycle.
362          *
363          * However, the priority hint is a mere hint that we may need to
364          * preempt. If that hint is stale or we may be trying to preempt
365          * ourselves, ignore the request.
366          *
367          * More naturally we would write
368          *      prio >= max(0, last);
369          * except that we wish to prevent triggering preemption at the same
370          * priority level: the task that is running should remain running
371          * to preserve FIFO ordering of dependencies.
372          */
373         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
374         if (engine->execlists.queue_priority_hint <= last_prio)
375                 return false;
376
377         /*
378          * Check against the first request in ELSP[1]; it will, thanks to the
379          * power of PI, be the highest priority of that context.
380          */
381         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
382             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
383                 return true;
384
385         if (rb) {
386                 struct virtual_engine *ve =
387                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
388                 bool preempt = false;
389
390                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
391                         struct i915_request *next;
392
393                         rcu_read_lock();
394                         next = READ_ONCE(ve->request);
395                         if (next)
396                                 preempt = rq_prio(next) > last_prio;
397                         rcu_read_unlock();
398                 }
399
400                 if (preempt)
401                         return preempt;
402         }
403
404         /*
405          * If the inflight context did not trigger the preemption, then maybe
406          * it was the set of queued requests? Pick the highest priority in
407          * the queue (the first active priolist) and see if it deserves to be
408          * running instead of ELSP[0].
409          *
410          * The highest priority request in the queue cannot be either
411          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
412          * context, its priority would not exceed ELSP[0] aka last_prio.
413          */
414         return queue_prio(&engine->execlists) > last_prio;
415 }
416
417 __maybe_unused static inline bool
418 assert_priority_queue(const struct i915_request *prev,
419                       const struct i915_request *next)
420 {
421         /*
422          * Without preemption, the prev may refer to the still active element
423          * which we refuse to let go.
424          *
425          * Even with preemption, there are times when we think it is better not
426          * to preempt and leave an ostensibly lower priority request in flight.
427          */
428         if (i915_request_is_active(prev))
429                 return true;
430
431         return rq_prio(prev) >= rq_prio(next);
432 }
433
434 /*
435  * The context descriptor encodes various attributes of a context,
436  * including its GTT address and some flags. Because it's fairly
437  * expensive to calculate, we'll just do it once and cache the result,
438  * which remains valid until the context is unpinned.
439  *
440  * This is what a descriptor looks like, from LSB to MSB::
441  *
442  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
443  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
444  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
445  *      bits 53-54:    mbz, reserved for use by hardware
446  *      bits 55-63:    group ID, currently unused and set to 0
447  *
448  * Starting from Gen11, the upper dword of the descriptor has a new format:
449  *
450  *      bits 32-36:    reserved
451  *      bits 37-47:    SW context ID
452  *      bits 48-53:    engine instance
453  *      bit 54:        mbz, reserved for use by hardware
454  *      bits 55-60:    SW counter
455  *      bits 61-63:    engine class
456  *
457  * engine info, SW context ID and SW counter need to form a unique number
458  * (Context ID) per lrc.
459  */
460 static u64
461 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
462 {
463         u64 desc;
464
465         desc = INTEL_LEGACY_32B_CONTEXT;
466         if (i915_vm_is_4lvl(ce->vm))
467                 desc = INTEL_LEGACY_64B_CONTEXT;
468         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
469
470         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
471         if (IS_GEN(engine->i915, 8))
472                 desc |= GEN8_CTX_L3LLC_COHERENT;
473
474         desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
475         /*
476          * The following 32 bits are copied into the OA reports (dword 2).
477          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
478          * anything below.
479          */
480         if (INTEL_GEN(engine->i915) >= 11) {
481                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
482                                                                 /* bits 48-53 */
483
484                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
485                                                                 /* bits 61-63 */
486         }
487
488         return desc;
489 }
490
491 static inline unsigned int dword_in_page(void *addr)
492 {
493         return offset_in_page(addr) / sizeof(u32);
494 }
495
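/*
 * set_offsets() consumes a compact byte-stream description of the default
 * register layout of a context image (see the gen8/9/11/12 tables below):
 *
 *  - a byte with BIT(7) set (NOP(x)) means "skip x dwords", optionally
 *    clearing them to MI_NOOP;
 *  - otherwise the byte is an LRI header, count in bits 0-5 and flags in
 *    bits 6-7 (POSTED selects MI_LRI_FORCE_POSTED), expanded into a
 *    MI_LOAD_REGISTER_IMM(count);
 *  - each register offset then follows as a 7-bits-per-byte value with
 *    BIT(7) as the continuation marker (REG() for one byte, REG16() for
 *    two), taken relative to engine->mmio_base;
 *  - a zero byte terminates the stream, and END(x) supplies the total
 *    number of state dwords so that the remainder of the page can be
 *    cleared and the image closed with MI_BATCH_BUFFER_END.
 */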
496 static void set_offsets(u32 *regs,
497                         const u8 *data,
498                         const struct intel_engine_cs *engine,
499                         bool clear)
500 #define NOP(x) (BIT(7) | (x))
501 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
502 #define POSTED BIT(0)
503 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
504 #define REG16(x) \
505         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
506         (((x) >> 2) & 0x7f)
507 #define END(x) 0, (x)
508 {
509         const u32 base = engine->mmio_base;
510
511         while (*data) {
512                 u8 count, flags;
513
514                 if (*data & BIT(7)) { /* skip */
515                         count = *data++ & ~BIT(7);
516                         if (clear)
517                                 memset32(regs, MI_NOOP, count);
518                         regs += count;
519                         continue;
520                 }
521
522                 count = *data & 0x3f;
523                 flags = *data >> 6;
524                 data++;
525
526                 *regs = MI_LOAD_REGISTER_IMM(count);
527                 if (flags & POSTED)
528                         *regs |= MI_LRI_FORCE_POSTED;
529                 if (INTEL_GEN(engine->i915) >= 11)
530                         *regs |= MI_LRI_CS_MMIO;
531                 regs++;
532
533                 GEM_BUG_ON(!count);
534                 do {
535                         u32 offset = 0;
536                         u8 v;
537
538                         do {
539                                 v = *data++;
540                                 offset <<= 7;
541                                 offset |= v & ~BIT(7);
542                         } while (v & BIT(7));
543
544                         regs[0] = base + (offset << 2);
545                         if (clear)
546                                 regs[1] = 0;
547                         regs += 2;
548                 } while (--count);
549         }
550
551         if (clear) {
552                 u8 count = *++data;
553
554                 /* Clear past the tail for HW access */
555                 GEM_BUG_ON(dword_in_page(regs) > count);
556                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
557
558                 /* Close the batch; used mainly by live_lrc_layout() */
559                 *regs = MI_BATCH_BUFFER_END;
560                 if (INTEL_GEN(engine->i915) >= 10)
561                         *regs |= BIT(0);
562         }
563 }
564
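/*
 * The tables below give the default MI_LOAD_REGISTER_IMM layout of the
 * context image for each gen/engine class, encoded with the NOP/LRI/REG/
 * REG16/END helpers above and checked by the live_lrc_layout() selftest.
 */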
565 static const u8 gen8_xcs_offsets[] = {
566         NOP(1),
567         LRI(11, 0),
568         REG16(0x244),
569         REG(0x034),
570         REG(0x030),
571         REG(0x038),
572         REG(0x03c),
573         REG(0x168),
574         REG(0x140),
575         REG(0x110),
576         REG(0x11c),
577         REG(0x114),
578         REG(0x118),
579
580         NOP(9),
581         LRI(9, 0),
582         REG16(0x3a8),
583         REG16(0x28c),
584         REG16(0x288),
585         REG16(0x284),
586         REG16(0x280),
587         REG16(0x27c),
588         REG16(0x278),
589         REG16(0x274),
590         REG16(0x270),
591
592         NOP(13),
593         LRI(2, 0),
594         REG16(0x200),
595         REG(0x028),
596
597         END(80)
598 };
599
600 static const u8 gen9_xcs_offsets[] = {
601         NOP(1),
602         LRI(14, POSTED),
603         REG16(0x244),
604         REG(0x034),
605         REG(0x030),
606         REG(0x038),
607         REG(0x03c),
608         REG(0x168),
609         REG(0x140),
610         REG(0x110),
611         REG(0x11c),
612         REG(0x114),
613         REG(0x118),
614         REG(0x1c0),
615         REG(0x1c4),
616         REG(0x1c8),
617
618         NOP(3),
619         LRI(9, POSTED),
620         REG16(0x3a8),
621         REG16(0x28c),
622         REG16(0x288),
623         REG16(0x284),
624         REG16(0x280),
625         REG16(0x27c),
626         REG16(0x278),
627         REG16(0x274),
628         REG16(0x270),
629
630         NOP(13),
631         LRI(1, POSTED),
632         REG16(0x200),
633
634         NOP(13),
635         LRI(44, POSTED),
636         REG(0x028),
637         REG(0x09c),
638         REG(0x0c0),
639         REG(0x178),
640         REG(0x17c),
641         REG16(0x358),
642         REG(0x170),
643         REG(0x150),
644         REG(0x154),
645         REG(0x158),
646         REG16(0x41c),
647         REG16(0x600),
648         REG16(0x604),
649         REG16(0x608),
650         REG16(0x60c),
651         REG16(0x610),
652         REG16(0x614),
653         REG16(0x618),
654         REG16(0x61c),
655         REG16(0x620),
656         REG16(0x624),
657         REG16(0x628),
658         REG16(0x62c),
659         REG16(0x630),
660         REG16(0x634),
661         REG16(0x638),
662         REG16(0x63c),
663         REG16(0x640),
664         REG16(0x644),
665         REG16(0x648),
666         REG16(0x64c),
667         REG16(0x650),
668         REG16(0x654),
669         REG16(0x658),
670         REG16(0x65c),
671         REG16(0x660),
672         REG16(0x664),
673         REG16(0x668),
674         REG16(0x66c),
675         REG16(0x670),
676         REG16(0x674),
677         REG16(0x678),
678         REG16(0x67c),
679         REG(0x068),
680
681         END(176)
682 };
683
684 static const u8 gen12_xcs_offsets[] = {
685         NOP(1),
686         LRI(13, POSTED),
687         REG16(0x244),
688         REG(0x034),
689         REG(0x030),
690         REG(0x038),
691         REG(0x03c),
692         REG(0x168),
693         REG(0x140),
694         REG(0x110),
695         REG(0x1c0),
696         REG(0x1c4),
697         REG(0x1c8),
698         REG(0x180),
699         REG16(0x2b4),
700
701         NOP(5),
702         LRI(9, POSTED),
703         REG16(0x3a8),
704         REG16(0x28c),
705         REG16(0x288),
706         REG16(0x284),
707         REG16(0x280),
708         REG16(0x27c),
709         REG16(0x278),
710         REG16(0x274),
711         REG16(0x270),
712
713         END(80)
714 };
715
716 static const u8 gen8_rcs_offsets[] = {
717         NOP(1),
718         LRI(14, POSTED),
719         REG16(0x244),
720         REG(0x034),
721         REG(0x030),
722         REG(0x038),
723         REG(0x03c),
724         REG(0x168),
725         REG(0x140),
726         REG(0x110),
727         REG(0x11c),
728         REG(0x114),
729         REG(0x118),
730         REG(0x1c0),
731         REG(0x1c4),
732         REG(0x1c8),
733
734         NOP(3),
735         LRI(9, POSTED),
736         REG16(0x3a8),
737         REG16(0x28c),
738         REG16(0x288),
739         REG16(0x284),
740         REG16(0x280),
741         REG16(0x27c),
742         REG16(0x278),
743         REG16(0x274),
744         REG16(0x270),
745
746         NOP(13),
747         LRI(1, 0),
748         REG(0x0c8),
749
750         END(80)
751 };
752
753 static const u8 gen9_rcs_offsets[] = {
754         NOP(1),
755         LRI(14, POSTED),
756         REG16(0x244),
757         REG(0x34),
758         REG(0x30),
759         REG(0x38),
760         REG(0x3c),
761         REG(0x168),
762         REG(0x140),
763         REG(0x110),
764         REG(0x11c),
765         REG(0x114),
766         REG(0x118),
767         REG(0x1c0),
768         REG(0x1c4),
769         REG(0x1c8),
770
771         NOP(3),
772         LRI(9, POSTED),
773         REG16(0x3a8),
774         REG16(0x28c),
775         REG16(0x288),
776         REG16(0x284),
777         REG16(0x280),
778         REG16(0x27c),
779         REG16(0x278),
780         REG16(0x274),
781         REG16(0x270),
782
783         NOP(13),
784         LRI(1, 0),
785         REG(0xc8),
786
787         NOP(13),
788         LRI(44, POSTED),
789         REG(0x28),
790         REG(0x9c),
791         REG(0xc0),
792         REG(0x178),
793         REG(0x17c),
794         REG16(0x358),
795         REG(0x170),
796         REG(0x150),
797         REG(0x154),
798         REG(0x158),
799         REG16(0x41c),
800         REG16(0x600),
801         REG16(0x604),
802         REG16(0x608),
803         REG16(0x60c),
804         REG16(0x610),
805         REG16(0x614),
806         REG16(0x618),
807         REG16(0x61c),
808         REG16(0x620),
809         REG16(0x624),
810         REG16(0x628),
811         REG16(0x62c),
812         REG16(0x630),
813         REG16(0x634),
814         REG16(0x638),
815         REG16(0x63c),
816         REG16(0x640),
817         REG16(0x644),
818         REG16(0x648),
819         REG16(0x64c),
820         REG16(0x650),
821         REG16(0x654),
822         REG16(0x658),
823         REG16(0x65c),
824         REG16(0x660),
825         REG16(0x664),
826         REG16(0x668),
827         REG16(0x66c),
828         REG16(0x670),
829         REG16(0x674),
830         REG16(0x678),
831         REG16(0x67c),
832         REG(0x68),
833
834         END(176)
835 };
836
837 static const u8 gen11_rcs_offsets[] = {
838         NOP(1),
839         LRI(15, POSTED),
840         REG16(0x244),
841         REG(0x034),
842         REG(0x030),
843         REG(0x038),
844         REG(0x03c),
845         REG(0x168),
846         REG(0x140),
847         REG(0x110),
848         REG(0x11c),
849         REG(0x114),
850         REG(0x118),
851         REG(0x1c0),
852         REG(0x1c4),
853         REG(0x1c8),
854         REG(0x180),
855
856         NOP(1),
857         LRI(9, POSTED),
858         REG16(0x3a8),
859         REG16(0x28c),
860         REG16(0x288),
861         REG16(0x284),
862         REG16(0x280),
863         REG16(0x27c),
864         REG16(0x278),
865         REG16(0x274),
866         REG16(0x270),
867
868         LRI(1, POSTED),
869         REG(0x1b0),
870
871         NOP(10),
872         LRI(1, 0),
873         REG(0x0c8),
874
875         END(80)
876 };
877
878 static const u8 gen12_rcs_offsets[] = {
879         NOP(1),
880         LRI(13, POSTED),
881         REG16(0x244),
882         REG(0x034),
883         REG(0x030),
884         REG(0x038),
885         REG(0x03c),
886         REG(0x168),
887         REG(0x140),
888         REG(0x110),
889         REG(0x1c0),
890         REG(0x1c4),
891         REG(0x1c8),
892         REG(0x180),
893         REG16(0x2b4),
894
895         NOP(5),
896         LRI(9, POSTED),
897         REG16(0x3a8),
898         REG16(0x28c),
899         REG16(0x288),
900         REG16(0x284),
901         REG16(0x280),
902         REG16(0x27c),
903         REG16(0x278),
904         REG16(0x274),
905         REG16(0x270),
906
907         LRI(3, POSTED),
908         REG(0x1b0),
909         REG16(0x5a8),
910         REG16(0x5ac),
911
912         NOP(6),
913         LRI(1, 0),
914         REG(0x0c8),
915
916         END(80)
917 };
918
919 #undef END
920 #undef REG16
921 #undef REG
922 #undef LRI
923 #undef NOP
924
925 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
926 {
927         /*
928          * The gen12+ lists only have the registers we program in the basic
929          * default state. We rely on the context image using relative
930          * addressing to automatically fix up the register state between the
931          * physical engines for the virtual engine.
932          */
933         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
934                    !intel_engine_has_relative_mmio(engine));
935
936         if (engine->class == RENDER_CLASS) {
937                 if (INTEL_GEN(engine->i915) >= 12)
938                         return gen12_rcs_offsets;
939                 else if (INTEL_GEN(engine->i915) >= 11)
940                         return gen11_rcs_offsets;
941                 else if (INTEL_GEN(engine->i915) >= 9)
942                         return gen9_rcs_offsets;
943                 else
944                         return gen8_rcs_offsets;
945         } else {
946                 if (INTEL_GEN(engine->i915) >= 12)
947                         return gen12_xcs_offsets;
948                 else if (INTEL_GEN(engine->i915) >= 9)
949                         return gen9_xcs_offsets;
950                 else
951                         return gen8_xcs_offsets;
952         }
953 }
954
955 static struct i915_request *
956 __unwind_incomplete_requests(struct intel_engine_cs *engine)
957 {
958         struct i915_request *rq, *rn, *active = NULL;
959         struct list_head *uninitialized_var(pl);
960         int prio = I915_PRIORITY_INVALID;
961
962         lockdep_assert_held(&engine->active.lock);
963
964         list_for_each_entry_safe_reverse(rq, rn,
965                                          &engine->active.requests,
966                                          sched.link) {
967                 if (i915_request_completed(rq))
968                         continue; /* XXX */
969
970                 __i915_request_unsubmit(rq);
971
972                 /*
973                  * Push the request back into the queue for later resubmission.
974                  * If this request is not native to this physical engine (i.e.
975                  * it came from a virtual source), push it back onto the virtual
976                  * engine so that it can be moved across onto another physical
977                  * engine as load dictates.
978                  */
979                 if (likely(rq->execution_mask == engine->mask)) {
980                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
981                         if (rq_prio(rq) != prio) {
982                                 prio = rq_prio(rq);
983                                 pl = i915_sched_lookup_priolist(engine, prio);
984                         }
985                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
986
987                         list_move(&rq->sched.link, pl);
988                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
989
990                         active = rq;
991                 } else {
992                         struct intel_engine_cs *owner = rq->context->engine;
993
994                         /*
995                          * Decouple the virtual breadcrumb before moving it
996                          * back to the virtual engine -- we don't want the
997                          * request to complete in the background and try
998                          * and cancel the breadcrumb on the virtual engine
999                          * (instead of the old engine where it is linked)!
1000                          */
1001                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1002                                      &rq->fence.flags)) {
1003                                 spin_lock_nested(&rq->lock,
1004                                                  SINGLE_DEPTH_NESTING);
1005                                 i915_request_cancel_breadcrumb(rq);
1006                                 spin_unlock(&rq->lock);
1007                         }
1008                         rq->engine = owner;
1009                         owner->submit_request(rq);
1010                         active = NULL;
1011                 }
1012         }
1013
1014         return active;
1015 }
1016
1017 struct i915_request *
1018 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1019 {
1020         struct intel_engine_cs *engine =
1021                 container_of(execlists, typeof(*engine), execlists);
1022
1023         return __unwind_incomplete_requests(engine);
1024 }
1025
1026 static inline void
1027 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1028 {
1029         /*
1030          * Only used when GVT-g is enabled now. When GVT-g is disabled,
1031          * the compiler should eliminate this function as dead code.
1032          */
1033         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1034                 return;
1035
1036         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1037                                    status, rq);
1038 }
1039
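/*
 * intel_engine_context_in/out() bracket the time a context is resident on
 * the HW: under the stats seqlock we count the contexts currently active and
 * accumulate the elapsed busy time into engine->stats.total whenever that
 * count drops back to zero.
 */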
1040 static void intel_engine_context_in(struct intel_engine_cs *engine)
1041 {
1042         unsigned long flags;
1043
1044         if (READ_ONCE(engine->stats.enabled) == 0)
1045                 return;
1046
1047         write_seqlock_irqsave(&engine->stats.lock, flags);
1048
1049         if (engine->stats.enabled > 0) {
1050                 if (engine->stats.active++ == 0)
1051                         engine->stats.start = ktime_get();
1052                 GEM_BUG_ON(engine->stats.active == 0);
1053         }
1054
1055         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1056 }
1057
1058 static void intel_engine_context_out(struct intel_engine_cs *engine)
1059 {
1060         unsigned long flags;
1061
1062         if (READ_ONCE(engine->stats.enabled) == 0)
1063                 return;
1064
1065         write_seqlock_irqsave(&engine->stats.lock, flags);
1066
1067         if (engine->stats.enabled > 0) {
1068                 ktime_t last;
1069
1070                 if (engine->stats.active && --engine->stats.active == 0) {
1071                         /*
1072                          * Decrementing the active context count has left the GPU
1073                          * idle, so add the elapsed time to the running total.
1074                          */
1075                         last = ktime_sub(ktime_get(), engine->stats.start);
1076
1077                         engine->stats.total = ktime_add(engine->stats.total,
1078                                                         last);
1079                 } else if (engine->stats.active == 0) {
1080                         /*
1081                          * After turning on engine stats, context out might be
1082                          * the first event in which case we account from the
1083                          * time stats gathering was turned on.
1084                          */
1085                         last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1086
1087                         engine->stats.total = ktime_add(engine->stats.total,
1088                                                         last);
1089                 }
1090         }
1091
1092         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1093 }
1094
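/*
 * Dword offset into the context register state of the RING_MI_MODE
 * register/value pair, or -1 if not known for this engine. Used below to
 * make sure a context is never submitted with STOP_RING set.
 */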
1095 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1096 {
1097         if (INTEL_GEN(engine->i915) >= 12)
1098                 return 0x60;
1099         else if (INTEL_GEN(engine->i915) >= 9)
1100                 return 0x54;
1101         else if (engine->class == RENDER_CLASS)
1102                 return 0x58;
1103         else
1104                 return -1;
1105 }
1106
1107 static void
1108 execlists_check_context(const struct intel_context *ce,
1109                         const struct intel_engine_cs *engine)
1110 {
1111         const struct intel_ring *ring = ce->ring;
1112         u32 *regs = ce->lrc_reg_state;
1113         bool valid = true;
1114         int x;
1115
1116         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1117                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1118                        engine->name,
1119                        regs[CTX_RING_START],
1120                        i915_ggtt_offset(ring->vma));
1121                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1122                 valid = false;
1123         }
1124
1125         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1126             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1127                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1128                        engine->name,
1129                        regs[CTX_RING_CTL],
1130                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1131                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1132                 valid = false;
1133         }
1134
1135         x = lrc_ring_mi_mode(engine);
1136         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1137                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1138                        engine->name, regs[x + 1]);
1139                 regs[x + 1] &= ~STOP_RING;
1140                 regs[x + 1] |= STOP_RING << 16;
1141                 valid = false;
1142         }
1143
1144         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1145 }
1146
1147 static void restore_default_state(struct intel_context *ce,
1148                                   struct intel_engine_cs *engine)
1149 {
1150         u32 *regs = ce->lrc_reg_state;
1151
1152         if (engine->pinned_default_state)
1153                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
1154                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1155                        engine->context_size - PAGE_SIZE);
1156
1157         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1158 }
1159
1160 static void reset_active(struct i915_request *rq,
1161                          struct intel_engine_cs *engine)
1162 {
1163         struct intel_context * const ce = rq->context;
1164         u32 head;
1165
1166         /*
1167          * The executing context has been cancelled. We want to prevent
1168          * further execution along this context and propagate the error on
1169          * to anything depending on its results.
1170          *
1171          * In __i915_request_submit(), we apply the -EIO and remove the
1172          * requests' payloads for any banned requests. But first, we must
1173          * rewind the context back to the start of the incomplete request so
1174          * that we do not jump back into the middle of the batch.
1175          *
1176          * We preserve the breadcrumbs and semaphores of the incomplete
1177          * requests so that inter-timeline dependencies (i.e. other timelines)
1178          * remain correctly ordered. And we defer to __i915_request_submit()
1179          * so that all asynchronous waits are correctly handled.
1180          */
1181         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1182                      rq->fence.context, rq->fence.seqno);
1183
1184         /* On resubmission of the active request, payload will be scrubbed */
1185         if (i915_request_completed(rq))
1186                 head = rq->tail;
1187         else
1188                 head = active_request(ce->timeline, rq)->head;
1189         ce->ring->head = intel_ring_wrap(ce->ring, head);
1190         intel_ring_update_space(ce->ring);
1191
1192         /* Scrub the context image to prevent replaying the previous batch */
1193         restore_default_state(ce, engine);
1194         __execlists_update_reg_state(ce, engine);
1195
1196         /* We've switched away, so this should be a no-op, but intent matters */
1197         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1198 }
1199
1200 static inline struct intel_engine_cs *
1201 __execlists_schedule_in(struct i915_request *rq)
1202 {
1203         struct intel_engine_cs * const engine = rq->engine;
1204         struct intel_context * const ce = rq->context;
1205
1206         intel_context_get(ce);
1207
1208         if (unlikely(intel_context_is_banned(ce)))
1209                 reset_active(rq, engine);
1210
1211         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1212                 execlists_check_context(ce, engine);
1213
1214         if (ce->tag) {
1215                 /* Use a fixed tag for OA and friends */
1216                 ce->lrc_desc |= (u64)ce->tag << 32;
1217         } else {
1218                 /* We don't need a strict matching tag, just different values */
1219                 ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1220                 ce->lrc_desc |=
1221                         (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1222                         GEN11_SW_CTX_ID_SHIFT;
1223                 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1224         }
1225
1226         __intel_gt_pm_get(engine->gt);
1227         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1228         intel_engine_context_in(engine);
1229
1230         return engine;
1231 }
1232
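/*
 * ce->inflight is a tagged pointer: the engine on which the context is
 * currently executing, with the low bits counting how many ELSP ports
 * reference it. schedule_in bumps that count (ptr_inc) and schedule_out
 * drops it (ptr_dec), only tearing down state once the last port reference
 * is gone.
 */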
1233 static inline struct i915_request *
1234 execlists_schedule_in(struct i915_request *rq, int idx)
1235 {
1236         struct intel_context * const ce = rq->context;
1237         struct intel_engine_cs *old;
1238
1239         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1240         trace_i915_request_in(rq, idx);
1241
1242         old = READ_ONCE(ce->inflight);
1243         do {
1244                 if (!old) {
1245                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1246                         break;
1247                 }
1248         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1249
1250         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1251         return i915_request_get(rq);
1252 }
1253
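/*
 * If the virtual engine still has a request queued that could run on a
 * sibling other than the one this request used, poke its tasklet so the
 * request can be moved across.
 */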
1254 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1255 {
1256         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1257         struct i915_request *next = READ_ONCE(ve->request);
1258
1259         if (next && next->execution_mask & ~rq->execution_mask)
1260                 tasklet_schedule(&ve->base.execlists.tasklet);
1261 }
1262
1263 static inline void
1264 __execlists_schedule_out(struct i915_request *rq,
1265                          struct intel_engine_cs * const engine)
1266 {
1267         struct intel_context * const ce = rq->context;
1268
1269         /*
1270          * NB process_csb() is not under the engine->active.lock and hence
1271          * schedule_out can race with schedule_in, meaning that we should
1272          * refrain from doing non-trivial work here.
1273          */
1274
1275         /*
1276          * If we have just completed this context, the engine may now be
1277          * idle and we want to re-enter powersaving.
1278          */
1279         if (list_is_last(&rq->link, &ce->timeline->requests) &&
1280             i915_request_completed(rq))
1281                 intel_engine_add_retire(engine, ce->timeline);
1282
1283         intel_engine_context_out(engine);
1284         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1285         intel_gt_pm_put_async(engine->gt);
1286
1287         /*
1288          * If this is part of a virtual engine, its next request may
1289          * have been blocked waiting for access to the active context.
1290          * We have to kick all the siblings again in case we need to
1291          * switch (e.g. the next request is not runnable on this
1292          * engine). Hopefully, we will already have submitted the next
1293          * request before the tasklet runs and do not need to rebuild
1294          * each virtual tree and kick everyone again.
1295          */
1296         if (ce->engine != engine)
1297                 kick_siblings(rq, ce);
1298
1299         intel_context_put(ce);
1300 }
1301
1302 static inline void
1303 execlists_schedule_out(struct i915_request *rq)
1304 {
1305         struct intel_context * const ce = rq->context;
1306         struct intel_engine_cs *cur, *old;
1307
1308         trace_i915_request_out(rq);
1309
1310         old = READ_ONCE(ce->inflight);
1311         do
1312                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1313         while (!try_cmpxchg(&ce->inflight, &old, cur));
1314         if (!cur)
1315                 __execlists_schedule_out(rq, old);
1316
1317         i915_request_put(rq);
1318 }
1319
1320 static u64 execlists_update_context(struct i915_request *rq)
1321 {
1322         struct intel_context *ce = rq->context;
1323         u64 desc = ce->lrc_desc;
1324         u32 tail;
1325
1326         /*
1327          * WaIdleLiteRestore:bdw,skl
1328          *
1329          * We should never submit the context with the same RING_TAIL twice,
1330          * just in case we submit an empty ring, which confuses the HW.
1331          *
1332          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1333          * the normal request to be able to always advance the RING_TAIL on
1334          * subsequent resubmissions (for lite restore). Should that fail us,
1335          * and we try and submit the same tail again, force the context
1336          * reload.
1337          */
1338         tail = intel_ring_set_tail(rq->ring, rq->tail);
1339         if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
1340                 desc |= CTX_DESC_FORCE_RESTORE;
1341         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1342         rq->tail = rq->wa_tail;
1343
1344         /*
1345          * Make sure the context image is complete before we submit it to HW.
1346          *
1347          * Ostensibly, writes (including the WCB) should be flushed prior to
1348          * an uncached write such as our mmio register access, the empirical
1349          * evidence (esp. on Braswell) suggests that the WC write into memory
1350          * may not be visible to the HW prior to the completion of the UC
1351          * register write and that we may begin execution from the context
1352          * before its image is complete leading to invalid PD chasing.
1353          */
1354         wmb();
1355
1356         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1357         return desc;
1358 }
1359
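/*
 * Write one context descriptor to a submission port. With an ELSQ control
 * register (execlists->ctrl_reg) the two dwords are staged in the per-port
 * submit queue and only take effect once the caller writes EL_CTRL_LOAD;
 * without it they are written directly to the ELSP, upper dword first, with
 * the caller walking the ports in descending order so that port 0 is
 * written last.
 */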
1360 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1361 {
1362         if (execlists->ctrl_reg) {
1363                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1364                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1365         } else {
1366                 writel(upper_32_bits(desc), execlists->submit_reg);
1367                 writel(lower_32_bits(desc), execlists->submit_reg);
1368         }
1369 }
1370
1371 static __maybe_unused void
1372 trace_ports(const struct intel_engine_execlists *execlists,
1373             const char *msg,
1374             struct i915_request * const *ports)
1375 {
1376         const struct intel_engine_cs *engine =
1377                 container_of(execlists, typeof(*engine), execlists);
1378
1379         if (!ports[0])
1380                 return;
1381
1382         ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1383                      ports[0]->fence.context,
1384                      ports[0]->fence.seqno,
1385                      i915_request_completed(ports[0]) ? "!" :
1386                      i915_request_started(ports[0]) ? "*" :
1387                      "",
1388                      ports[1] ? ports[1]->fence.context : 0,
1389                      ports[1] ? ports[1]->fence.seqno : 0);
1390 }
1391
1392 static __maybe_unused bool
1393 assert_pending_valid(const struct intel_engine_execlists *execlists,
1394                      const char *msg)
1395 {
1396         struct i915_request * const *port, *rq;
1397         struct intel_context *ce = NULL;
1398
1399         trace_ports(execlists, msg, execlists->pending);
1400
1401         if (!execlists->pending[0]) {
1402                 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1403                 return false;
1404         }
1405
1406         if (execlists->pending[execlists_num_ports(execlists)]) {
1407                 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1408                               execlists_num_ports(execlists));
1409                 return false;
1410         }
1411
1412         for (port = execlists->pending; (rq = *port); port++) {
1413                 unsigned long flags;
1414                 bool ok = true;
1415
1416                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1417                 GEM_BUG_ON(!i915_request_is_active(rq));
1418
1419                 if (ce == rq->context) {
1420                         GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1421                                       ce->timeline->fence_context,
1422                                       port - execlists->pending);
1423                         return false;
1424                 }
1425                 ce = rq->context;
1426
1427                 /* Hold tightly onto the lock to prevent concurrent retires! */
1428                 if (!spin_trylock_irqsave(&rq->lock, flags))
1429                         continue;
1430
1431                 if (i915_request_completed(rq))
1432                         goto unlock;
1433
1434                 if (i915_active_is_idle(&ce->active) &&
1435                     !intel_context_is_barrier(ce)) {
1436                         GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1437                                       ce->timeline->fence_context,
1438                                       port - execlists->pending);
1439                         ok = false;
1440                         goto unlock;
1441                 }
1442
1443                 if (!i915_vma_is_pinned(ce->state)) {
1444                         GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1445                                       ce->timeline->fence_context,
1446                                       port - execlists->pending);
1447                         ok = false;
1448                         goto unlock;
1449                 }
1450
1451                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1452                         GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1453                                       ce->timeline->fence_context,
1454                                       port - execlists->pending);
1455                         ok = false;
1456                         goto unlock;
1457                 }
1458
1459 unlock:
1460                 spin_unlock_irqrestore(&rq->lock, flags);
1461                 if (!ok)
1462                         return false;
1463         }
1464
1465         return ce;
1466 }
1467
1468 static void execlists_submit_ports(struct intel_engine_cs *engine)
1469 {
1470         struct intel_engine_execlists *execlists = &engine->execlists;
1471         unsigned int n;
1472
1473         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1474
1475         /*
1476          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1477          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1478          * not be relinquished until the device is idle (see
1479          * i915_gem_idle_work_handler()). As a precaution, we make sure
1480          * that all ELSP are drained i.e. we have processed the CSB,
1481          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1482          */
1483         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1484
1485         /*
1486          * ELSQ note: the submit queue is not cleared after being submitted
1487          * to the HW so we need to make sure we always clean it up. This is
1488          * currently ensured by the fact that we always write the same number
1489          * of elsq entries, keep this in mind before changing the loop below.
1490          */
1491         for (n = execlists_num_ports(execlists); n--; ) {
1492                 struct i915_request *rq = execlists->pending[n];
1493
1494                 write_desc(execlists,
1495                            rq ? execlists_update_context(rq) : 0,
1496                            n);
1497         }
1498
1499         /* we need to manually load the submit queue */
1500         if (execlists->ctrl_reg)
1501                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1502 }
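/*
 * Note the reverse iteration in the loop above: descriptors are written
 * highest port first so that unused slots are zeroed and port 0 goes out
 * last. On hardware with the ELSQ control register the load is then
 * triggered explicitly (EL_CTRL_LOAD); on older ELSP-only hardware it is
 * the final, port 0, descriptor write that kicks off the execlist.
 */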
1503
1504 static bool ctx_single_port_submission(const struct intel_context *ce)
1505 {
1506         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1507                 intel_context_force_single_submission(ce));
1508 }
1509
1510 static bool can_merge_ctx(const struct intel_context *prev,
1511                           const struct intel_context *next)
1512 {
1513         if (prev != next)
1514                 return false;
1515
1516         if (ctx_single_port_submission(prev))
1517                 return false;
1518
1519         return true;
1520 }
1521
1522 static bool can_merge_rq(const struct i915_request *prev,
1523                          const struct i915_request *next)
1524 {
1525         GEM_BUG_ON(prev == next);
1526         GEM_BUG_ON(!assert_priority_queue(prev, next));
1527
1528         /*
1529          * We do not submit known completed requests. Therefore if the next
1530          * request is already completed, we can pretend to merge it in
1531          * with the previous context (and we will skip updating the ELSP
1532          * and tracking). Thus hopefully keeping the ELSP full with active
1533          * contexts, despite the best efforts of preempt-to-busy to confuse
1534          * us.
1535          */
1536         if (i915_request_completed(next))
1537                 return true;
1538
1539         if (unlikely((prev->fence.flags ^ next->fence.flags) &
1540                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1541                       BIT(I915_FENCE_FLAG_SENTINEL))))
1542                 return false;
1543
1544         if (!can_merge_ctx(prev->context, next->context))
1545                 return false;
1546
1547         return true;
1548 }
1549
1550 static void virtual_update_register_offsets(u32 *regs,
1551                                             struct intel_engine_cs *engine)
1552 {
1553         set_offsets(regs, reg_offsets(engine), engine, false);
1554 }
1555
1556 static bool virtual_matches(const struct virtual_engine *ve,
1557                             const struct i915_request *rq,
1558                             const struct intel_engine_cs *engine)
1559 {
1560         const struct intel_engine_cs *inflight;
1561
1562         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1563                 return false;
1564
1565         /*
1566          * We track when the HW has completed saving the context image
1567          * (i.e. when we have seen the final CS event switching out of
1568          * the context) and must not overwrite the context image before
1569          * then. This restricts us to only using the active engine
1570          * while the previous virtualized request is inflight (so
1571          * we reuse the register offsets). This is a very small
1572          * hysteresis on the greedy selection algorithm.
1573          */
1574         inflight = intel_context_inflight(&ve->context);
1575         if (inflight && inflight != engine)
1576                 return false;
1577
1578         return true;
1579 }
1580
1581 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1582                                      struct intel_engine_cs *engine)
1583 {
1584         struct intel_engine_cs *old = ve->siblings[0];
1585
1586         /* All unattached (rq->engine == old) must already be completed */
1587
1588         spin_lock(&old->breadcrumbs.irq_lock);
1589         if (!list_empty(&ve->context.signal_link)) {
1590                 list_move_tail(&ve->context.signal_link,
1591                                &engine->breadcrumbs.signalers);
1592                 intel_engine_signal_breadcrumbs(engine);
1593         }
1594         spin_unlock(&old->breadcrumbs.irq_lock);
1595 }
1596
1597 static struct i915_request *
1598 last_active(const struct intel_engine_execlists *execlists)
1599 {
1600         struct i915_request * const *last = READ_ONCE(execlists->active);
1601
1602         while (*last && i915_request_completed(*last))
1603                 last++;
1604
1605         return *last;
1606 }
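/*
 * last_active() walks the NULL-terminated active[] array and returns the
 * first request that has not yet completed, i.e. the request the engine
 * is (or will shortly be) executing, skipping entries that finished but
 * whose CS events have not yet been processed by process_csb().
 */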
1607
1608 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1609 {
1610         LIST_HEAD(list);
1611
1612         /*
1613          * We want to move the interrupted request to the back of
1614          * the round-robin list (i.e. its priority level), but
1615          * in doing so, we must then move all requests that were in
1616          * flight and waiting for the interrupted request, so that
1617          * they are run after it again.
1618          */
1619         do {
1620                 struct i915_dependency *p;
1621
1622                 GEM_BUG_ON(i915_request_is_active(rq));
1623                 list_move_tail(&rq->sched.link, pl);
1624
1625                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
1626                         struct i915_request *w =
1627                                 container_of(p->waiter, typeof(*w), sched);
1628
1629                         /* Leave semaphores spinning on the other engines */
1630                         if (w->engine != rq->engine)
1631                                 continue;
1632
1633                         /* No waiter should start before its signaler */
1634                         GEM_BUG_ON(i915_request_started(w) &&
1635                                    !i915_request_completed(rq));
1636
1637                         GEM_BUG_ON(i915_request_is_active(w));
1638                         if (!i915_request_is_ready(w))
1639                                 continue;
1640
1641                         if (rq_prio(w) < rq_prio(rq))
1642                                 continue;
1643
1644                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1645                         list_move_tail(&w->sched.link, &list);
1646                 }
1647
1648                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1649         } while (rq);
1650 }
1651
1652 static void defer_active(struct intel_engine_cs *engine)
1653 {
1654         struct i915_request *rq;
1655
1656         rq = __unwind_incomplete_requests(engine);
1657         if (!rq)
1658                 return;
1659
1660         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1661 }
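/*
 * Taken together: when a timeslice expires, defer_active() unwinds the
 * incomplete requests and defer_request() then pushes the interrupted
 * request, along with its ready on-engine waiters of equal priority, to
 * the back of that priority level so the next dequeue can pick a
 * different context of the same priority.
 */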
1662
1663 static bool
1664 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1665 {
1666         int hint;
1667
1668         if (!intel_engine_has_timeslices(engine))
1669                 return false;
1670
1671         if (list_is_last(&rq->sched.link, &engine->active.requests))
1672                 return false;
1673
1674         hint = max(rq_prio(list_next_entry(rq, sched.link)),
1675                    engine->execlists.queue_priority_hint);
1676
1677         return hint >= effective_prio(rq);
1678 }
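/*
 * For example: if the running request has an effective priority of 0 and
 * another priority-0 request is queued behind it (or queue_priority_hint
 * is 0), then hint >= effective_prio(rq) and we arm the timeslice timer,
 * round-robining between the two contexts instead of letting the first
 * run to completion.
 */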
1679
1680 static int
1681 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1682 {
1683         if (list_is_last(&rq->sched.link, &engine->active.requests))
1684                 return INT_MIN;
1685
1686         return rq_prio(list_next_entry(rq, sched.link));
1687 }
1688
1689 static inline unsigned long
1690 timeslice(const struct intel_engine_cs *engine)
1691 {
1692         return READ_ONCE(engine->props.timeslice_duration_ms);
1693 }
1694
1695 static unsigned long
1696 active_timeslice(const struct intel_engine_cs *engine)
1697 {
1698         const struct i915_request *rq = *engine->execlists.active;
1699
1700         if (!rq || i915_request_completed(rq))
1701                 return 0;
1702
1703         if (engine->execlists.switch_priority_hint < effective_prio(rq))
1704                 return 0;
1705
1706         return timeslice(engine);
1707 }
1708
1709 static void set_timeslice(struct intel_engine_cs *engine)
1710 {
1711         if (!intel_engine_has_timeslices(engine))
1712                 return;
1713
1714         set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1715 }
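/*
 * active_timeslice() returning 0 means "no timeslicing needed right now"
 * (nothing executing, or nothing of sufficient priority waiting); a zero
 * duration effectively disables the timer, as set_timer_ms() cancels it
 * rather than arming it, so the expiry path in execlists_dequeue() is
 * never taken.
 */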
1716
1717 static void record_preemption(struct intel_engine_execlists *execlists)
1718 {
1719         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1720 }
1721
1722 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
1723 {
1724         struct i915_request *rq;
1725
1726         rq = last_active(&engine->execlists);
1727         if (!rq)
1728                 return 0;
1729
1730         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1731         if (unlikely(intel_context_is_banned(rq->context)))
1732                 return 1;
1733
1734         return READ_ONCE(engine->props.preempt_timeout_ms);
1735 }
1736
1737 static void set_preempt_timeout(struct intel_engine_cs *engine)
1738 {
1739         if (!intel_engine_has_preempt_reset(engine))
1740                 return;
1741
1742         set_timer_ms(&engine->execlists.preempt,
1743                      active_preempt_timeout(engine));
1744 }
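/*
 * active_preempt_timeout() picks the timeout for the preempt timer armed
 * after a submission: 0 (no timer) if nothing is executing, a token 1ms
 * if the executing context has been banned so that it is reset promptly,
 * and otherwise the sysfs-configurable preempt_timeout_ms.
 */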
1745
1746 static inline void clear_ports(struct i915_request **ports, int count)
1747 {
1748         memset_p((void **)ports, NULL, count);
1749 }
1750
1751 static void execlists_dequeue(struct intel_engine_cs *engine)
1752 {
1753         struct intel_engine_execlists * const execlists = &engine->execlists;
1754         struct i915_request **port = execlists->pending;
1755         struct i915_request ** const last_port = port + execlists->port_mask;
1756         struct i915_request *last;
1757         struct rb_node *rb;
1758         bool submit = false;
1759
1760         /*
1761          * Hardware submission is through 2 ports. Conceptually each port
1762          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1763          * static for a context, and unique to each, so we only execute
1764          * requests belonging to a single context from each ring. RING_HEAD
1765          * is maintained by the CS in the context image, it marks the place
1766          * where it got up to last time, and through RING_TAIL we tell the CS
1767          * where we want to execute up to this time.
1768          *
1769          * In this list the requests are in order of execution. Consecutive
1770          * requests from the same context are adjacent in the ringbuffer. We
1771          * can combine these requests into a single RING_TAIL update:
1772          *
1773          *              RING_HEAD...req1...req2
1774          *                                    ^- RING_TAIL
1775          * since to execute req2 the CS must first execute req1.
1776          *
1777          * Our goal then is to point each port to the end of a consecutive
1778          * sequence of requests as the optimal (fewest wakeups and context
1779          * switches) submission.
1780          */
1781
1782         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1783                 struct virtual_engine *ve =
1784                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1785                 struct i915_request *rq = READ_ONCE(ve->request);
1786
1787                 if (!rq) { /* lazily cleanup after another engine handled rq */
1788                         rb_erase_cached(rb, &execlists->virtual);
1789                         RB_CLEAR_NODE(rb);
1790                         rb = rb_first_cached(&execlists->virtual);
1791                         continue;
1792                 }
1793
1794                 if (!virtual_matches(ve, rq, engine)) {
1795                         rb = rb_next(rb);
1796                         continue;
1797                 }
1798
1799                 break;
1800         }
1801
1802         /*
1803          * If the queue is higher priority than the last
1804          * request in the currently active context, submit afresh.
1805          * We will resubmit again afterwards in case we need to split
1806          * the active context to interject the preemption request,
1807          * i.e. we will retrigger preemption following the ack in case
1808          * of trouble.
1809          */
1810         last = last_active(execlists);
1811         if (last) {
1812                 if (need_preempt(engine, last, rb)) {
1813                         ENGINE_TRACE(engine,
1814                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1815                                      last->fence.context,
1816                                      last->fence.seqno,
1817                                      last->sched.attr.priority,
1818                                      execlists->queue_priority_hint);
1819                         record_preemption(execlists);
1820
1821                         /*
1822                          * Don't let the RING_HEAD advance past the breadcrumb
1823                          * as we unwind (and until we resubmit) so that we do
1824                          * not accidentally tell it to go backwards.
1825                          */
1826                         ring_set_paused(engine, 1);
1827
1828                         /*
1829                          * Note that we have not stopped the GPU at this point,
1830                          * so we are unwinding the incomplete requests as they
1831                          * remain inflight and so by the time we do complete
1832                          * the preemption, some of the unwound requests may
1833                          * complete!
1834                          */
1835                         __unwind_incomplete_requests(engine);
1836
1837                         /*
1838                          * If we need to return to the preempted context, we
1839                          * need to skip the lite-restore and force it to
1840                          * reload the RING_TAIL. Otherwise, the HW has a
1841                          * tendency to ignore us rewinding the TAIL to the
1842                          * end of an earlier request.
1843                          */
1844                         last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1845                         last = NULL;
1846                 } else if (need_timeslice(engine, last) &&
1847                            timer_expired(&engine->execlists.timer)) {
1848                         ENGINE_TRACE(engine,
1849                                      "expired last=%llx:%lld, prio=%d, hint=%d\n",
1850                                      last->fence.context,
1851                                      last->fence.seqno,
1852                                      last->sched.attr.priority,
1853                                      execlists->queue_priority_hint);
1854
1855                         ring_set_paused(engine, 1);
1856                         defer_active(engine);
1857
1858                         /*
1859                          * Unlike for preemption, if we rewind and continue
1860                          * executing the same context as previously active,
1861                          * the order of execution will remain the same and
1862                          * the tail will only advance. We do not need to
1863                          * force a full context restore, as a lite-restore
1864                          * is sufficient to resample the monotonic TAIL.
1865                          *
1866                          * If we switch to any other context, similarly we
1867                          * will not rewind TAIL of current context, and
1868                          * normal save/restore will preserve state and allow
1869                          * us to later continue executing the same request.
1870                          */
1871                         last = NULL;
1872                 } else {
1873                         /*
1874                          * Otherwise if we already have a request pending
1875                          * for execution after the current one, we can
1876                          * just wait until the next CS event before
1877                          * queuing more. In either case we will force a
1878                          * lite-restore preemption event, but if we wait
1879                          * we hopefully coalesce several updates into a single
1880                          * submission.
1881                          */
1882                         if (!list_is_last(&last->sched.link,
1883                                           &engine->active.requests)) {
1884                                 /*
1885                                  * Even if ELSP[1] is occupied and not worthy
1886                                  * of timeslices, our queue might be.
1887                                  */
1888                                 if (!execlists->timer.expires &&
1889                                     need_timeslice(engine, last))
1890                                         set_timer_ms(&execlists->timer,
1891                                                      timeslice(engine));
1892
1893                                 return;
1894                         }
1895                 }
1896         }
1897
1898         while (rb) { /* XXX virtual is always taking precedence */
1899                 struct virtual_engine *ve =
1900                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1901                 struct i915_request *rq;
1902
1903                 spin_lock(&ve->base.active.lock);
1904
1905                 rq = ve->request;
1906                 if (unlikely(!rq)) { /* lost the race to a sibling */
1907                         spin_unlock(&ve->base.active.lock);
1908                         rb_erase_cached(rb, &execlists->virtual);
1909                         RB_CLEAR_NODE(rb);
1910                         rb = rb_first_cached(&execlists->virtual);
1911                         continue;
1912                 }
1913
1914                 GEM_BUG_ON(rq != ve->request);
1915                 GEM_BUG_ON(rq->engine != &ve->base);
1916                 GEM_BUG_ON(rq->context != &ve->context);
1917
1918                 if (rq_prio(rq) >= queue_prio(execlists)) {
1919                         if (!virtual_matches(ve, rq, engine)) {
1920                                 spin_unlock(&ve->base.active.lock);
1921                                 rb = rb_next(rb);
1922                                 continue;
1923                         }
1924
1925                         if (last && !can_merge_rq(last, rq)) {
1926                                 spin_unlock(&ve->base.active.lock);
1927                                 return; /* leave this for another */
1928                         }
1929
1930                         ENGINE_TRACE(engine,
1931                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
1932                                      rq->fence.context,
1933                                      rq->fence.seqno,
1934                                      i915_request_completed(rq) ? "!" :
1935                                      i915_request_started(rq) ? "*" :
1936                                      "",
1937                                      yesno(engine != ve->siblings[0]));
1938
1939                         ve->request = NULL;
1940                         ve->base.execlists.queue_priority_hint = INT_MIN;
1941                         rb_erase_cached(rb, &execlists->virtual);
1942                         RB_CLEAR_NODE(rb);
1943
1944                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1945                         rq->engine = engine;
1946
1947                         if (engine != ve->siblings[0]) {
1948                                 u32 *regs = ve->context.lrc_reg_state;
1949                                 unsigned int n;
1950
1951                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1952
1953                                 if (!intel_engine_has_relative_mmio(engine))
1954                                         virtual_update_register_offsets(regs,
1955                                                                         engine);
1956
1957                                 if (!list_empty(&ve->context.signals))
1958                                         virtual_xfer_breadcrumbs(ve, engine);
1959
1960                                 /*
1961                                  * Move the bound engine to the top of the list
1962                                  * for future execution. We then kick this
1963                                  * tasklet first before checking others, so that
1964                                  * we preferentially reuse this set of bound
1965                                  * registers.
1966                                  */
1967                                 for (n = 1; n < ve->num_siblings; n++) {
1968                                         if (ve->siblings[n] == engine) {
1969                                                 swap(ve->siblings[n],
1970                                                      ve->siblings[0]);
1971                                                 break;
1972                                         }
1973                                 }
1974
1975                                 GEM_BUG_ON(ve->siblings[0] != engine);
1976                         }
1977
1978                         if (__i915_request_submit(rq)) {
1979                                 submit = true;
1980                                 last = rq;
1981                         }
1982                         i915_request_put(rq);
1983
1984                         /*
1985                          * Hmm, we have a bunch of virtual engine requests,
1986                          * but the first one was already completed (thanks
1987                          * preempt-to-busy!). Keep looking at the veng queue
1988                          * until we have no more relevant requests (i.e.
1989                          * the normal submit queue has higher priority).
1990                          */
1991                         if (!submit) {
1992                                 spin_unlock(&ve->base.active.lock);
1993                                 rb = rb_first_cached(&execlists->virtual);
1994                                 continue;
1995                         }
1996                 }
1997
1998                 spin_unlock(&ve->base.active.lock);
1999                 break;
2000         }
2001
2002         while ((rb = rb_first_cached(&execlists->queue))) {
2003                 struct i915_priolist *p = to_priolist(rb);
2004                 struct i915_request *rq, *rn;
2005                 int i;
2006
2007                 priolist_for_each_request_consume(rq, rn, p, i) {
2008                         bool merge = true;
2009
2010                         /*
2011                          * Can we combine this request with the current port?
2012                          * It has to be the same context/ringbuffer and not
2013                          * have any exceptions (e.g. GVT saying never to
2014                          * combine contexts).
2015                          *
2016                          * If we can combine the requests, we can execute both
2017                          * by updating the RING_TAIL to point to the end of the
2018                          * second request, and so we never need to tell the
2019                          * hardware about the first.
2020                          */
2021                         if (last && !can_merge_rq(last, rq)) {
2022                                 /*
2023                                  * If we are on the second port and cannot
2024                                  * combine this request with the last, then we
2025                                  * are done.
2026                                  */
2027                                 if (port == last_port)
2028                                         goto done;
2029
2030                                 /*
2031                                  * We must not populate both ELSP[] with the
2032                                  * same LRCA, i.e. we must submit 2 different
2033                                  * contexts if we submit 2 ELSP.
2034                                  */
2035                                 if (last->context == rq->context)
2036                                         goto done;
2037
2038                                 if (i915_request_has_sentinel(last))
2039                                         goto done;
2040
2041                                 /*
2042                                  * If GVT overrides us we only ever submit
2043                                  * port[0], leaving port[1] empty. Note that we
2044                                  * also have to be careful that we don't queue
2045                                  * the same context (even though a different
2046                                  * request) to the second port.
2047                                  */
2048                                 if (ctx_single_port_submission(last->context) ||
2049                                     ctx_single_port_submission(rq->context))
2050                                         goto done;
2051
2052                                 merge = false;
2053                         }
2054
2055                         if (__i915_request_submit(rq)) {
2056                                 if (!merge) {
2057                                         *port = execlists_schedule_in(last, port - execlists->pending);
2058                                         port++;
2059                                         last = NULL;
2060                                 }
2061
2062                                 GEM_BUG_ON(last &&
2063                                            !can_merge_ctx(last->context,
2064                                                           rq->context));
2065
2066                                 submit = true;
2067                                 last = rq;
2068                         }
2069                 }
2070
2071                 rb_erase_cached(&p->node, &execlists->queue);
2072                 i915_priolist_free(p);
2073         }
2074
2075 done:
2076         /*
2077          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2078          *
2079          * We choose the priority hint such that if we add a request of greater
2080          * priority than this, we kick the submission tasklet to decide on
2081          * the right order of submitting the requests to hardware. We must
2082          * also be prepared to reorder requests as they are in-flight on the
2083          * HW. We then derive the priority hint as the first "hole" in
2084          * the HW submission ports and if there are no available slots,
2085          * the priority of the lowest executing request, i.e. last.
2086          *
2087          * When we do receive a higher priority request ready to run from the
2088          * user, see queue_request(), the priority hint is bumped to that
2089          * request triggering preemption on the next dequeue (or subsequent
2090          * interrupt for secondary ports).
2091          */
2092         execlists->queue_priority_hint = queue_prio(execlists);
2093
2094         if (submit) {
2095                 *port = execlists_schedule_in(last, port - execlists->pending);
2096                 execlists->switch_priority_hint =
2097                         switch_prio(engine, *execlists->pending);
2098
2099                 /*
2100                  * Skip if we ended up with exactly the same set of requests,
2101                  * e.g. trying to timeslice a pair of ordered contexts
2102                  */
2103                 if (!memcmp(execlists->active, execlists->pending,
2104                             (port - execlists->pending + 1) * sizeof(*port))) {
2105                         do
2106                                 execlists_schedule_out(fetch_and_zero(port));
2107                         while (port-- != execlists->pending);
2108
2109                         goto skip_submit;
2110                 }
2111                 clear_ports(port + 1, last_port - port);
2112
2113                 execlists_submit_ports(engine);
2114                 set_preempt_timeout(engine);
2115         } else {
2116 skip_submit:
2117                 ring_set_paused(engine, 0);
2118         }
2119 }
2120
2121 static void
2122 cancel_port_requests(struct intel_engine_execlists * const execlists)
2123 {
2124         struct i915_request * const *port;
2125
2126         for (port = execlists->pending; *port; port++)
2127                 execlists_schedule_out(*port);
2128         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2129
2130         /* Mark the end of active before we overwrite *active */
2131         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2132                 execlists_schedule_out(*port);
2133         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2134
2135         WRITE_ONCE(execlists->active, execlists->inflight);
2136 }
2137
2138 static inline void
2139 invalidate_csb_entries(const u32 *first, const u32 *last)
2140 {
2141         clflush((void *)first);
2142         clflush((void *)last);
2143 }
2144
2145 static inline bool
2146 reset_in_progress(const struct intel_engine_execlists *execlists)
2147 {
2148         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
2149 }
2150
2151 /*
2152  * Starting with Gen12, the status has a new format:
2153  *
2154  *     bit  0:     switched to new queue
2155  *     bit  1:     reserved
2156  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2157  *                 switch detail is set to "wait on semaphore"
2158  *     bits 3-5:   engine class
2159  *     bits 6-11:  engine instance
2160  *     bits 12-14: reserved
2161  *     bits 15-25: sw context id of the lrc the GT switched to
2162  *     bits 26-31: sw counter of the lrc the GT switched to
2163  *     bits 32-35: context switch detail
2164  *                  - 0: ctx complete
2165  *                  - 1: wait on sync flip
2166  *                  - 2: wait on vblank
2167  *                  - 3: wait on scanline
2168  *                  - 4: wait on semaphore
2169  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2170  *                       WAIT_FOR_EVENT)
2171  *     bit  36:    reserved
2172  *     bits 37-43: wait detail (for switch detail 1 to 4)
2173  *     bits 44-46: reserved
2174  *     bits 47-57: sw context id of the lrc the GT switched away from
2175  *     bits 58-63: sw counter of the lrc the GT switched away from
2176  */
2177 static inline bool
2178 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2179 {
2180         u32 lower_dw = csb[0];
2181         u32 upper_dw = csb[1];
2182         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2183         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2184         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2185
2186         /*
2187          * The context switch detail is not guaranteed to be 5 when a preemption
2188          * occurs, so we can't just check for that. The check below works for
2189          * all the cases we care about, including preemptions of WAIT
2190          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2191          * would require some extra handling, but we don't support that.
2192          */
2193         if (!ctx_away_valid || new_queue) {
2194                 GEM_BUG_ON(!ctx_to_valid);
2195                 return true;
2196         }
2197
2198         /*
2199          * switch detail = 5 is covered by the case above and we do not expect a
2200          * context switch on an unsuccessful wait instruction since we always
2201          * use polling mode.
2202          */
2203         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2204         return false;
2205 }
2206
2207 static inline bool
2208 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2209 {
2210         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2211 }
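/*
 * Both parsers answer the same question for process_csb() below: does
 * this event promote the contexts we queued in pending[] to being the
 * active execlists (a preemption or an idle->active transition), or does
 * it merely mark the completion of the first port of the current active
 * set?
 */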
2212
2213 static void process_csb(struct intel_engine_cs *engine)
2214 {
2215         struct intel_engine_execlists * const execlists = &engine->execlists;
2216         const u32 * const buf = execlists->csb_status;
2217         const u8 num_entries = execlists->csb_size;
2218         u8 head, tail;
2219
2220         /*
2221          * As we modify our execlists state tracking we require exclusive
2222          * access. Either we are inside the tasklet, or the tasklet is disabled
2223          * and we assume that only happens inside the reset paths and so is serialised.
2224          */
2225         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2226                    !reset_in_progress(execlists));
2227         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2228
2229         /*
2230          * Note that csb_write, csb_status may be either in HWSP or mmio.
2231          * When reading from the csb_write mmio register, we have to be
2232          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2233          * the low 4 bits. As it happens we know the next 4 bits are always
2234          * zero and so we can simply mask off the low u8 of the register
2235          * and treat it identically to reading from the HWSP (without having
2236          * to use explicit shifting and masking, and probably bifurcating
2237          * the code to handle the legacy mmio read).
2238          */
2239         head = execlists->csb_head;
2240         tail = READ_ONCE(*execlists->csb_write);
2241         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2242         if (unlikely(head == tail))
2243                 return;
2244
2245         /*
2246          * Hopefully paired with a wmb() in HW!
2247          *
2248          * We must complete the read of the write pointer before any reads
2249          * from the CSB, so that we do not see stale values. Without an rmb
2250          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2251          * we perform the READ_ONCE(*csb_write).
2252          */
2253         rmb();
2254
2255         do {
2256                 bool promote;
2257
2258                 if (++head == num_entries)
2259                         head = 0;
2260
2261                 /*
2262                  * We are flying near dragons again.
2263                  *
2264                  * We hold a reference to the request in execlist_port[]
2265                  * but no more than that. We are operating in softirq
2266                  * context and so cannot hold any mutex or sleep. That
2267                  * prevents us stopping the requests we are processing
2268                  * in port[] from being retired simultaneously (the
2269                  * breadcrumb will be complete before we see the
2270                  * context-switch). As we only hold the reference to the
2271                  * request, any pointer chasing underneath the request
2272                  * is subject to a potential use-after-free. Thus we
2273                  * store all of the bookkeeping within port[] as
2274                  * required, and avoid using unguarded pointers beneath
2275                  * request itself. The same applies to the atomic
2276                  * status notifier.
2277                  */
2278
2279                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2280                              head, buf[2 * head + 0], buf[2 * head + 1]);
2281
2282                 if (INTEL_GEN(engine->i915) >= 12)
2283                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2284                 else
2285                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2286                 if (promote) {
2287                         struct i915_request * const *old = execlists->active;
2288
2289                         /* Point active to the new ELSP; prevent overwriting */
2290                         WRITE_ONCE(execlists->active, execlists->pending);
2291
2292                         if (!inject_preempt_hang(execlists))
2293                                 ring_set_paused(engine, 0);
2294
2295                         /* cancel old inflight, prepare for switch */
2296                         trace_ports(execlists, "preempted", old);
2297                         while (*old)
2298                                 execlists_schedule_out(*old++);
2299
2300                         /* switch pending to inflight */
2301                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2302                         WRITE_ONCE(execlists->active,
2303                                    memcpy(execlists->inflight,
2304                                           execlists->pending,
2305                                           execlists_num_ports(execlists) *
2306                                           sizeof(*execlists->pending)));
2307
2308                         WRITE_ONCE(execlists->pending[0], NULL);
2309                 } else {
2310                         GEM_BUG_ON(!*execlists->active);
2311
2312                         /* port0 completed, advanced to port1 */
2313                         trace_ports(execlists, "completed", execlists->active);
2314
2315                         /*
2316                          * We rely on the hardware being strongly
2317                          * ordered: the breadcrumb write must be
2318                          * coherent (visible to the CPU) before the
2319                          * user interrupt and CSB event are processed.
2320                          */
2321                         GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
2322                                    !reset_in_progress(execlists));
2323                         execlists_schedule_out(*execlists->active++);
2324
2325                         GEM_BUG_ON(execlists->active - execlists->inflight >
2326                                    execlists_num_ports(execlists));
2327                 }
2328         } while (head != tail);
2329
2330         execlists->csb_head = head;
2331         set_timeslice(engine);
2332
2333         /*
2334          * Gen11 has proven to fail wrt global observation point between
2335          * entry and tail update, failing on the ordering and thus
2336          * we see an old entry in the context status buffer.
2337          *
2338          * Forcibly evict out entries for the next gpu csb update,
2339          * to increase the odds that we get fresh entries with
2340          * non-working hardware. The cost of doing so comes out mostly in
2341          * the wash as the hardware, working or not, will need to do the
2342          * invalidation before.
2343          */
2344         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2345 }
2346
2347 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2348 {
2349         lockdep_assert_held(&engine->active.lock);
2350         if (!engine->execlists.pending[0]) {
2351                 rcu_read_lock(); /* protect peeking at execlists->active */
2352                 execlists_dequeue(engine);
2353                 rcu_read_unlock();
2354         }
2355 }
2356
2357 static void __execlists_hold(struct i915_request *rq)
2358 {
2359         LIST_HEAD(list);
2360
2361         do {
2362                 struct i915_dependency *p;
2363
2364                 if (i915_request_is_active(rq))
2365                         __i915_request_unsubmit(rq);
2366
2367                 RQ_TRACE(rq, "on hold\n");
2368                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2369                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2370                 i915_request_set_hold(rq);
2371
2372                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2373                         struct i915_request *w =
2374                                 container_of(p->waiter, typeof(*w), sched);
2375
2376                         /* Leave semaphores spinning on the other engines */
2377                         if (w->engine != rq->engine)
2378                                 continue;
2379
2380                         if (!i915_request_is_ready(w))
2381                                 continue;
2382
2383                         if (i915_request_completed(w))
2384                                 continue;
2385
2386                         if (i915_request_on_hold(w))
2387                                 continue;
2388
2389                         list_move_tail(&w->sched.link, &list);
2390                 }
2391
2392                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2393         } while (rq);
2394 }
2395
2396 static bool execlists_hold(struct intel_engine_cs *engine,
2397                            struct i915_request *rq)
2398 {
2399         spin_lock_irq(&engine->active.lock);
2400
2401         if (i915_request_completed(rq)) { /* too late! */
2402                 rq = NULL;
2403                 goto unlock;
2404         }
2405
2406         if (rq->engine != engine) { /* preempted virtual engine */
2407                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2408
2409                 /*
2410                  * intel_context_inflight() is only protected by virtue
2411                  * of process_csb() being called only by the tasklet (or
2412                  * directly from inside reset while the tasklet is suspended).
2413                  * Assert that neither of those are allowed to run while we
2414                  * poke at the request queues.
2415                  */
2416                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2417
2418                 /*
2419                  * An unsubmitted request along a virtual engine will
2420                  * remain on the active (this) engine until we are able
2421                  * to process the context switch away (and so mark the
2422                  * context as no longer in flight). That cannot have happened
2423                  * yet, otherwise we would not be hanging!
2424                  */
2425                 spin_lock(&ve->base.active.lock);
2426                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2427                 GEM_BUG_ON(ve->request != rq);
2428                 ve->request = NULL;
2429                 spin_unlock(&ve->base.active.lock);
2430                 i915_request_put(rq);
2431
2432                 rq->engine = engine;
2433         }
2434
2435         /*
2436          * Transfer this request onto the hold queue to prevent it
2437          * being resubmitted to HW (and potentially completed) before we have
2438          * released it. Since we may have already submitted following
2439          * requests, we need to remove those as well.
2440          */
2441         GEM_BUG_ON(i915_request_on_hold(rq));
2442         GEM_BUG_ON(rq->engine != engine);
2443         __execlists_hold(rq);
2444
2445 unlock:
2446         spin_unlock_irq(&engine->active.lock);
2447         return rq;
2448 }
2449
2450 static bool hold_request(const struct i915_request *rq)
2451 {
2452         struct i915_dependency *p;
2453
2454         /*
2455          * If one of our ancestors is on hold, we must also be on hold,
2456          * otherwise we will bypass it and execute before it.
2457          */
2458         list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
2459                 const struct i915_request *s =
2460                         container_of(p->signaler, typeof(*s), sched);
2461
2462                 if (s->engine != rq->engine)
2463                         continue;
2464
2465                 if (i915_request_on_hold(s))
2466                         return true;
2467         }
2468
2469         return false;
2470 }
2471
2472 static void __execlists_unhold(struct i915_request *rq)
2473 {
2474         LIST_HEAD(list);
2475
2476         do {
2477                 struct i915_dependency *p;
2478
2479                 GEM_BUG_ON(!i915_request_on_hold(rq));
2480                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2481
2482                 i915_request_clear_hold(rq);
2483                 list_move_tail(&rq->sched.link,
2484                                i915_sched_lookup_priolist(rq->engine,
2485                                                           rq_prio(rq)));
2486                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2487                 RQ_TRACE(rq, "hold release\n");
2488
2489                 /* Also release any children on this engine that are ready */
2490                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2491                         struct i915_request *w =
2492                                 container_of(p->waiter, typeof(*w), sched);
2493
2494                         if (w->engine != rq->engine)
2495                                 continue;
2496
2497                         if (!i915_request_on_hold(w))
2498                                 continue;
2499
2500                         /* Check that no other parents are also on hold */
2501                         if (hold_request(w))
2502                                 continue;
2503
2504                         list_move_tail(&w->sched.link, &list);
2505                 }
2506
2507                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2508         } while (rq);
2509 }
2510
2511 static void execlists_unhold(struct intel_engine_cs *engine,
2512                              struct i915_request *rq)
2513 {
2514         spin_lock_irq(&engine->active.lock);
2515
2516         /*
2517          * Move this request back to the priority queue, and all of its
2518          * children and grandchildren that were suspended along with it.
2519          */
2520         __execlists_unhold(rq);
2521
2522         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2523                 engine->execlists.queue_priority_hint = rq_prio(rq);
2524                 tasklet_hi_schedule(&engine->execlists.tasklet);
2525         }
2526
2527         spin_unlock_irq(&engine->active.lock);
2528 }
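/*
 * The hold/unhold machinery above exists for error capture: the guilty
 * request (and every request depending on it on this engine) is parked on
 * engine->active.hold so that it is neither resubmitted nor replayed by
 * the reset, and is released back to the priority queue once the capture
 * worker has finished with it.
 */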
2529
2530 struct execlists_capture {
2531         struct work_struct work;
2532         struct i915_request *rq;
2533         struct i915_gpu_coredump *error;
2534 };
2535
2536 static void execlists_capture_work(struct work_struct *work)
2537 {
2538         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2539         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2540         struct intel_engine_cs *engine = cap->rq->engine;
2541         struct intel_gt_coredump *gt = cap->error->gt;
2542         struct intel_engine_capture_vma *vma;
2543
2544         /* Compress all the objects attached to the request, slow! */
2545         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2546         if (vma) {
2547                 struct i915_vma_compress *compress =
2548                         i915_vma_capture_prepare(gt);
2549
2550                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2551                 i915_vma_capture_finish(gt, compress);
2552         }
2553
2554         gt->simulated = gt->engine->simulated;
2555         cap->error->simulated = gt->simulated;
2556
2557         /* Publish the error state, and announce it to the world */
2558         i915_error_state_store(cap->error);
2559         i915_gpu_coredump_put(cap->error);
2560
2561         /* Return this request and all that depend upon it for signaling */
2562         execlists_unhold(engine, cap->rq);
2563         i915_request_put(cap->rq);
2564
2565         kfree(cap);
2566 }
2567
2568 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2569 {
2570         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2571         struct execlists_capture *cap;
2572
2573         cap = kmalloc(sizeof(*cap), gfp);
2574         if (!cap)
2575                 return NULL;
2576
2577         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2578         if (!cap->error)
2579                 goto err_cap;
2580
2581         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2582         if (!cap->error->gt)
2583                 goto err_gpu;
2584
2585         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2586         if (!cap->error->gt->engine)
2587                 goto err_gt;
2588
2589         return cap;
2590
2591 err_gt:
2592         kfree(cap->error->gt);
2593 err_gpu:
2594         kfree(cap->error);
2595 err_cap:
2596         kfree(cap);
2597         return NULL;
2598 }
2599
2600 static bool execlists_capture(struct intel_engine_cs *engine)
2601 {
2602         struct execlists_capture *cap;
2603
2604         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2605                 return true;
2606
2607         /*
2608          * We need to _quickly_ capture the engine state before we reset.
2609          * We are inside an atomic section (softirq) here and we are delaying
2610          * the forced preemption event.
2611          */
2612         cap = capture_regs(engine);
2613         if (!cap)
2614                 return true;
2615
2616         cap->rq = execlists_active(&engine->execlists);
2617         GEM_BUG_ON(!cap->rq);
2618
2619         rcu_read_lock();
2620         cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2621         cap->rq = i915_request_get_rcu(cap->rq);
2622         rcu_read_unlock();
2623         if (!cap->rq)
2624                 goto err_free;
2625
2626         /*
2627          * Remove the request from the execlists queue, and take ownership
2628          * of the request. We pass it to our worker who will _slowly_ compress
2629          * all the pages the _user_ requested for debugging their batch, after
2630          * which we return it to the queue for signaling.
2631          *
2632          * By removing them from the execlists queue, we also remove the
2633          * requests from being processed by __unwind_incomplete_requests()
2634          * during the intel_engine_reset(), and so they will *not* be replayed
2635          * afterwards.
2636          *
2637          * Note that because we have not yet reset the engine at this point,
2638          * it is possible that the request we have identified as being
2639          * guilty did in fact complete, and we will then hit an arbitration
2640          * point allowing the outstanding preemption to succeed. The likelihood
2641          * of that is very low (as capturing of the engine registers should be
2642          * fast enough to run inside an irq-off atomic section!), so we will
2643          * simply hold that request accountable for being non-preemptible
2644          * long enough to force the reset.
2645          */
2646         if (!execlists_hold(engine, cap->rq))
2647                 goto err_rq;
2648
2649         INIT_WORK(&cap->work, execlists_capture_work);
2650         schedule_work(&cap->work);
2651         return true;
2652
2653 err_rq:
2654         i915_request_put(cap->rq);
2655 err_free:
2656         i915_gpu_coredump_put(cap->error);
2657         kfree(cap);
2658         return false;
2659 }
2660
2661 static noinline void preempt_reset(struct intel_engine_cs *engine)
2662 {
2663         const unsigned int bit = I915_RESET_ENGINE + engine->id;
2664         unsigned long *lock = &engine->gt->reset.flags;
2665
2666         if (i915_modparams.reset < 3)
2667                 return;
2668
2669         if (test_and_set_bit(bit, lock))
2670                 return;
2671
2672         /* Mark this tasklet as disabled to avoid waiting for it to complete */
2673         tasklet_disable_nosync(&engine->execlists.tasklet);
2674
2675         ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
2676                      READ_ONCE(engine->props.preempt_timeout_ms),
2677                      jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
2678
2679         ring_set_paused(engine, 1); /* Freeze the current request in place */
2680         if (execlists_capture(engine))
2681                 intel_engine_reset(engine, "preemption time out");
2682         else
2683                 ring_set_paused(engine, 0);
2684
2685         tasklet_enable(&engine->execlists.tasklet);
2686         clear_and_wake_up_bit(bit, lock);
2687 }
2688
2689 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2690 {
2691         const struct timer_list *t = &engine->execlists.preempt;
2692
2693         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2694                 return false;
2695
2696         if (!timer_expired(t))
2697                 return false;
2698
2699         return READ_ONCE(engine->execlists.pending[0]);
2700 }
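/*
 * Note that preempt_timeout() only reports a timeout while a preemption
 * is still outstanding (pending[0] populated and the preempt timer
 * expired); once the CS acknowledges the switch, process_csb() clears
 * pending[] and no forced reset is required.
 */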
2701
2702 /*
2703  * Check the unread Context Status Buffers and manage the submission of new
2704  * contexts to the ELSP accordingly.
2705  */
2706 static void execlists_submission_tasklet(unsigned long data)
2707 {
2708         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2709         bool timeout = preempt_timeout(engine);
2710
2711         process_csb(engine);
2712         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2713                 unsigned long flags;
2714
2715                 spin_lock_irqsave(&engine->active.lock, flags);
2716                 __execlists_submission_tasklet(engine);
2717                 spin_unlock_irqrestore(&engine->active.lock, flags);
2718
2719                 /* Recheck after serialising with direct-submission */
2720                 if (timeout && preempt_timeout(engine))
2721                         preempt_reset(engine);
2722         }
2723 }
2724
2725 static void __execlists_kick(struct intel_engine_execlists *execlists)
2726 {
2727         /* Kick the tasklet for some interrupt coalescing and reset handling */
2728         tasklet_hi_schedule(&execlists->tasklet);
2729 }
2730
2731 #define execlists_kick(t, member) \
2732         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
2733
2734 static void execlists_timeslice(struct timer_list *timer)
2735 {
2736         execlists_kick(timer, timer);
2737 }
2738
2739 static void execlists_preempt(struct timer_list *timer)
2740 {
2741         execlists_kick(timer, preempt);
2742 }
2743
2744 static void queue_request(struct intel_engine_cs *engine,
2745                           struct i915_request *rq)
2746 {
2747         GEM_BUG_ON(!list_empty(&rq->sched.link));
2748         list_add_tail(&rq->sched.link,
2749                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
2750         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2751 }
2752
2753 static void __submit_queue_imm(struct intel_engine_cs *engine)
2754 {
2755         struct intel_engine_execlists * const execlists = &engine->execlists;
2756
2757         if (reset_in_progress(execlists))
2758                 return; /* defer until we restart the engine following reset */
2759
2760         if (execlists->tasklet.func == execlists_submission_tasklet)
2761                 __execlists_submission_tasklet(engine);
2762         else
2763                 tasklet_hi_schedule(&execlists->tasklet);
2764 }
2765
2766 static void submit_queue(struct intel_engine_cs *engine,
2767                          const struct i915_request *rq)
2768 {
2769         struct intel_engine_execlists *execlists = &engine->execlists;
2770
2771         if (rq_prio(rq) <= execlists->queue_priority_hint)
2772                 return;
2773
2774         execlists->queue_priority_hint = rq_prio(rq);
2775         __submit_queue_imm(engine);
2776 }
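/*
 * Kicking is skipped for requests at or below the current
 * queue_priority_hint: roughly speaking, the hint tracks the highest
 * priority we already know about, so such a request cannot trigger a new
 * preemption and will be collected by an already-scheduled dequeue or the
 * next CS event.
 */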
2777
2778 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2779                              const struct i915_request *rq)
2780 {
2781         GEM_BUG_ON(i915_request_on_hold(rq));
2782         return !list_empty(&engine->active.hold) && hold_request(rq);
2783 }
2784
2785 static void execlists_submit_request(struct i915_request *request)
2786 {
2787         struct intel_engine_cs *engine = request->engine;
2788         unsigned long flags;
2789
2790         /* Will be called from irq-context when using foreign fences. */
2791         spin_lock_irqsave(&engine->active.lock, flags);
2792
2793         if (unlikely(ancestor_on_hold(engine, request))) {
2794                 list_add_tail(&request->sched.link, &engine->active.hold);
2795                 i915_request_set_hold(request);
2796         } else {
2797                 queue_request(engine, request);
2798
2799                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2800                 GEM_BUG_ON(list_empty(&request->sched.link));
2801
2802                 submit_queue(engine, request);
2803         }
2804
2805         spin_unlock_irqrestore(&engine->active.lock, flags);
2806 }
2807
2808 static void __execlists_context_fini(struct intel_context *ce)
2809 {
2810         intel_ring_put(ce->ring);
2811         i915_vma_put(ce->state);
2812 }
2813
2814 static void execlists_context_destroy(struct kref *kref)
2815 {
2816         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2817
2818         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2819         GEM_BUG_ON(intel_context_is_pinned(ce));
2820
2821         if (ce->state)
2822                 __execlists_context_fini(ce);
2823
2824         intel_context_fini(ce);
2825         intel_context_free(ce);
2826 }
2827
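/*
 * Under CONFIG_DRM_I915_DEBUG_GEM a guard area following the context image
 * is poisoned with CONTEXT_REDZONE, and check_redzone() verifies it at
 * unpin time, warning once if anything wrote past engine->context_size.
 */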
2828 static void
2829 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2830 {
2831         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2832                 return;
2833
2834         vaddr += engine->context_size;
2835
2836         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2837 }
2838
2839 static void
2840 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2841 {
2842         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2843                 return;
2844
2845         vaddr += engine->context_size;
2846
2847         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2848                 dev_err_once(engine->i915->drm.dev,
2849                              "%s context redzone overwritten!\n",
2850                              engine->name);
2851 }
2852
2853 static void execlists_context_unpin(struct intel_context *ce)
2854 {
2855         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2856                       ce->engine);
2857
2858         i915_gem_object_unpin_map(ce->state->obj);
2859 }
2860
2861 static void
2862 __execlists_update_reg_state(const struct intel_context *ce,
2863                              const struct intel_engine_cs *engine)
2864 {
2865         struct intel_ring *ring = ce->ring;
2866         u32 *regs = ce->lrc_reg_state;
2867
2868         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
2869         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2870
2871         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
2872         regs[CTX_RING_HEAD] = ring->head;
2873         regs[CTX_RING_TAIL] = ring->tail;
2874
2875         /* RPCS */
2876         if (engine->class == RENDER_CLASS) {
2877                 regs[CTX_R_PWR_CLK_STATE] =
2878                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2879
2880                 i915_oa_init_reg_state(ce, engine);
2881         }
2882 }
2883
2884 static int
2885 __execlists_context_pin(struct intel_context *ce,
2886                         struct intel_engine_cs *engine)
2887 {
2888         void *vaddr;
2889
2890         GEM_BUG_ON(!ce->state);
2891         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2892
2893         vaddr = i915_gem_object_pin_map(ce->state->obj,
2894                                         i915_coherent_map_type(engine->i915) |
2895                                         I915_MAP_OVERRIDE);
2896         if (IS_ERR(vaddr))
2897                 return PTR_ERR(vaddr);
2898
2899         ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
2900         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2901         __execlists_update_reg_state(ce, engine);
2902
2903         return 0;
2904 }
2905
2906 static int execlists_context_pin(struct intel_context *ce)
2907 {
2908         return __execlists_context_pin(ce, ce->engine);
2909 }
2910
2911 static int execlists_context_alloc(struct intel_context *ce)
2912 {
2913         return __execlists_context_alloc(ce, ce->engine);
2914 }
2915
2916 static void execlists_context_reset(struct intel_context *ce)
2917 {
2918         CE_TRACE(ce, "reset\n");
2919         GEM_BUG_ON(!intel_context_is_pinned(ce));
2920
2921         /*
2922          * Because we emit WA_TAIL_DWORDS there may be a disparity
2923          * between our bookkeeping in ce->ring->head and ce->ring->tail and
2924          * that stored in context. As we only write new commands from
2925          * ce->ring->tail onwards, everything before that is junk. If the GPU
2926          * starts reading its RING_HEAD from the context, it may try to
2927          * execute that junk and die.
2928          *
2929          * The contexts that are still pinned on resume belong to the
2930          * kernel, and are local to each engine. All other contexts will
2931          * have their head/tail sanitized upon pinning before use, so they
2932          * will never see garbage.
2933          *
2934          * So to avoid that we reset the context images upon resume. For
2935          * simplicity, we just zero everything out.
2936          */
2937         intel_ring_reset(ce->ring, ce->ring->emit);
2938
2939         /* Scrub away the garbage */
2940         execlists_init_reg_state(ce->lrc_reg_state,
2941                                  ce, ce->engine, ce->ring, true);
2942         __execlists_update_reg_state(ce, ce->engine);
2943
2944         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
2945 }
2946
2947 static const struct intel_context_ops execlists_context_ops = {
2948         .alloc = execlists_context_alloc,
2949
2950         .pin = execlists_context_pin,
2951         .unpin = execlists_context_unpin,
2952
2953         .enter = intel_context_enter_engine,
2954         .exit = intel_context_exit_engine,
2955
2956         .reset = execlists_context_reset,
2957         .destroy = execlists_context_destroy,
2958 };
2959
2960 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
2961 {
2962         u32 *cs;
2963
2964         GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
2965
2966         cs = intel_ring_begin(rq, 6);
2967         if (IS_ERR(cs))
2968                 return PTR_ERR(cs);
2969
2970         /*
2971          * Check if we have been preempted before we even get started.
2972          *
2973          * After this point i915_request_started() reports true, even if
2974          * we get preempted and so are no longer running.
2975          */
2976         *cs++ = MI_ARB_CHECK;
2977         *cs++ = MI_NOOP;
2978
2979         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2980         *cs++ = i915_request_timeline(rq)->hwsp_offset;
2981         *cs++ = 0;
2982         *cs++ = rq->fence.seqno - 1;
2983
2984         intel_ring_advance(rq, cs);
2985
2986         /* Record the updated position of the request's payload */
2987         rq->infix = intel_ring_offset(rq, cs);
2988
2989         return 0;
2990 }
2991
2992 static int execlists_request_alloc(struct i915_request *request)
2993 {
2994         int ret;
2995
2996         GEM_BUG_ON(!intel_context_is_pinned(request->context));
2997
2998         /*
2999          * Flush enough space to reduce the likelihood of waiting after
3000          * we start building the request - in which case we will just
3001          * have to repeat work.
3002          */
3003         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3004
3005         /*
3006          * Note that after this point, we have committed to using
3007          * this request as it is being used to both track the
3008          * state of engine initialisation and liveness of the
3009          * golden renderstate above. Think twice before you try
3010          * to cancel/unwind this request now.
3011          */
3012
3013         /* Unconditionally invalidate GPU caches and TLBs. */
3014         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3015         if (ret)
3016                 return ret;
3017
3018         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3019         return 0;
3020 }
3021
3022 /*
3023  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3024  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3025  * but there is a slight complication as this is applied in WA batch where the
3026  * values are only initialized once so we cannot take the register value at the
3027  * beginning and reuse it further; hence we save its value to memory, upload a
3028  * constant value with bit21 set and then we restore it back with the saved value.
3029  * To simplify the WA, a constant value is formed by using the default value
3030  * of this register. This shouldn't be a problem because we are only modifying
3031  * it for a short period and this batch is non-preemptible. We can of course
3032  * use additional instructions that read the actual value of the register
3033  * at that time and set our bit of interest but it makes the WA complicated.
3034  *
3035  * This WA is also required for Gen9 so extracting as a function avoids
3036  * code duplication.
3037  */
3038 static u32 *
3039 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3040 {
3041         /* NB no one else is allowed to scribble over scratch + 256! */
3042         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3043         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3044         *batch++ = intel_gt_scratch_offset(engine->gt,
3045                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3046         *batch++ = 0;
3047
3048         *batch++ = MI_LOAD_REGISTER_IMM(1);
3049         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3050         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3051
3052         batch = gen8_emit_pipe_control(batch,
3053                                        PIPE_CONTROL_CS_STALL |
3054                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3055                                        0);
3056
3057         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3058         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3059         *batch++ = intel_gt_scratch_offset(engine->gt,
3060                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3061         *batch++ = 0;
3062
3063         return batch;
3064 }
3065
3066 /*
3067  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3068  * initialized at the beginning and shared across all contexts but this field
3069  * helps us to have multiple batches at different offsets and select them based
3070  * on some criteria. At the moment this batch always starts at the beginning of the page
3071  * and at this point we don't have multiple wa_ctx batch buffers.
3072  *
3073  * The number of WAs applied is not known at the beginning; we use this field
3074  * to return the number of DWORDS written.
3075  *
3076  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3077  * so it adds NOOPs as padding to make it cacheline aligned.
3078  * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them together
3079  * make a complete batch buffer.
3080  */
3081 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3082 {
3083         /* WaDisableCtxRestoreArbitration:bdw,chv */
3084         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3085
3086         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3087         if (IS_BROADWELL(engine->i915))
3088                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3089
3090         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3091         /* Actual scratch location is at 128 bytes offset */
3092         batch = gen8_emit_pipe_control(batch,
3093                                        PIPE_CONTROL_FLUSH_L3 |
3094                                        PIPE_CONTROL_STORE_DATA_INDEX |
3095                                        PIPE_CONTROL_CS_STALL |
3096                                        PIPE_CONTROL_QW_WRITE,
3097                                        LRC_PPHWSP_SCRATCH_ADDR);
3098
3099         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3100
3101         /* Pad to end of cacheline */
3102         while ((unsigned long)batch % CACHELINE_BYTES)
3103                 *batch++ = MI_NOOP;
3104
3105         /*
3106          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3107          * execution depends on the length specified in terms of cache lines
3108          * in the register CTX_RCS_INDIRECT_CTX
3109          */
3110
3111         return batch;
3112 }
3113
3114 struct lri {
3115         i915_reg_t reg;
3116         u32 value;
3117 };
3118
3119 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3120 {
3121         GEM_BUG_ON(!count || count > 63);
3122
3123         *batch++ = MI_LOAD_REGISTER_IMM(count);
3124         do {
3125                 *batch++ = i915_mmio_reg_offset(lri->reg);
3126                 *batch++ = lri->value;
3127         } while (lri++, --count);
3128         *batch++ = MI_NOOP;
3129
3130         return batch;
3131 }
3132
3133 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3134 {
3135         static const struct lri lri[] = {
3136                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3137                 {
3138                         COMMON_SLICE_CHICKEN2,
3139                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3140                                        0),
3141                 },
3142
3143                 /* BSpec: 11391 */
3144                 {
3145                         FF_SLICE_CHICKEN,
3146                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3147                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3148                 },
3149
3150                 /* BSpec: 11299 */
3151                 {
3152                         _3D_CHICKEN3,
3153                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3154                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3155                 }
3156         };
3157
3158         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3159
3160         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3161         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3162
3163         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3164         batch = gen8_emit_pipe_control(batch,
3165                                        PIPE_CONTROL_FLUSH_L3 |
3166                                        PIPE_CONTROL_STORE_DATA_INDEX |
3167                                        PIPE_CONTROL_CS_STALL |
3168                                        PIPE_CONTROL_QW_WRITE,
3169                                        LRC_PPHWSP_SCRATCH_ADDR);
3170
3171         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3172
3173         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3174         if (HAS_POOLED_EU(engine->i915)) {
3175                 /*
3176                  * EU pool configuration is set up along with the golden context
3177                  * during context initialization. This value depends on
3178                  * device type (2x6 or 3x6) and needs to be updated based
3179                  * on which subslice is disabled, especially for 2x6
3180                  * devices; however, it is safe to load the default
3181                  * configuration of a 3x6 device instead of masking off the
3182                  * corresponding bits because the HW ignores bits of a disabled
3183                  * subslice and drops down to the appropriate config. Please
3184                  * see render_state_setup() in i915_gem_render_state.c for
3185                  * possible configurations; to avoid duplication they are
3186                  * not shown here again.
3187                  */
3188                 *batch++ = GEN9_MEDIA_POOL_STATE;
3189                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3190                 *batch++ = 0x00777000;
3191                 *batch++ = 0;
3192                 *batch++ = 0;
3193                 *batch++ = 0;
3194         }
3195
3196         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3197
3198         /* Pad to end of cacheline */
3199         while ((unsigned long)batch % CACHELINE_BYTES)
3200                 *batch++ = MI_NOOP;
3201
3202         return batch;
3203 }
3204
3205 static u32 *
3206 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3207 {
3208         int i;
3209
3210         /*
3211          * WaPipeControlBefore3DStateSamplePattern: cnl
3212          *
3213          * Ensure the engine is idle prior to programming a
3214          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3215          */
3216         batch = gen8_emit_pipe_control(batch,
3217                                        PIPE_CONTROL_CS_STALL,
3218                                        0);
3219         /*
3220          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3221          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3222          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3223          * confusing. Since gen8_emit_pipe_control() already advances the
3224          * batch by 6 dwords, we advance the other 10 here, completing a
3225          * cacheline. It's not clear if the workaround requires this padding
3226          * before other commands, or if it's just the regular padding we would
3227          * already have for the workaround bb, so leave it here for now.
3228          */
3229         for (i = 0; i < 10; i++)
3230                 *batch++ = MI_NOOP;
3231
3232         /* Pad to end of cacheline */
3233         while ((unsigned long)batch % CACHELINE_BYTES)
3234                 *batch++ = MI_NOOP;
3235
3236         return batch;
3237 }
3238
3239 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3240
3241 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3242 {
3243         struct drm_i915_gem_object *obj;
3244         struct i915_vma *vma;
3245         int err;
3246
3247         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3248         if (IS_ERR(obj))
3249                 return PTR_ERR(obj);
3250
3251         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3252         if (IS_ERR(vma)) {
3253                 err = PTR_ERR(vma);
3254                 goto err;
3255         }
3256
3257         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3258         if (err)
3259                 goto err;
3260
3261         engine->wa_ctx.vma = vma;
3262         return 0;
3263
3264 err:
3265         i915_gem_object_put(obj);
3266         return err;
3267 }
3268
3269 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3270 {
3271         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3272 }
3273
3274 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3275
3276 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3277 {
3278         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3279         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3280                                             &wa_ctx->per_ctx };
3281         wa_bb_func_t wa_bb_fn[2];
3282         struct page *page;
3283         void *batch, *batch_ptr;
3284         unsigned int i;
3285         int ret;
3286
3287         if (engine->class != RENDER_CLASS)
3288                 return 0;
3289
3290         switch (INTEL_GEN(engine->i915)) {
3291         case 12:
3292         case 11:
3293                 return 0;
3294         case 10:
3295                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3296                 wa_bb_fn[1] = NULL;
3297                 break;
3298         case 9:
3299                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3300                 wa_bb_fn[1] = NULL;
3301                 break;
3302         case 8:
3303                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3304                 wa_bb_fn[1] = NULL;
3305                 break;
3306         default:
3307                 MISSING_CASE(INTEL_GEN(engine->i915));
3308                 return 0;
3309         }
3310
3311         ret = lrc_setup_wa_ctx(engine);
3312         if (ret) {
3313                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3314                 return ret;
3315         }
3316
3317         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3318         batch = batch_ptr = kmap_atomic(page);
3319
3320         /*
3321          * Emit the two workaround batch buffers, recording the offset from the
3322          * start of the workaround batch buffer object for each and their
3323          * respective sizes.
3324          */
3325         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3326                 wa_bb[i]->offset = batch_ptr - batch;
3327                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3328                                                   CACHELINE_BYTES))) {
3329                         ret = -EINVAL;
3330                         break;
3331                 }
3332                 if (wa_bb_fn[i])
3333                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3334                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3335         }
3336
3337         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3338
3339         kunmap_atomic(batch);
3340         if (ret)
3341                 lrc_destroy_wa_ctx(engine);
3342
3343         return ret;
3344 }
3345
3346 static void enable_execlists(struct intel_engine_cs *engine)
3347 {
3348         u32 mode;
3349
3350         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3351
3352         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3353
3354         if (INTEL_GEN(engine->i915) >= 11)
3355                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3356         else
3357                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3358         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3359
3360         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3361
3362         ENGINE_WRITE_FW(engine,
3363                         RING_HWS_PGA,
3364                         i915_ggtt_offset(engine->status_page.vma));
3365         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3366
3367         engine->context_tag = 0;
3368 }
3369
3370 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3371 {
3372         bool unexpected = false;
3373
3374         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3375                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3376                 unexpected = true;
3377         }
3378
3379         return unexpected;
3380 }
3381
3382 static int execlists_resume(struct intel_engine_cs *engine)
3383 {
3384         intel_engine_apply_workarounds(engine);
3385         intel_engine_apply_whitelist(engine);
3386
3387         intel_mocs_init_engine(engine);
3388
3389         intel_engine_reset_breadcrumbs(engine);
3390
3391         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3392                 struct drm_printer p = drm_debug_printer(__func__);
3393
3394                 intel_engine_dump(engine, &p, NULL);
3395         }
3396
3397         enable_execlists(engine);
3398
3399         return 0;
3400 }
3401
3402 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3403 {
3404         struct intel_engine_execlists * const execlists = &engine->execlists;
3405         unsigned long flags;
3406
3407         ENGINE_TRACE(engine, "depth<-%d\n",
3408                      atomic_read(&execlists->tasklet.count));
3409
3410         /*
3411          * Prevent request submission to the hardware until we have
3412          * completed the reset in i915_gem_reset_finish(). If a request
3413          * is completed by one engine, it may then queue a request
3414          * to a second via its execlists->tasklet *just* as we are
3415          * calling engine->resume() and also writing the ELSP.
3416          * Turning off the execlists->tasklet until the reset is over
3417          * prevents the race.
3418          */
3419         __tasklet_disable_sync_once(&execlists->tasklet);
3420         GEM_BUG_ON(!reset_in_progress(execlists));
3421
3422         /* And flush any current direct submission. */
3423         spin_lock_irqsave(&engine->active.lock, flags);
3424         spin_unlock_irqrestore(&engine->active.lock, flags);
3425
3426         /*
3427          * We stop the engines, otherwise we might get a failed reset and a
3428          * dead gpu (on elk). Even a gpu as modern as kbl can suffer
3429          * a system hang if a batchbuffer is progressing when
3430          * the reset is issued, regardless of the READY_TO_RESET ack.
3431          * Thus we assume it is best to stop the engines on all gens
3432          * where we have a gpu reset.
3433          *
3434          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3435          *
3436          * FIXME: Wa for more modern gens needs to be validated
3437          */
3438         intel_engine_stop_cs(engine);
3439 }
3440
3441 static void reset_csb_pointers(struct intel_engine_cs *engine)
3442 {
3443         struct intel_engine_execlists * const execlists = &engine->execlists;
3444         const unsigned int reset_value = execlists->csb_size - 1;
3445
3446         ring_set_paused(engine, 0);
3447
3448         /*
3449          * After a reset, the HW starts writing into CSB entry [0]. We
3450          * therefore have to set our HEAD pointer back one entry so that
3451          * the *first* entry we check is entry 0. To complicate this further,
3452          * as we don't wait for the first interrupt after reset, we have to
3453          * fake the HW write to point back to the last entry so that our
3454          * inline comparison of our cached head position against the last HW
3455          * write works even before the first interrupt.
3456          */
3457         execlists->csb_head = reset_value;
3458         WRITE_ONCE(*execlists->csb_write, reset_value);
3459         wmb(); /* Make sure this is visible to HW (paranoia?) */
3460
3461         /*
3462          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3463          * Bludgeon them with a mmio update to be sure.
3464          */
3465         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3466                      reset_value << 8 | reset_value);
3467         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3468
3469         invalidate_csb_entries(&execlists->csb_status[0],
3470                                &execlists->csb_status[reset_value]);
3471 }
3472
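/*
 * Clear STOP_RING in the context image's copy of RING_MI_MODE (where the
 * engine layout has one, see lrc_ring_mi_mode()) using the masked-bit
 * format, so that the engine does not resume stopped when this image is
 * reloaded after the reset.
 */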
3473 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3474 {
3475         int x;
3476
3477         x = lrc_ring_mi_mode(engine);
3478         if (x != -1) {
3479                 regs[x + 1] &= ~STOP_RING;
3480                 regs[x + 1] |= STOP_RING << 16;
3481         }
3482 }
3483
3484 static void __execlists_reset_reg_state(const struct intel_context *ce,
3485                                         const struct intel_engine_cs *engine)
3486 {
3487         u32 *regs = ce->lrc_reg_state;
3488
3489         __reset_stop_ring(regs, engine);
3490 }
3491
3492 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3493 {
3494         struct intel_engine_execlists * const execlists = &engine->execlists;
3495         struct intel_context *ce;
3496         struct i915_request *rq;
3497
3498         mb(); /* paranoia: read the CSB pointers from after the reset */
3499         clflush(execlists->csb_write);
3500         mb();
3501
3502         process_csb(engine); /* drain preemption events */
3503
3504         /* Following the reset, we need to reload the CSB read/write pointers */
3505         reset_csb_pointers(engine);
3506
3507         /*
3508          * Save the currently executing context, even if we completed
3509          * its request, it was still running at the time of the
3510          * reset and will have been clobbered.
3511          */
3512         rq = execlists_active(execlists);
3513         if (!rq)
3514                 goto unwind;
3515
3516         /* We still have requests in-flight; the engine should be active */
3517         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3518
3519         ce = rq->context;
3520         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3521
3522         if (i915_request_completed(rq)) {
3523                 /* Idle context; tidy up the ring so we can restart afresh */
3524                 ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
3525                 goto out_replay;
3526         }
3527
3528         /* Context has requests still in-flight; it should not be idle! */
3529         GEM_BUG_ON(i915_active_is_idle(&ce->active));
3530         rq = active_request(ce->timeline, rq);
3531         ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
3532         GEM_BUG_ON(ce->ring->head == ce->ring->tail);
3533
3534         /*
3535          * If this request hasn't started yet, e.g. it is waiting on a
3536          * semaphore, we need to avoid skipping the request or else we
3537          * break the signaling chain. However, if the context is corrupt
3538          * the request will not restart and we will be stuck with a wedged
3539          * device. It is quite often the case that if we issue a reset
3540          * while the GPU is loading the context image, the context
3541          * image becomes corrupt.
3542          *
3543          * Otherwise, if we have not started yet, the request should replay
3544          * perfectly and we do not need to flag the result as being erroneous.
3545          */
3546         if (!i915_request_started(rq))
3547                 goto out_replay;
3548
3549         /*
3550          * If the request was innocent, we leave the request in the ELSP
3551          * and will try to replay it on restarting. The context image may
3552          * have been corrupted by the reset, in which case we may have
3553          * to service a new GPU hang, but more likely we can continue on
3554          * without impact.
3555          *
3556          * If the request was guilty, we presume the context is corrupt
3557          * and have to at least restore the RING register in the context
3558          * image back to the expected values to skip over the guilty request.
3559          */
3560         __i915_request_reset(rq, stalled);
3561         if (!stalled)
3562                 goto out_replay;
3563
3564         /*
3565          * We want a simple context + ring to execute the breadcrumb update.
3566          * We cannot rely on the context being intact across the GPU hang,
3567          * so clear it and rebuild just what we need for the breadcrumb.
3568          * All pending requests for this context will be zapped, and any
3569          * future request will be after userspace has had the opportunity
3570          * to recreate its own state.
3571          */
3572         GEM_BUG_ON(!intel_context_is_pinned(ce));
3573         restore_default_state(ce, engine);
3574
3575 out_replay:
3576         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3577                      ce->ring->head, ce->ring->tail);
3578         intel_ring_update_space(ce->ring);
3579         __execlists_reset_reg_state(ce, engine);
3580         __execlists_update_reg_state(ce, engine);
3581         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3582
3583 unwind:
3584         /* Push back any incomplete requests for replay after the reset. */
3585         cancel_port_requests(execlists);
3586         __unwind_incomplete_requests(engine);
3587 }
3588
3589 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3590 {
3591         unsigned long flags;
3592
3593         ENGINE_TRACE(engine, "\n");
3594
3595         spin_lock_irqsave(&engine->active.lock, flags);
3596
3597         __execlists_reset(engine, stalled);
3598
3599         spin_unlock_irqrestore(&engine->active.lock, flags);
3600 }
3601
3602 static void nop_submission_tasklet(unsigned long data)
3603 {
3604         /* The driver is wedged; don't process any more events. */
3605 }
3606
3607 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3608 {
3609         struct intel_engine_execlists * const execlists = &engine->execlists;
3610         struct i915_request *rq, *rn;
3611         struct rb_node *rb;
3612         unsigned long flags;
3613
3614         ENGINE_TRACE(engine, "\n");
3615
3616         /*
3617          * Before we call engine->cancel_requests(), we should have exclusive
3618          * access to the submission state. This is arranged for us by the
3619          * caller disabling the interrupt generation, the tasklet and other
3620          * threads that may then access the same state, giving us a free hand
3621          * to reset state. However, we still need to let lockdep be aware that
3622          * we know this state may be accessed in hardirq context, so we
3623          * disable the irq around this manipulation and we want to keep
3624          * the spinlock focused on its duties and not accidentally conflate
3625          * coverage to the submission's irq state. (Similarly, although we
3626          * shouldn't need to disable irq around the manipulation of the
3627          * submission's irq state, we also wish to remind ourselves that
3628          * it is irq state.)
3629          */
3630         spin_lock_irqsave(&engine->active.lock, flags);
3631
3632         __execlists_reset(engine, true);
3633
3634         /* Mark all executing requests as skipped. */
3635         list_for_each_entry(rq, &engine->active.requests, sched.link)
3636                 mark_eio(rq);
3637
3638         /* Flush the queued requests to the timeline list (for retiring). */
3639         while ((rb = rb_first_cached(&execlists->queue))) {
3640                 struct i915_priolist *p = to_priolist(rb);
3641                 int i;
3642
3643                 priolist_for_each_request_consume(rq, rn, p, i) {
3644                         mark_eio(rq);
3645                         __i915_request_submit(rq);
3646                 }
3647
3648                 rb_erase_cached(&p->node, &execlists->queue);
3649                 i915_priolist_free(p);
3650         }
3651
3652         /* On-hold requests will be flushed to timeline upon their release */
3653         list_for_each_entry(rq, &engine->active.hold, sched.link)
3654                 mark_eio(rq);
3655
3656         /* Cancel all attached virtual engines */
3657         while ((rb = rb_first_cached(&execlists->virtual))) {
3658                 struct virtual_engine *ve =
3659                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3660
3661                 rb_erase_cached(rb, &execlists->virtual);
3662                 RB_CLEAR_NODE(rb);
3663
3664                 spin_lock(&ve->base.active.lock);
3665                 rq = fetch_and_zero(&ve->request);
3666                 if (rq) {
3667                         mark_eio(rq);
3668
3669                         rq->engine = engine;
3670                         __i915_request_submit(rq);
3671                         i915_request_put(rq);
3672
3673                         ve->base.execlists.queue_priority_hint = INT_MIN;
3674                 }
3675                 spin_unlock(&ve->base.active.lock);
3676         }
3677
3678         /* Remaining _unready_ requests will be nop'ed when submitted */
3679
3680         execlists->queue_priority_hint = INT_MIN;
3681         execlists->queue = RB_ROOT_CACHED;
3682
3683         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3684         execlists->tasklet.func = nop_submission_tasklet;
3685
3686         spin_unlock_irqrestore(&engine->active.lock, flags);
3687 }
3688
3689 static void execlists_reset_finish(struct intel_engine_cs *engine)
3690 {
3691         struct intel_engine_execlists * const execlists = &engine->execlists;
3692
3693         /*
3694          * After a GPU reset, we may have requests to replay. Do so now while
3695          * we still have the forcewake to be sure that the GPU is not allowed
3696          * to sleep before we restart and reload a context.
3697          */
3698         GEM_BUG_ON(!reset_in_progress(execlists));
3699         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3700                 execlists->tasklet.func(execlists->tasklet.data);
3701
3702         if (__tasklet_enable(&execlists->tasklet))
3703                 /* And kick in case we missed a new request submission. */
3704                 tasklet_hi_schedule(&execlists->tasklet);
3705         ENGINE_TRACE(engine, "depth->%d\n",
3706                      atomic_read(&execlists->tasklet.count));
3707 }
3708
3709 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3710                                     u64 offset, u32 len,
3711                                     const unsigned int flags)
3712 {
3713         u32 *cs;
3714
3715         cs = intel_ring_begin(rq, 4);
3716         if (IS_ERR(cs))
3717                 return PTR_ERR(cs);
3718
3719         /*
3720          * WaDisableCtxRestoreArbitration:bdw,chv
3721          *
3722          * We don't need to perform MI_ARB_ENABLE as often as we do (in
3723          * particular on all the gens that do not need the w/a at all!), if we
3724          * took care to make sure that on every switch into this context
3725          * (both ordinary and for preemption) arbitration was enabled
3726          * we would be fine.  However, for gen8 there is another w/a that
3727          * requires us to not preempt inside GPGPU execution, so we keep
3728          * arbitration disabled for gen8 batches. Arbitration will be
3729          * re-enabled before we close the request
3730          * (engine->emit_fini_breadcrumb).
3731          */
3732         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3733
3734         /* FIXME(BDW+): Address space and security selectors. */
3735         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3736                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3737         *cs++ = lower_32_bits(offset);
3738         *cs++ = upper_32_bits(offset);
3739
3740         intel_ring_advance(rq, cs);
3741
3742         return 0;
3743 }
3744
3745 static int gen8_emit_bb_start(struct i915_request *rq,
3746                               u64 offset, u32 len,
3747                               const unsigned int flags)
3748 {
3749         u32 *cs;
3750
3751         cs = intel_ring_begin(rq, 6);
3752         if (IS_ERR(cs))
3753                 return PTR_ERR(cs);
3754
3755         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3756
3757         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3758                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3759         *cs++ = lower_32_bits(offset);
3760         *cs++ = upper_32_bits(offset);
3761
3762         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3763         *cs++ = MI_NOOP;
3764
3765         intel_ring_advance(rq, cs);
3766
3767         return 0;
3768 }
3769
3770 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3771 {
3772         ENGINE_WRITE(engine, RING_IMR,
3773                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3774         ENGINE_POSTING_READ(engine, RING_IMR);
3775 }
3776
3777 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3778 {
3779         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3780 }
3781
3782 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3783 {
3784         u32 cmd, *cs;
3785
3786         cs = intel_ring_begin(request, 4);
3787         if (IS_ERR(cs))
3788                 return PTR_ERR(cs);
3789
3790         cmd = MI_FLUSH_DW + 1;
3791
3792         /* We always require a command barrier so that subsequent
3793          * commands, such as breadcrumb interrupts, are strictly ordered
3794          * wrt the contents of the write cache being flushed to memory
3795          * (and thus being coherent from the CPU).
3796          */
3797         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3798
3799         if (mode & EMIT_INVALIDATE) {
3800                 cmd |= MI_INVALIDATE_TLB;
3801                 if (request->engine->class == VIDEO_DECODE_CLASS)
3802                         cmd |= MI_INVALIDATE_BSD;
3803         }
3804
3805         *cs++ = cmd;
3806         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3807         *cs++ = 0; /* upper addr */
3808         *cs++ = 0; /* value */
3809         intel_ring_advance(request, cs);
3810
3811         return 0;
3812 }
3813
3814 static int gen8_emit_flush_render(struct i915_request *request,
3815                                   u32 mode)
3816 {
3817         bool vf_flush_wa = false, dc_flush_wa = false;
3818         u32 *cs, flags = 0;
3819         int len;
3820
3821         flags |= PIPE_CONTROL_CS_STALL;
3822
3823         if (mode & EMIT_FLUSH) {
3824                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3825                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3826                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3827                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3828         }
3829
3830         if (mode & EMIT_INVALIDATE) {
3831                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3832                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3833                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3834                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3835                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3836                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3837                 flags |= PIPE_CONTROL_QW_WRITE;
3838                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3839
3840                 /*
3841                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3842                  * pipe control.
3843                  */
3844                 if (IS_GEN(request->i915, 9))
3845                         vf_flush_wa = true;
3846
3847                 /* WaForGAMHang:kbl */
3848                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3849                         dc_flush_wa = true;
3850         }
3851
3852         len = 6;
3853
3854         if (vf_flush_wa)
3855                 len += 6;
3856
3857         if (dc_flush_wa)
3858                 len += 12;
3859
3860         cs = intel_ring_begin(request, len);
3861         if (IS_ERR(cs))
3862                 return PTR_ERR(cs);
3863
3864         if (vf_flush_wa)
3865                 cs = gen8_emit_pipe_control(cs, 0, 0);
3866
3867         if (dc_flush_wa)
3868                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3869                                             0);
3870
3871         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3872
3873         if (dc_flush_wa)
3874                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3875
3876         intel_ring_advance(request, cs);
3877
3878         return 0;
3879 }
3880
3881 static int gen11_emit_flush_render(struct i915_request *request,
3882                                    u32 mode)
3883 {
3884         if (mode & EMIT_FLUSH) {
3885                 u32 *cs;
3886                 u32 flags = 0;
3887
3888                 flags |= PIPE_CONTROL_CS_STALL;
3889
3890                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3891                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3892                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3893                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3894                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3895                 flags |= PIPE_CONTROL_QW_WRITE;
3896                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3897
3898                 cs = intel_ring_begin(request, 6);
3899                 if (IS_ERR(cs))
3900                         return PTR_ERR(cs);
3901
3902                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3903                 intel_ring_advance(request, cs);
3904         }
3905
3906         if (mode & EMIT_INVALIDATE) {
3907                 u32 *cs;
3908                 u32 flags = 0;
3909
3910                 flags |= PIPE_CONTROL_CS_STALL;
3911
3912                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3913                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3914                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3915                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3916                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3917                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3918                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3919                 flags |= PIPE_CONTROL_QW_WRITE;
3920                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3921
3922                 cs = intel_ring_begin(request, 6);
3923                 if (IS_ERR(cs))
3924                         return PTR_ERR(cs);
3925
3926                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3927                 intel_ring_advance(request, cs);
3928         }
3929
3930         return 0;
3931 }
3932
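/*
 * Gen12 extends MI_ARB_CHECK with a pre-fetch (pre-parser) disable control;
 * the encoding below follows the usual masked-bit pattern, with bit 8
 * acting as the write mask for the state bit in bit 0.
 */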
3933 static u32 preparser_disable(bool state)
3934 {
3935         return MI_ARB_CHECK | 1 << 8 | state;
3936 }
3937
3938 static int gen12_emit_flush_render(struct i915_request *request,
3939                                    u32 mode)
3940 {
3941         if (mode & EMIT_FLUSH) {
3942                 u32 flags = 0;
3943                 u32 *cs;
3944
3945                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3946                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3947                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3948                 /* Wa_1409600907:tgl */
3949                 flags |= PIPE_CONTROL_DEPTH_STALL;
3950                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3951                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3952                 flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
3953
3954                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3955                 flags |= PIPE_CONTROL_QW_WRITE;
3956
3957                 flags |= PIPE_CONTROL_CS_STALL;
3958
3959                 cs = intel_ring_begin(request, 6);
3960                 if (IS_ERR(cs))
3961                         return PTR_ERR(cs);
3962
3963                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3964                 intel_ring_advance(request, cs);
3965         }
3966
3967         if (mode & EMIT_INVALIDATE) {
3968                 u32 flags = 0;
3969                 u32 *cs;
3970
3971                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3972                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3973                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3974                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3975                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3976                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3977                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3978                 flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
3979
3980                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3981                 flags |= PIPE_CONTROL_QW_WRITE;
3982
3983                 flags |= PIPE_CONTROL_CS_STALL;
3984
3985                 cs = intel_ring_begin(request, 8);
3986                 if (IS_ERR(cs))
3987                         return PTR_ERR(cs);
3988
3989                 /*
3990                  * Prevent the pre-parser from skipping past the TLB
3991                  * invalidate and loading a stale page for the batch
3992                  * buffer / request payload.
3993                  */
3994                 *cs++ = preparser_disable(true);
3995
3996                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3997
3998                 *cs++ = preparser_disable(false);
3999                 intel_ring_advance(request, cs);
4000
4001                 /*
4002                  * Wa_1604544889:tgl
4003                  */
4004                 if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4005                         flags = 0;
4006                         flags |= PIPE_CONTROL_CS_STALL;
4007                         flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4008
4009                         flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4010                         flags |= PIPE_CONTROL_QW_WRITE;
4011
4012                         cs = intel_ring_begin(request, 6);
4013                         if (IS_ERR(cs))
4014                                 return PTR_ERR(cs);
4015
4016                         cs = gen8_emit_pipe_control(cs, flags,
4017                                                     LRC_PPHWSP_SCRATCH_ADDR);
4018                         intel_ring_advance(request, cs);
4019                 }
4020         }
4021
4022         return 0;
4023 }
4024
4025 /*
4026  * Reserve space for 2 NOOPs at the end of each request to be
4027  * used as a workaround for not being allowed to do lite
4028  * restore with HEAD==TAIL (WaIdleLiteRestore).
4029  */
4030 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4031 {
4032         /* Ensure there's always at least one preemption point per-request. */
4033         *cs++ = MI_ARB_CHECK;
4034         *cs++ = MI_NOOP;
4035         request->wa_tail = intel_ring_offset(request, cs);
4036
4037         return cs;
4038 }
4039
4040 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4041 {
4042         *cs++ = MI_SEMAPHORE_WAIT |
4043                 MI_SEMAPHORE_GLOBAL_GTT |
4044                 MI_SEMAPHORE_POLL |
4045                 MI_SEMAPHORE_SAD_EQ_SDD;
4046         *cs++ = 0;
4047         *cs++ = intel_hws_preempt_address(request->engine);
4048         *cs++ = 0;
4049
4050         return cs;
4051 }
4052
4053 static __always_inline u32*
4054 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4055                                  u32 *cs)
4056 {
4057         *cs++ = MI_USER_INTERRUPT;
4058
4059         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4060         if (intel_engine_has_semaphores(request->engine))
4061                 cs = emit_preempt_busywait(request, cs);
4062
4063         request->tail = intel_ring_offset(request, cs);
4064         assert_ring_tail_valid(request->ring, request->tail);
4065
4066         return gen8_emit_wa_tail(request, cs);
4067 }
4068
4069 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4070 {
4071         cs = gen8_emit_ggtt_write(cs,
4072                                   request->fence.seqno,
4073                                   i915_request_active_timeline(request)->hwsp_offset,
4074                                   0);
4075
4076         return gen8_emit_fini_breadcrumb_footer(request, cs);
4077 }
4078
4079 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4080 {
4081         cs = gen8_emit_pipe_control(cs,
4082                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4083                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4084                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4085                                     0);
4086
4087         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4088         cs = gen8_emit_ggtt_write_rcs(cs,
4089                                       request->fence.seqno,
4090                                       i915_request_active_timeline(request)->hwsp_offset,
4091                                       PIPE_CONTROL_FLUSH_ENABLE |
4092                                       PIPE_CONTROL_CS_STALL);
4093
4094         return gen8_emit_fini_breadcrumb_footer(request, cs);
4095 }
4096
4097 static u32 *
4098 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4099 {
4100         cs = gen8_emit_ggtt_write_rcs(cs,
4101                                       request->fence.seqno,
4102                                       i915_request_active_timeline(request)->hwsp_offset,
4103                                       PIPE_CONTROL_CS_STALL |
4104                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4105                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4106                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4107                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4108                                       PIPE_CONTROL_FLUSH_ENABLE);
4109
4110         return gen8_emit_fini_breadcrumb_footer(request, cs);
4111 }
4112
4113 /*
4114  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4115  * flush and will continue pre-fetching the instructions after it before the
4116  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4117  * BB_START/END instructions, so, even though we might pre-fetch the preamble
4118  * of the next request before the memory has been flushed, we're guaranteed that
4119  * we won't access the batch itself too early.
4120  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4121  * so, if the current request is modifying an instruction in the next request on
4122  * the same intel_context, we might pre-fetch and then execute the pre-update
4123  * instruction. To avoid this, the users of self-modifying code should either
4124  * disable the parser around the code emitting the memory writes, via a new flag
4125  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4126  * the in-kernel use-cases we've opted to use a separate context, see
4127  * reloc_gpu() as an example.
4128  * All the above applies only to the instructions themselves. Non-inline data
4129  * used by the instructions is not pre-fetched.
4130  */
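
/*
 * The MI_ARB_CHECK pre-fetch disable flag referred to above is built by
 * preparser_disable(), which gen12_emit_flush_render() uses to bracket its
 * TLB invalidation.
 */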
4131
4132 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4133 {
4134         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4135                 MI_SEMAPHORE_GLOBAL_GTT |
4136                 MI_SEMAPHORE_POLL |
4137                 MI_SEMAPHORE_SAD_EQ_SDD;
4138         *cs++ = 0;
4139         *cs++ = intel_hws_preempt_address(request->engine);
4140         *cs++ = 0;
4141         *cs++ = 0;
4142         *cs++ = MI_NOOP;
4143
4144         return cs;
4145 }
4146
4147 static __always_inline u32*
4148 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4149 {
4150         *cs++ = MI_USER_INTERRUPT;
4151
4152         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4153         if (intel_engine_has_semaphores(request->engine))
4154                 cs = gen12_emit_preempt_busywait(request, cs);
4155
4156         request->tail = intel_ring_offset(request, cs);
4157         assert_ring_tail_valid(request->ring, request->tail);
4158
4159         return gen8_emit_wa_tail(request, cs);
4160 }
4161
4162 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4163 {
4164         cs = gen8_emit_ggtt_write(cs,
4165                                   request->fence.seqno,
4166                                   i915_request_active_timeline(request)->hwsp_offset,
4167                                   0);
4168
4169         return gen12_emit_fini_breadcrumb_footer(request, cs);
4170 }
4171
4172 static u32 *
4173 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4174 {
4175         cs = gen8_emit_ggtt_write_rcs(cs,
4176                                       request->fence.seqno,
4177                                       i915_request_active_timeline(request)->hwsp_offset,
4178                                       PIPE_CONTROL_CS_STALL |
4179                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4180                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4181                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4182                                       /* Wa_1409600907:tgl */
4183                                       PIPE_CONTROL_DEPTH_STALL |
4184                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4185                                       PIPE_CONTROL_FLUSH_ENABLE |
4186                                       PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4187
4188         return gen12_emit_fini_breadcrumb_footer(request, cs);
4189 }
4190
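     /* The engine is parked (idle): stop the timeslice and preemption timers. */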
4191 static void execlists_park(struct intel_engine_cs *engine)
4192 {
4193         cancel_timer(&engine->execlists.timer);
4194         cancel_timer(&engine->execlists.preempt);
4195 }
4196
4197 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4198 {
4199         engine->submit_request = execlists_submit_request;
4200         engine->schedule = i915_schedule;
4201         engine->execlists.tasklet.func = execlists_submission_tasklet;
4202
4203         engine->reset.prepare = execlists_reset_prepare;
4204         engine->reset.rewind = execlists_reset_rewind;
4205         engine->reset.cancel = execlists_reset_cancel;
4206         engine->reset.finish = execlists_reset_finish;
4207
4208         engine->park = execlists_park;
4209         engine->unpark = NULL;
4210
4211         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4212         if (!intel_vgpu_active(engine->i915)) {
4213                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4214                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4215                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4216         }
4217
4218         if (INTEL_GEN(engine->i915) >= 12)
4219                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4220
4221         if (intel_engine_has_preemption(engine))
4222                 engine->emit_bb_start = gen8_emit_bb_start;
4223         else
4224                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4225 }
4226
4227 static void execlists_shutdown(struct intel_engine_cs *engine)
4228 {
4229         /* Synchronise with residual timers and any softirq they raise */
4230         del_timer_sync(&engine->execlists.timer);
4231         del_timer_sync(&engine->execlists.preempt);
4232         tasklet_kill(&engine->execlists.tasklet);
4233 }
4234
4235 static void execlists_release(struct intel_engine_cs *engine)
4236 {
4237         execlists_shutdown(engine);
4238
4239         intel_engine_cleanup_common(engine);
4240         lrc_destroy_wa_ctx(engine);
4241 }
4242
4243 static void
4244 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4245 {
4246         /* Default vfuncs which can be overridden by each engine. */
4247
4248         engine->resume = execlists_resume;
4249
4250         engine->cops = &execlists_context_ops;
4251         engine->request_alloc = execlists_request_alloc;
4252
4253         engine->emit_flush = gen8_emit_flush;
4254         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4255         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4256         if (INTEL_GEN(engine->i915) >= 12)
4257                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4258
4259         engine->set_default_submission = intel_execlists_set_default_submission;
4260
4261         if (INTEL_GEN(engine->i915) < 11) {
4262                 engine->irq_enable = gen8_logical_ring_enable_irq;
4263                 engine->irq_disable = gen8_logical_ring_disable_irq;
4264         } else {
4265                 /*
4266                  * TODO: On Gen11 interrupt masks need to be clear
4267                  * to allow C6 entry. Keep interrupts enabled at all
4268                  * times and take the hit of generating extra interrupts
4269                  * until a more refined solution exists.
4270                  */
4271         }
4272 }
4273
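     /*
      * Before Gen11 the engines share a common set of GT interrupt registers,
      * so each engine's bits sit at an engine-specific shift; Gen11+ has
      * per-engine interrupt registers and needs no shift.
      */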
4274 static inline void
4275 logical_ring_default_irqs(struct intel_engine_cs *engine)
4276 {
4277         unsigned int shift = 0;
4278
4279         if (INTEL_GEN(engine->i915) < 11) {
4280                 const u8 irq_shifts[] = {
4281                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
4282                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
4283                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4284                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4285                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
4286                 };
4287
4288                 shift = irq_shifts[engine->id];
4289         }
4290
4291         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4292         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4293 }
4294
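     /* The render engine needs gen-specific flush and breadcrumb emitters. */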
4295 static void rcs_submission_override(struct intel_engine_cs *engine)
4296 {
4297         switch (INTEL_GEN(engine->i915)) {
4298         case 12:
4299                 engine->emit_flush = gen12_emit_flush_render;
4300                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4301                 break;
4302         case 11:
4303                 engine->emit_flush = gen11_emit_flush_render;
4304                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4305                 break;
4306         default:
4307                 engine->emit_flush = gen8_emit_flush_render;
4308                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4309                 break;
4310         }
4311 }
4312
4313 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4314 {
4315         struct intel_engine_execlists * const execlists = &engine->execlists;
4316         struct drm_i915_private *i915 = engine->i915;
4317         struct intel_uncore *uncore = engine->uncore;
4318         u32 base = engine->mmio_base;
4319
4320         tasklet_init(&engine->execlists.tasklet,
4321                      execlists_submission_tasklet, (unsigned long)engine);
4322         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4323         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4324
4325         logical_ring_default_vfuncs(engine);
4326         logical_ring_default_irqs(engine);
4327
4328         if (engine->class == RENDER_CLASS)
4329                 rcs_submission_override(engine);
4330
4331         if (intel_init_workaround_bb(engine))
4332                 /*
4333                  * We continue even if we fail to initialize the WA batch
4334                  * because we only expect rare glitches, nothing critical
4335                  * enough to prevent us from using the GPU.
4336                  */
4337                 DRM_ERROR("WA batch buffer initialization failed\n");
4338
4339         if (HAS_LOGICAL_RING_ELSQ(i915)) {
4340                 execlists->submit_reg = uncore->regs +
4341                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4342                 execlists->ctrl_reg = uncore->regs +
4343                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4344         } else {
4345                 execlists->submit_reg = uncore->regs +
4346                         i915_mmio_reg_offset(RING_ELSP(base));
4347         }
4348
4349         execlists->csb_status =
4350                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4351
4352         execlists->csb_write =
4353                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
4354
4355         if (INTEL_GEN(i915) < 11)
4356                 execlists->csb_size = GEN8_CSB_ENTRIES;
4357         else
4358                 execlists->csb_size = GEN11_CSB_ENTRIES;
4359
4360         reset_csb_pointers(engine);
4361
4362         /* Finally, take ownership and responsibility for cleanup! */
4363         engine->release = execlists_release;
4364
4365         return 0;
4366 }
4367
4368 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4369 {
4370         u32 indirect_ctx_offset;
4371
4372         switch (INTEL_GEN(engine->i915)) {
4373         default:
4374                 MISSING_CASE(INTEL_GEN(engine->i915));
4375                 /* fall through */
4376         case 12:
4377                 indirect_ctx_offset =
4378                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4379                 break;
4380         case 11:
4381                 indirect_ctx_offset =
4382                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4383                 break;
4384         case 10:
4385                 indirect_ctx_offset =
4386                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4387                 break;
4388         case 9:
4389                 indirect_ctx_offset =
4390                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4391                 break;
4392         case 8:
4393                 indirect_ctx_offset =
4394                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4395                 break;
4396         }
4397
4398         return indirect_ctx_offset;
4399 }
4400
4402 static void init_common_reg_state(u32 * const regs,
4403                                   const struct intel_engine_cs *engine,
4404                                   const struct intel_ring *ring,
4405                                   bool inhibit)
4406 {
4407         u32 ctl;
4408
4409         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4410         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4411         if (inhibit)
4412                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4413         if (INTEL_GEN(engine->i915) < 11)
4414                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4415                                            CTX_CTRL_RS_CTX_ENABLE);
4416         regs[CTX_CONTEXT_CONTROL] = ctl;
4417
4418         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4419 }
4420
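     /*
      * Point the per-context and indirect-context workaround batch buffer
      * registers in the context image at the engine's wa_ctx, if those
      * batches exist.
      */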
4421 static void init_wa_bb_reg_state(u32 * const regs,
4422                                  const struct intel_engine_cs *engine,
4423                                  u32 pos_bb_per_ctx)
4424 {
4425         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4426
4427         if (wa_ctx->per_ctx.size) {
4428                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4429
4430                 regs[pos_bb_per_ctx] =
4431                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4432         }
4433
4434         if (wa_ctx->indirect_ctx.size) {
4435                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4436
4437                 regs[pos_bb_per_ctx + 2] =
4438                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
4439                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4440
4441                 regs[pos_bb_per_ctx + 4] =
4442                         intel_lr_indirect_ctx_offset(engine) << 6;
4443         }
4444 }
4445
4446 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4447 {
4448         if (i915_vm_is_4lvl(&ppgtt->vm)) {
4449                 /*
4450                  * 64b PPGTT (48bit canonical): PDP0_DESCRIPTOR contains the
4451                  * base address of the PML4; the other PDP descriptors are ignored.
4452                  */
4453                 ASSIGN_CTX_PML4(ppgtt, regs);
4454         } else {
4455                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
4456                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
4457                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
4458                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
4459         }
4460 }
4461
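     /* A context using the global GTT runs on top of the aliasing ppgtt. */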
4462 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4463 {
4464         if (i915_is_ggtt(vm))
4465                 return i915_vm_to_ggtt(vm)->alias;
4466         else
4467                 return i915_vm_to_ppgtt(vm);
4468 }
4469
4470 static void execlists_init_reg_state(u32 *regs,
4471                                      const struct intel_context *ce,
4472                                      const struct intel_engine_cs *engine,
4473                                      const struct intel_ring *ring,
4474                                      bool inhibit)
4475 {
4476         /*
4477          * A context is actually a big batch buffer with several
4478          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4479          * values we are setting here are only for the first context restore:
4480          * on a subsequent save, the GPU will recreate this batchbuffer with new
4481          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4482          * we are not initializing here).
4483          *
4484          * Must keep consistent with virtual_update_register_offsets().
4485          */
4486         set_offsets(regs, reg_offsets(engine), engine, inhibit);
4487
4488         init_common_reg_state(regs, engine, ring, inhibit);
4489         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4490
4491         init_wa_bb_reg_state(regs, engine,
4492                              INTEL_GEN(engine->i915) >= 12 ?
4493                              GEN12_CTX_BB_PER_CTX_PTR :
4494                              CTX_BB_PER_CTX_PTR);
4495
4496         __reset_stop_ring(regs, engine);
4497 }
4498
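     /*
      * Build the context image: add the debug redzone, copy the engine's
      * default state if we have one (otherwise context restore stays
      * inhibited), then write the initial register state.
      */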
4499 static int
4500 populate_lr_context(struct intel_context *ce,
4501                     struct drm_i915_gem_object *ctx_obj,
4502                     struct intel_engine_cs *engine,
4503                     struct intel_ring *ring)
4504 {
4505         bool inhibit = true;
4506         void *vaddr;
4507         int ret;
4508
4509         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4510         if (IS_ERR(vaddr)) {
4511                 ret = PTR_ERR(vaddr);
4512                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4513                 return ret;
4514         }
4515
4516         set_redzone(vaddr, engine);
4517
4518         if (engine->default_state) {
4519                 void *defaults;
4520
4521                 defaults = i915_gem_object_pin_map(engine->default_state,
4522                                                    I915_MAP_WB);
4523                 if (IS_ERR(defaults)) {
4524                         ret = PTR_ERR(defaults);
4525                         goto err_unpin_ctx;
4526                 }
4527
4528                 memcpy(vaddr, defaults, engine->context_size);
4529                 i915_gem_object_unpin_map(engine->default_state);
4530                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
4531                 inhibit = false;
4532         }
4533
4534         /* The second page of the context object contains some fields which must
4535          * be set up prior to the first execution. */
4536         execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4537                                  ce, engine, ring, inhibit);
4538
4539         ret = 0;
4540 err_unpin_ctx:
4541         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4542         i915_gem_object_unpin_map(ctx_obj);
4543         return ret;
4544 }
4545
4546 static int __execlists_context_alloc(struct intel_context *ce,
4547                                      struct intel_engine_cs *engine)
4548 {
4549         struct drm_i915_gem_object *ctx_obj;
4550         struct intel_ring *ring;
4551         struct i915_vma *vma;
4552         u32 context_size;
4553         int ret;
4554
4555         GEM_BUG_ON(ce->state);
4556         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4557
4558         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4559                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4560
4561         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4562         if (IS_ERR(ctx_obj))
4563                 return PTR_ERR(ctx_obj);
4564
4565         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4566         if (IS_ERR(vma)) {
4567                 ret = PTR_ERR(vma);
4568                 goto error_deref_obj;
4569         }
4570
4571         if (!ce->timeline) {
4572                 struct intel_timeline *tl;
4573
4574                 tl = intel_timeline_create(engine->gt, NULL);
4575                 if (IS_ERR(tl)) {
4576                         ret = PTR_ERR(tl);
4577                         goto error_deref_obj;
4578                 }
4579
4580                 ce->timeline = tl;
4581         }
4582
4583         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4584         if (IS_ERR(ring)) {
4585                 ret = PTR_ERR(ring);
4586                 goto error_deref_obj;
4587         }
4588
4589         ret = populate_lr_context(ce, ctx_obj, engine, ring);
4590         if (ret) {
4591                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4592                 goto error_ring_free;
4593         }
4594
4595         ce->ring = ring;
4596         ce->state = vma;
4597
4598         return 0;
4599
4600 error_ring_free:
4601         intel_ring_put(ring);
4602 error_deref_obj:
4603         i915_gem_object_put(ctx_obj);
4604         return ret;
4605 }
4606
4607 static struct list_head *virtual_queue(struct virtual_engine *ve)
4608 {
4609         return &ve->base.execlists.default_priolist.requests[0];
4610 }
4611
4612 static void virtual_context_destroy(struct kref *kref)
4613 {
4614         struct virtual_engine *ve =
4615                 container_of(kref, typeof(*ve), context.ref);
4616         unsigned int n;
4617
4618         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4619         GEM_BUG_ON(ve->request);
4620         GEM_BUG_ON(ve->context.inflight);
4621
4622         for (n = 0; n < ve->num_siblings; n++) {
4623                 struct intel_engine_cs *sibling = ve->siblings[n];
4624                 struct rb_node *node = &ve->nodes[sibling->id].rb;
4625                 unsigned long flags;
4626
4627                 if (RB_EMPTY_NODE(node))
4628                         continue;
4629
4630                 spin_lock_irqsave(&sibling->active.lock, flags);
4631
4632                 /* Detachment is lazily performed in the execlists tasklet */
4633                 if (!RB_EMPTY_NODE(node))
4634                         rb_erase_cached(node, &sibling->execlists.virtual);
4635
4636                 spin_unlock_irqrestore(&sibling->active.lock, flags);
4637         }
4638         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4639
4640         if (ve->context.state)
4641                 __execlists_context_fini(&ve->context);
4642         intel_context_fini(&ve->context);
4643
4644         kfree(ve->bonds);
4645         kfree(ve);
4646 }
4647
4648 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4649 {
4650         int swp;
4651
4652         /*
4653          * Pick a random sibling on starting to help spread the load around.
4654          *
4655          * New contexts are typically created with exactly the same order
4656          * of siblings, and often started in batches. Due to the way we iterate
4657          * the array of siblings when submitting requests, sibling[0] is
4658          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4659          * randomised across the system, we also help spread the load by the
4660          * first engine we inspect being different each time.
4661          *
4662          * NB: This does not force us to execute on this engine; it will just
4663          * typically be the first we inspect for submission.
4664          */
4665         swp = prandom_u32_max(ve->num_siblings);
4666         if (!swp)
4667                 return;
4668
4669         swap(ve->siblings[swp], ve->siblings[0]);
4670         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4671                 virtual_update_register_offsets(ve->context.lrc_reg_state,
4672                                                 ve->siblings[0]);
4673 }
4674
4675 static int virtual_context_alloc(struct intel_context *ce)
4676 {
4677         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4678
4679         return __execlists_context_alloc(ce, ve->siblings[0]);
4680 }
4681
4682 static int virtual_context_pin(struct intel_context *ce)
4683 {
4684         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4685         int err;
4686
4687         /* Note: we must use a real engine class for setting up reg state */
4688         err = __execlists_context_pin(ce, ve->siblings[0]);
4689         if (err)
4690                 return err;
4691
4692         virtual_engine_initial_hint(ve);
4693         return 0;
4694 }
4695
4696 static void virtual_context_enter(struct intel_context *ce)
4697 {
4698         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4699         unsigned int n;
4700
4701         for (n = 0; n < ve->num_siblings; n++)
4702                 intel_engine_pm_get(ve->siblings[n]);
4703
4704         intel_timeline_enter(ce->timeline);
4705 }
4706
4707 static void virtual_context_exit(struct intel_context *ce)
4708 {
4709         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4710         unsigned int n;
4711
4712         intel_timeline_exit(ce->timeline);
4713
4714         for (n = 0; n < ve->num_siblings; n++)
4715                 intel_engine_pm_put(ve->siblings[n]);
4716 }
4717
4718 static const struct intel_context_ops virtual_context_ops = {
4719         .alloc = virtual_context_alloc,
4720
4721         .pin = virtual_context_pin,
4722         .unpin = execlists_context_unpin,
4723
4724         .enter = virtual_context_enter,
4725         .exit = virtual_context_exit,
4726
4727         .destroy = virtual_context_destroy,
4728 };
4729
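     /*
      * Peek at the pending virtual request and return the set of physical
      * engines it may run on; an empty mask means there is nothing to submit.
      */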
4730 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4731 {
4732         struct i915_request *rq;
4733         intel_engine_mask_t mask;
4734
4735         rq = READ_ONCE(ve->request);
4736         if (!rq)
4737                 return 0;
4738
4739         /* The rq is ready for submission; rq->execution_mask is now stable. */
4740         mask = rq->execution_mask;
4741         if (unlikely(!mask)) {
4742                 /* Invalid selection: flag the request in error and submit to any sibling */
4743                 i915_request_skip(rq, -ENODEV);
4744                 mask = ve->siblings[0]->mask;
4745         }
4746
4747         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4748                      rq->fence.context, rq->fence.seqno,
4749                      mask, ve->base.execlists.queue_priority_hint);
4750
4751         return mask;
4752 }
4753
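     /*
      * Offer the pending virtual request to each allowed sibling: (re)insert
      * this engine's node into the sibling's tree of virtual engines in
      * priority order, and kick the sibling's tasklet if that raises its
      * queue priority hint.
      */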
4754 static void virtual_submission_tasklet(unsigned long data)
4755 {
4756         struct virtual_engine * const ve = (struct virtual_engine *)data;
4757         const int prio = ve->base.execlists.queue_priority_hint;
4758         intel_engine_mask_t mask;
4759         unsigned int n;
4760
4761         rcu_read_lock();
4762         mask = virtual_submission_mask(ve);
4763         rcu_read_unlock();
4764         if (unlikely(!mask))
4765                 return;
4766
4767         local_irq_disable();
4768         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4769                 struct intel_engine_cs *sibling = ve->siblings[n];
4770                 struct ve_node * const node = &ve->nodes[sibling->id];
4771                 struct rb_node **parent, *rb;
4772                 bool first;
4773
4774                 if (unlikely(!(mask & sibling->mask))) {
4775                         if (!RB_EMPTY_NODE(&node->rb)) {
4776                                 spin_lock(&sibling->active.lock);
4777                                 rb_erase_cached(&node->rb,
4778                                                 &sibling->execlists.virtual);
4779                                 RB_CLEAR_NODE(&node->rb);
4780                                 spin_unlock(&sibling->active.lock);
4781                         }
4782                         continue;
4783                 }
4784
4785                 spin_lock(&sibling->active.lock);
4786
4787                 if (!RB_EMPTY_NODE(&node->rb)) {
4788                         /*
4789                          * Cheat and avoid rebalancing the tree if we can
4790                          * reuse this node in situ.
4791                          */
4792                         first = rb_first_cached(&sibling->execlists.virtual) ==
4793                                 &node->rb;
4794                         if (prio == node->prio || (prio > node->prio && first))
4795                                 goto submit_engine;
4796
4797                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4798                 }
4799
4800                 rb = NULL;
4801                 first = true;
4802                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4803                 while (*parent) {
4804                         struct ve_node *other;
4805
4806                         rb = *parent;
4807                         other = rb_entry(rb, typeof(*other), rb);
4808                         if (prio > other->prio) {
4809                                 parent = &rb->rb_left;
4810                         } else {
4811                                 parent = &rb->rb_right;
4812                                 first = false;
4813                         }
4814                 }
4815
4816                 rb_link_node(&node->rb, rb, parent);
4817                 rb_insert_color_cached(&node->rb,
4818                                        &sibling->execlists.virtual,
4819                                        first);
4820
4821 submit_engine:
4822                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4823                 node->prio = prio;
4824                 if (first && prio > sibling->execlists.queue_priority_hint) {
4825                         sibling->execlists.queue_priority_hint = prio;
4826                         tasklet_hi_schedule(&sibling->execlists.tasklet);
4827                 }
4828
4829                 spin_unlock(&sibling->active.lock);
4830         }
4831         local_irq_enable();
4832 }
4833
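     /*
      * A virtual engine queues at most one request at a time: flush any
      * completed holdover left by preempt-to-busy, then either submit an
      * already-completed @rq immediately or stash it and kick the tasklet
      * to offer it to the siblings.
      */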
4834 static void virtual_submit_request(struct i915_request *rq)
4835 {
4836         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4837         struct i915_request *old;
4838         unsigned long flags;
4839
4840         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
4841                      rq->fence.context,
4842                      rq->fence.seqno);
4843
4844         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4845
4846         spin_lock_irqsave(&ve->base.active.lock, flags);
4847
4848         old = ve->request;
4849         if (old) { /* background completion event from preempt-to-busy */
4850                 GEM_BUG_ON(!i915_request_completed(old));
4851                 __i915_request_submit(old);
4852                 i915_request_put(old);
4853         }
4854
4855         if (i915_request_completed(rq)) {
4856                 __i915_request_submit(rq);
4857
4858                 ve->base.execlists.queue_priority_hint = INT_MIN;
4859                 ve->request = NULL;
4860         } else {
4861                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
4862                 ve->request = i915_request_get(rq);
4863
4864                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4865                 list_move_tail(&rq->sched.link, virtual_queue(ve));
4866
4867                 tasklet_schedule(&ve->base.execlists.tasklet);
4868         }
4869
4870         spin_unlock_irqrestore(&ve->base.active.lock, flags);
4871 }
4872
4873 static struct ve_bond *
4874 virtual_find_bond(struct virtual_engine *ve,
4875                   const struct intel_engine_cs *master)
4876 {
4877         int i;
4878
4879         for (i = 0; i < ve->num_bonds; i++) {
4880                 if (ve->bonds[i].master == master)
4881                         return &ve->bonds[i];
4882         }
4883
4884         return NULL;
4885 }
4886
4887 static void
4888 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
4889 {
4890         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4891         intel_engine_mask_t allowed, exec;
4892         struct ve_bond *bond;
4893
4894         allowed = ~to_request(signal)->engine->mask;
4895
4896         bond = virtual_find_bond(ve, to_request(signal)->engine);
4897         if (bond)
4898                 allowed &= bond->sibling_mask;
4899
4900         /* Restrict the bonded request to run on only the available engines */
4901         exec = READ_ONCE(rq->execution_mask);
4902         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
4903                 ;
4904
4905         /* Prevent the master from being re-run on the bonded engines */
4906         to_request(signal)->execution_mask &= ~allowed;
4907 }
4908
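     /*
      * Illustrative usage only (the engine names below are made up and any
      * error handling beyond the create call is trimmed):
      *
      *     struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
      *     struct intel_context *ce;
      *
      *     ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
      *     if (IS_ERR(ce))
      *             return PTR_ERR(ce);
      *     ...
      *     intel_context_put(ce);
      *
      * Requests submitted against the returned context may be executed on
      * any one of the siblings.
      */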
4909 struct intel_context *
4910 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
4911                                unsigned int count)
4912 {
4913         struct virtual_engine *ve;
4914         unsigned int n;
4915         int err;
4916
4917         if (count == 0)
4918                 return ERR_PTR(-EINVAL);
4919
4920         if (count == 1)
4921                 return intel_context_create(siblings[0]);
4922
4923         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
4924         if (!ve)
4925                 return ERR_PTR(-ENOMEM);
4926
4927         ve->base.i915 = siblings[0]->i915;
4928         ve->base.gt = siblings[0]->gt;
4929         ve->base.uncore = siblings[0]->uncore;
4930         ve->base.id = -1;
4931
4932         ve->base.class = OTHER_CLASS;
4933         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4934         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4935         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4936
4937         /*
4938          * The decision on whether to submit a request using semaphores
4939          * depends on the saturated state of the engine. We only compute
4940          * this during HW submission of the request, and we need this
4941          * state to be globally applied to all requests being submitted
4942          * to this engine. Virtual engines encompass more than one physical
4943          * engine, so we cannot tell accurately in advance whether one of them is
4944          * already saturated and hence whether we can afford to use a semaphore
4945          * and be pessimized in priority for doing so -- if we are the only
4946          * context using semaphores after all other clients have stopped, we
4947          * will be starved on the saturated system. Such a global switch for
4948          * semaphores is less than ideal, but alas is the current compromise.
4949          */
4950         ve->base.saturated = ALL_ENGINES;
4951
4952         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4953
4954         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
4955         intel_engine_init_breadcrumbs(&ve->base);
4956         intel_engine_init_execlists(&ve->base);
4957
4958         ve->base.cops = &virtual_context_ops;
4959         ve->base.request_alloc = execlists_request_alloc;
4960
4961         ve->base.schedule = i915_schedule;
4962         ve->base.submit_request = virtual_submit_request;
4963         ve->base.bond_execute = virtual_bond_execute;
4964
4965         INIT_LIST_HEAD(virtual_queue(ve));
4966         ve->base.execlists.queue_priority_hint = INT_MIN;
4967         tasklet_init(&ve->base.execlists.tasklet,
4968                      virtual_submission_tasklet,
4969                      (unsigned long)ve);
4970
4971         intel_context_init(&ve->context, &ve->base);
4972
4973         for (n = 0; n < count; n++) {
4974                 struct intel_engine_cs *sibling = siblings[n];
4975
4976                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4977                 if (sibling->mask & ve->base.mask) {
4978                         DRM_DEBUG("duplicate %s entry in load balancer\n",
4979                                   sibling->name);
4980                         err = -EINVAL;
4981                         goto err_put;
4982                 }
4983
4984                 /*
4985                  * The virtual engine implementation is tightly coupled to
4986          * the execlists backend -- we push requests directly
4987                  * into a tree inside each physical engine. We could support
4988                  * layering if we handle cloning of the requests and
4989                  * submitting a copy into each backend.
4990                  */
4991                 if (sibling->execlists.tasklet.func !=
4992                     execlists_submission_tasklet) {
4993                         err = -ENODEV;
4994                         goto err_put;
4995                 }
4996
4997                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
4998                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
4999
5000                 ve->siblings[ve->num_siblings++] = sibling;
5001                 ve->base.mask |= sibling->mask;
5002
5003                 /*
5004                  * All physical engines must be compatible in their emission
5005                  * functions (as we build the instructions during request
5006                  * construction and do not alter them before submission
5007                  * on the physical engine). We use the engine class as a guide
5008                  * here, although that could be refined.
5009                  */
5010                 if (ve->base.class != OTHER_CLASS) {
5011                         if (ve->base.class != sibling->class) {
5012                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5013                                           sibling->class, ve->base.class);
5014                                 err = -EINVAL;
5015                                 goto err_put;
5016                         }
5017                         continue;
5018                 }
5019
5020                 ve->base.class = sibling->class;
5021                 ve->base.uabi_class = sibling->uabi_class;
5022                 snprintf(ve->base.name, sizeof(ve->base.name),
5023                          "v%dx%d", ve->base.class, count);
5024                 ve->base.context_size = sibling->context_size;
5025
5026                 ve->base.emit_bb_start = sibling->emit_bb_start;
5027                 ve->base.emit_flush = sibling->emit_flush;
5028                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5029                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5030                 ve->base.emit_fini_breadcrumb_dw =
5031                         sibling->emit_fini_breadcrumb_dw;
5032
5033                 ve->base.flags = sibling->flags;
5034         }
5035
5036         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5037
5038         return &ve->context;
5039
5040 err_put:
5041         intel_context_put(&ve->context);
5042         return ERR_PTR(err);
5043 }
5044
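     /*
      * Create a new virtual context spanning the same siblings as @src,
      * duplicating any bonds attached to it.
      */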
5045 struct intel_context *
5046 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5047 {
5048         struct virtual_engine *se = to_virtual_engine(src);
5049         struct intel_context *dst;
5050
5051         dst = intel_execlists_create_virtual(se->siblings,
5052                                              se->num_siblings);
5053         if (IS_ERR(dst))
5054                 return dst;
5055
5056         if (se->num_bonds) {
5057                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5058
5059                 de->bonds = kmemdup(se->bonds,
5060                                     sizeof(*se->bonds) * se->num_bonds,
5061                                     GFP_KERNEL);
5062                 if (!de->bonds) {
5063                         intel_context_put(dst);
5064                         return ERR_PTR(-ENOMEM);
5065                 }
5066
5067                 de->num_bonds = se->num_bonds;
5068         }
5069
5070         return dst;
5071 }
5072
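     /*
      * Restrict requests bonded to @master so that they may only execute on
      * the registered siblings; repeated calls accumulate the allowed mask.
      */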
5073 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5074                                      const struct intel_engine_cs *master,
5075                                      const struct intel_engine_cs *sibling)
5076 {
5077         struct virtual_engine *ve = to_virtual_engine(engine);
5078         struct ve_bond *bond;
5079         int n;
5080
5081         /* Sanity check that the sibling is part of the virtual engine */
5082         for (n = 0; n < ve->num_siblings; n++)
5083                 if (sibling == ve->siblings[n])
5084                         break;
5085         if (n == ve->num_siblings)
5086                 return -EINVAL;
5087
5088         bond = virtual_find_bond(ve, master);
5089         if (bond) {
5090                 bond->sibling_mask |= sibling->mask;
5091                 return 0;
5092         }
5093
5094         bond = krealloc(ve->bonds,
5095                         sizeof(*bond) * (ve->num_bonds + 1),
5096                         GFP_KERNEL);
5097         if (!bond)
5098                 return -ENOMEM;
5099
5100         bond[ve->num_bonds].master = master;
5101         bond[ve->num_bonds].sibling_mask = sibling->mask;
5102
5103         ve->bonds = bond;
5104         ve->num_bonds++;
5105
5106         return 0;
5107 }
5108
5109 struct intel_engine_cs *
5110 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5111                                  unsigned int sibling)
5112 {
5113         struct virtual_engine *ve = to_virtual_engine(engine);
5114
5115         if (sibling >= ve->num_siblings)
5116                 return NULL;
5117
5118         return ve->siblings[sibling];
5119 }
5120
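     /*
      * Print up to @max of the executing, queued and virtual requests on
      * @engine via @show_request, eliding the middle of each list if it is
      * longer than @max.
      */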
5121 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5122                                    struct drm_printer *m,
5123                                    void (*show_request)(struct drm_printer *m,
5124                                                         struct i915_request *rq,
5125                                                         const char *prefix),
5126                                    unsigned int max)
5127 {
5128         const struct intel_engine_execlists *execlists = &engine->execlists;
5129         struct i915_request *rq, *last;
5130         unsigned long flags;
5131         unsigned int count;
5132         struct rb_node *rb;
5133
5134         spin_lock_irqsave(&engine->active.lock, flags);
5135
5136         last = NULL;
5137         count = 0;
5138         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5139                 if (count++ < max - 1)
5140                         show_request(m, rq, "\t\tE ");
5141                 else
5142                         last = rq;
5143         }
5144         if (last) {
5145                 if (count > max) {
5146                         drm_printf(m,
5147                                    "\t\t...skipping %d executing requests...\n",
5148                                    count - max);
5149                 }
5150                 show_request(m, last, "\t\tE ");
5151         }
5152
5153         last = NULL;
5154         count = 0;
5155         if (execlists->queue_priority_hint != INT_MIN)
5156                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5157                            execlists->queue_priority_hint);
5158         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5159                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5160                 int i;
5161
5162                 priolist_for_each_request(rq, p, i) {
5163                         if (count++ < max - 1)
5164                                 show_request(m, rq, "\t\tQ ");
5165                         else
5166                                 last = rq;
5167                 }
5168         }
5169         if (last) {
5170                 if (count > max) {
5171                         drm_printf(m,
5172                                    "\t\t...skipping %d queued requests...\n",
5173                                    count - max);
5174                 }
5175                 show_request(m, last, "\t\tQ ");
5176         }
5177
5178         last = NULL;
5179         count = 0;
5180         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5181                 struct virtual_engine *ve =
5182                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5183                 struct i915_request *rq = READ_ONCE(ve->request);
5184
5185                 if (rq) {
5186                         if (count++ < max - 1)
5187                                 show_request(m, rq, "\t\tV ");
5188                         else
5189                                 last = rq;
5190                 }
5191         }
5192         if (last) {
5193                 if (count > max) {
5194                         drm_printf(m,
5195                                    "\t\t...skipping %d virtual requests...\n",
5196                                    count - max);
5197                 }
5198                 show_request(m, last, "\t\tV ");
5199         }
5200
5201         spin_unlock_irqrestore(&engine->active.lock, flags);
5202 }
5203
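     /*
      * Scrub and restart a pinned context after a reset: optionally restore
      * the default register state, then rewind the ring to @head and refresh
      * the context image.
      */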
5204 void intel_lr_context_reset(struct intel_engine_cs *engine,
5205                             struct intel_context *ce,
5206                             u32 head,
5207                             bool scrub)
5208 {
5209         GEM_BUG_ON(!intel_context_is_pinned(ce));
5210
5211         /*
5212          * We want a simple context + ring to execute the breadcrumb update.
5213          * We cannot rely on the context being intact across the GPU hang,
5214          * so clear it and rebuild just what we need for the breadcrumb.
5215          * All pending requests for this context will be zapped, and any
5216          * future request will be after userspace has had the opportunity
5217          * to recreate its own state.
5218          */
5219         if (scrub)
5220                 restore_default_state(ce, engine);
5221
5222         /* Rerun the request; its payload has been neutered (if guilty). */
5223         ce->ring->head = head;
5224         intel_ring_update_space(ce->ring);
5225
5226         __execlists_update_reg_state(ce, engine);
5227 }
5228
5229 bool
5230 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5231 {
5232         return engine->set_default_submission ==
5233                intel_execlists_set_default_submission;
5234 }
5235
5236 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5237 #include "selftest_lrc.c"
5238 #endif