drivers/gpu/drm/i915/gt/intel_lrc.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Ben Widawsky <ben@bwidawsk.net>
  25  *    Michel Thierry <michel.thierry@intel.com>
  26  *    Thomas Daniel <thomas.daniel@intel.com>
  27  *    Oscar Mateo <oscar.mateo@intel.com>
  28  *
  29  */
  30
  31 /**
  32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
  33  *
  34  * Motivation:
  35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
  36  * These expanded contexts enable a number of new abilities, especially
  37  * "Execlists" (also implemented in this file).
  38  *
  39  * One of the main differences with the legacy HW contexts is that logical
  40  * ring contexts incorporate many more things to the context's state, like
  41  * PDPs or ringbuffer control registers:
  42  *
  43  * The reason why PDPs are included in the context is straightforward: as
  44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
  45  * contained there mean you don't need to do a ppgtt->switch_mm yourself,
  46  * instead, the GPU will do it for you on the context switch.
  47  *
  48  * But, what about the ringbuffer control registers (head, tail, etc..)?
  49  * shouldn't we just need a set of those per engine command streamer? This is
  50  * where the name "Logical Rings" starts to make sense: by virtualizing the
  51  * rings, the engine cs shifts to a new "ring buffer" with every context
  52  * switch. When you want to submit a workload to the GPU you: A) choose your
  53  * context, B) find its appropriate virtualized ring, C) write commands to it
  54  * and then, finally, D) tell the GPU to switch to that context.
  55  *
  56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
  57  * to a contexts is via a context execution list, ergo "Execlists".
  58  *
  59  * LRC implementation:
  60  * Regarding the creation of contexts, we have:
  61  *
  62  * - One global default context.
  63  * - One local default context for each opened fd.
  64  * - One local extra context for each context create ioctl call.
  65  *
  66  * Now that ringbuffers belong per-context (and not per-engine, like before)
  67  * and that contexts are uniquely tied to a given engine (and not reusable,
  68  * like before) we need:
  69  *
  70  * - One ringbuffer per-engine inside each context.
  71  * - One backing object per-engine inside each context.
  72  *
  73  * The global default context starts its life with these new objects fully
  74  * allocated and populated. The local default context for each opened fd is
  75  * more complex, because we don't know at creation time which engine is going
  76  * to use them. To handle this, we have implemented a deferred creation of LR
  77  * contexts:
  78  *
  79  * The local context starts its life as a hollow or blank holder, that only
  80  * gets populated for a given engine once we receive an execbuffer. If later
  81  * on we receive another execbuffer ioctl for the same context but a different
  82  * engine, we allocate/populate a new ringbuffer and context backing object and
  83  * so on.
  84  *
  85  * Finally, regarding local contexts created using the ioctl call: as they are
  86  * only allowed with the render ring, we can allocate & populate them right
  87  * away (no need to defer anything, at least for now).
  88  *
  89  * Execlists implementation:
  90  * Execlists are the new method by which, on gen8+ hardware, workloads are
  91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
  92  * This method works as follows:
  93  *
  94  * When a request is committed, its commands (the BB start and any leading or
  95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
  96  * for the appropriate context. The tail pointer in the hardware context is not
  97  * updated at this time, but instead, kept by the driver in the ringbuffer
  98  * structure. A structure representing this request is added to a request queue
  99  * for the appropriate engine: this structure contains a copy of the context's
 100  * tail after the request was written to the ring buffer and a pointer to the
 101  * context itself.
 102  *
 103  * If the engine's request queue was empty before the request was added, the
 104  * queue is processed immediately. Otherwise the queue will be processed during
 105  * a context switch interrupt. In any case, elements on the queue will get sent
 106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 107  * globally unique 20-bits submission ID.
 108  *
 109  * When execution of a request completes, the GPU updates the context status
 110  * buffer with a context complete event and generates a context switch interrupt.
 111  * During the interrupt handling, the driver examines the events in the buffer:
 112  * for each context complete event, if the announced ID matches that on the head
 113  * of the request queue, then that request is retired and removed from the queue.
 114  *
 115  * After processing, if any requests were retired and the queue is not empty
 116  * then a new execution list can be submitted. The two requests at the front of
 117  * the queue are next to be submitted but since a context may not occur twice in
 118  * an execution list, if subsequent requests have the same ID as the first then
 119  * the two requests must be combined. This is done simply by discarding requests
 120  * at the head of the queue until either only one requests is left (in which case
 121  * we use a NULL second context) or the first two requests have unique IDs.
 122  *
 123  * By always executing the first two requests in the queue the driver ensures
 124  * that the GPU is kept as busy as possible. In the case where a single context
 125  * completes but a second context is still executing, the request for this second
 126  * context will be at the head of the queue when we remove the first one. This
 127  * request will then be resubmitted along with a new request for a different context,
 128  * which will cause the hardware to continue executing the second request and queue
 129  * the new request (the GPU detects the condition of a context getting preempted
 130  * with the same context and optimizes the context switch flow by not doing
 131  * preemption, but just sampling the new tail pointer).
 132  *
 133  */
 134 #include <linux/interrupt.h>
 135
 136 #include "gem/i915_gem_context.h"
 137
 138 #include "i915_drv.h"
 139 #include "i915_perf.h"
 140 #include "i915_trace.h"
 141 #include "i915_vgpu.h"
 142 #include "intel_engine_pm.h"
 143 #include "intel_gt.h"
 144 #include "intel_gt_pm.h"
 145 #include "intel_lrc_reg.h"
 146 #include "intel_mocs.h"
 147 #include "intel_reset.h"
 148 #include "intel_workarounds.h"
 149
 150 #define RING_EXECLIST_QFULL             (1 << 0x2)
 151 #define RING_EXECLIST1_VALID            (1 << 0x3)
 152 #define RING_EXECLIST0_VALID            (1 << 0x4)
 153 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
 154 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
 155 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
 156
 157 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
 158 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
 159 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
 160 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
 161 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
 162 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
 163
 164 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 165          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 166
 167 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 168
 169 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
 170 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
 171 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
 172 #define GEN12_IDLE_CTX_ID               0x7FF
 173 #define GEN12_CSB_CTX_VALID(csb_dw) \
 174         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
 175
 176 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 177 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 178 #define WA_TAIL_DWORDS 2
 179 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 180
 181 struct virtual_engine {
 182         struct intel_engine_cs base;
 183         struct intel_context context;
 184
 185         /*
 186          * We allow only a single request through the virtual engine at a time
 187          * (each request in the timeline waits for the completion fence of
 188          * the previous before being submitted). By restricting ourselves to
 189          * only submitting a single request, each request is placed on to a
 190          * physical to maximise load spreading (by virtue of the late greedy
 191          * scheduling -- each real engine takes the next available request
 192          * upon idling).
 193          */
 194         struct i915_request *request;
 195
 196         /*
 197          * We keep a rbtree of available virtual engines inside each physical
 198          * engine, sorted by priority. Here we preallocate the nodes we need
 199          * for the virtual engine, indexed by physical_engine->id.
 200          */
 201         struct ve_node {
 202                 struct rb_node rb;
 203                 int prio;
 204         } nodes[I915_NUM_ENGINES];
 205
 206         /*
 207          * Keep track of bonded pairs -- restrictions upon on our selection
 208          * of physical engines any particular request may be submitted to.
 209          * If we receive a submit-fence from a master engine, we will only
 210          * use one of sibling_mask physical engines.
 211          */
 212         struct ve_bond {
 213                 const struct intel_engine_cs *master;
 214                 intel_engine_mask_t sibling_mask;
 215         } *bonds;
 216         unsigned int num_bonds;
 217
 218         /* And finally, which physical engines this virtual engine maps onto. */
 219         unsigned int num_siblings;
 220         struct intel_engine_cs *siblings[0];
 221 };
 222
 223 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
 224 {
 225         GEM_BUG_ON(!intel_engine_is_virtual(engine));
 226         return container_of(engine, struct virtual_engine, base);
 227 }
 228
 229 static int __execlists_context_alloc(struct intel_context *ce,
 230                                      struct intel_engine_cs *engine);
 231
 232 static void execlists_init_reg_state(u32 *reg_state,
 233                                      const struct intel_context *ce,
 234                                      const struct intel_engine_cs *engine,
 235                                      const struct intel_ring *ring,
 236                                      bool close);
 237
 238 static void __context_pin_acquire(struct intel_context *ce)
 239 {
 240         mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
 241 }
 242
 243 static void __context_pin_release(struct intel_context *ce)
 244 {
 245         mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
 246 }
 247
 248 static void mark_eio(struct i915_request *rq)
 249 {
 250         if (!i915_request_signaled(rq))
 251                 dma_fence_set_error(&rq->fence, -EIO);
 252         i915_request_mark_complete(rq);
 253 }
 254
 255 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 256 {
 257         return (i915_ggtt_offset(engine->status_page.vma) +
 258                 I915_GEM_HWS_PREEMPT_ADDR);
 259 }
 260
 261 static inline void
 262 ring_set_paused(const struct intel_engine_cs *engine, int state)
 263 {
 264         /*
 265          * We inspect HWS_PREEMPT with a semaphore inside
 266          * engine->emit_fini_breadcrumb. If the dword is true,
 267          * the ring is paused as the semaphore will busywait
 268          * until the dword is false.
 269          */
 270         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
 271         if (state)
 272                 wmb();
 273 }
 274
 275 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 276 {
 277         return rb_entry(rb, struct i915_priolist, node);
 278 }
 279
 280 static inline int rq_prio(const struct i915_request *rq)
 281 {
 282         return rq->sched.attr.priority;
 283 }
 284
 285 static int effective_prio(const struct i915_request *rq)
 286 {
 287         int prio = rq_prio(rq);
 288
 289         /*
 290          * If this request is special and must not be interrupted at any
 291          * cost, so be it. Note we are only checking the most recent request
 292          * in the context and so may be masking an earlier vip request. It
 293          * is hoped that under the conditions where nopreempt is used, this
 294          * will not matter (i.e. all requests to that context will be
 295          * nopreempt for as long as desired).
 296          */
 297         if (i915_request_has_nopreempt(rq))
 298                 prio = I915_PRIORITY_UNPREEMPTABLE;
 299
 300         /*
 301          * On unwinding the active request, we give it a priority bump
 302          * if it has completed waiting on any semaphore. If we know that
 303          * the request has already started, we can prevent an unwanted
 304          * preempt-to-idle cycle by taking that into account now.
 305          */
 306         if (__i915_request_has_started(rq))
 307                 prio |= I915_PRIORITY_NOSEMAPHORE;
 308
 309         /* Restrict mere WAIT boosts from triggering preemption */
 310         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
 311         return prio | __NO_PREEMPTION;
 312 }
 313
 314 static int queue_prio(const struct intel_engine_execlists *execlists)
 315 {
 316         struct i915_priolist *p;
 317         struct rb_node *rb;
 318
 319         rb = rb_first_cached(&execlists->queue);
 320         if (!rb)
 321                 return INT_MIN;
 322
 323         /*
 324          * As the priolist[] are inverted, with the highest priority in [0],
 325          * we have to flip the index value to become priority.
 326          */
 327         p = to_priolist(rb);
 328         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
 329 }
 330
 331 static inline bool need_preempt(const struct intel_engine_cs *engine,
 332                                 const struct i915_request *rq,
 333                                 struct rb_node *rb)
 334 {
 335         int last_prio;
 336
 337         if (!intel_engine_has_semaphores(engine))
 338                 return false;
 339
 340         /*
 341          * Check if the current priority hint merits a preemption attempt.
 342          *
 343          * We record the highest value priority we saw during rescheduling
 344          * prior to this dequeue, therefore we know that if it is strictly
 345          * less than the current tail of ESLP[0], we do not need to force
 346          * a preempt-to-idle cycle.
 347          *
 348          * However, the priority hint is a mere hint that we may need to
 349          * preempt. If that hint is stale or we may be trying to preempt
 350          * ourselves, ignore the request.
 351          */
 352         last_prio = effective_prio(rq);
 353         if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
 354                                          last_prio))
 355                 return false;
 356
 357         /*
 358          * Check against the first request in ELSP[1], it will, thanks to the
 359          * power of PI, be the highest priority of that context.
 360          */
 361         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
 362             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 363                 return true;
 364
 365         if (rb) {
 366                 struct virtual_engine *ve =
 367                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 368                 bool preempt = false;
 369
 370                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
 371                         struct i915_request *next;
 372
 373                         rcu_read_lock();
 374                         next = READ_ONCE(ve->request);
 375                         if (next)
 376                                 preempt = rq_prio(next) > last_prio;
 377                         rcu_read_unlock();
 378                 }
 379
 380                 if (preempt)
 381                         return preempt;
 382         }
 383
 384         /*
 385          * If the inflight context did not trigger the preemption, then maybe
 386          * it was the set of queued requests? Pick the highest priority in
 387          * the queue (the first active priolist) and see if it deserves to be
 388          * running instead of ELSP[0].
 389          *
 390          * The highest priority request in the queue can not be either
 391          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
 392          * context, it's priority would not exceed ELSP[0] aka last_prio.
 393          */
 394         return queue_prio(&engine->execlists) > last_prio;
 395 }
 396
 397 __maybe_unused static inline bool
 398 assert_priority_queue(const struct i915_request *prev,
 399                       const struct i915_request *next)
 400 {
 401         /*
 402          * Without preemption, the prev may refer to the still active element
 403          * which we refuse to let go.
 404          *
 405          * Even with preemption, there are times when we think it is better not
 406          * to preempt and leave an ostensibly lower priority request in flight.
 407          */
 408         if (i915_request_is_active(prev))
 409                 return true;
 410
 411         return rq_prio(prev) >= rq_prio(next);
 412 }
 413
 414 /*
 415  * The context descriptor encodes various attributes of a context,
 416  * including its GTT address and some flags. Because it's fairly
 417  * expensive to calculate, we'll just do it once and cache the result,
 418  * which remains valid until the context is unpinned.
 419  *
 420  * This is what a descriptor looks like, from LSB to MSB::
 421  *
 422  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 423  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 424  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 425  *      bits 53-54:    mbz, reserved for use by hardware
 426  *      bits 55-63:    group ID, currently unused and set to 0
 427  *
 428  * Starting from Gen11, the upper dword of the descriptor has a new format:
 429  *
 430  *      bits 32-36:    reserved
 431  *      bits 37-47:    SW context ID
 432  *      bits 48:53:    engine instance
 433  *      bit 54:        mbz, reserved for use by hardware
 434  *      bits 55-60:    SW counter
 435  *      bits 61-63:    engine class
 436  *
 437  * engine info, SW context ID and SW counter need to form a unique number
 438  * (Context ID) per lrc.
 439  */
 440 static u64
 441 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 442 {
 443         struct i915_gem_context *ctx = ce->gem_context;
 444         u64 desc;
 445
 446         BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
 447         BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
 448
 449         desc = INTEL_LEGACY_32B_CONTEXT;
 450         if (i915_vm_is_4lvl(ce->vm))
 451                 desc = INTEL_LEGACY_64B_CONTEXT;
 452         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
 453
 454         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
 455         if (IS_GEN(engine->i915, 8))
 456                 desc |= GEN8_CTX_L3LLC_COHERENT;
 457
 458         desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
 459                                                                 /* bits 12-31 */
 460         /*
 461          * The following 32bits are copied into the OA reports (dword 2).
 462          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
 463          * anything below.
 464          */
 465         if (INTEL_GEN(engine->i915) >= 11) {
 466                 GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
 467                 desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
 468                                                                 /* bits 37-47 */
 469
 470                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 471                                                                 /* bits 48-53 */
 472
 473                 /* TODO: decide what to do with SW counter (bits 55-60) */
 474
 475                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 476                                                                 /* bits 61-63 */
 477         } else {
 478                 GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
 479                 desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;   /* bits 32-52 */
 480         }
 481
 482         return desc;
 483 }
 484
 485 static u32 *set_offsets(u32 *regs,
 486                         const u8 *data,
 487                         const struct intel_engine_cs *engine)
 488 #define NOP(x) (BIT(7) | (x))
 489 #define LRI(count, flags) ((flags) << 6 | (count))
 490 #define POSTED BIT(0)
 491 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
 492 #define REG16(x) \
 493         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
 494         (((x) >> 2) & 0x7f)
 495 #define END() 0
 496 {
 497         const u32 base = engine->mmio_base;
 498
 499         while (*data) {
 500                 u8 count, flags;
 501
 502                 if (*data & BIT(7)) { /* skip */
 503                         regs += *data++ & ~BIT(7);
 504                         continue;
 505                 }
 506
 507                 count = *data & 0x3f;
 508                 flags = *data >> 6;
 509                 data++;
 510
 511                 *regs = MI_LOAD_REGISTER_IMM(count);
 512                 if (flags & POSTED)
 513                         *regs |= MI_LRI_FORCE_POSTED;
 514                 if (INTEL_GEN(engine->i915) >= 11)
 515                         *regs |= MI_LRI_CS_MMIO;
 516                 regs++;
 517
 518                 GEM_BUG_ON(!count);
 519                 do {
 520                         u32 offset = 0;
 521                         u8 v;
 522
 523                         do {
 524                                 v = *data++;
 525                                 offset <<= 7;
 526                                 offset |= v & ~BIT(7);
 527                         } while (v & BIT(7));
 528
 529                         *regs = base + (offset << 2);
 530                         regs += 2;
 531                 } while (--count);
 532         }
 533
 534         return regs;
 535 }
 536
 537 static const u8 gen8_xcs_offsets[] = {
 538         NOP(1),
 539         LRI(11, 0),
 540         REG16(0x244),
 541         REG(0x034),
 542         REG(0x030),
 543         REG(0x038),
 544         REG(0x03c),
 545         REG(0x168),
 546         REG(0x140),
 547         REG(0x110),
 548         REG(0x11c),
 549         REG(0x114),
 550         REG(0x118),
 551
 552         NOP(9),
 553         LRI(9, 0),
 554         REG16(0x3a8),
 555         REG16(0x28c),
 556         REG16(0x288),
 557         REG16(0x284),
 558         REG16(0x280),
 559         REG16(0x27c),
 560         REG16(0x278),
 561         REG16(0x274),
 562         REG16(0x270),
 563
 564         NOP(13),
 565         LRI(2, 0),
 566         REG16(0x200),
 567         REG(0x028),
 568
 569         END(),
 570 };
 571
 572 static const u8 gen9_xcs_offsets[] = {
 573         NOP(1),
 574         LRI(14, POSTED),
 575         REG16(0x244),
 576         REG(0x034),
 577         REG(0x030),
 578         REG(0x038),
 579         REG(0x03c),
 580         REG(0x168),
 581         REG(0x140),
 582         REG(0x110),
 583         REG(0x11c),
 584         REG(0x114),
 585         REG(0x118),
 586         REG(0x1c0),
 587         REG(0x1c4),
 588         REG(0x1c8),
 589
 590         NOP(3),
 591         LRI(9, POSTED),
 592         REG16(0x3a8),
 593         REG16(0x28c),
 594         REG16(0x288),
 595         REG16(0x284),
 596         REG16(0x280),
 597         REG16(0x27c),
 598         REG16(0x278),
 599         REG16(0x274),
 600         REG16(0x270),
 601
 602         NOP(13),
 603         LRI(1, POSTED),
 604         REG16(0x200),
 605
 606         NOP(13),
 607         LRI(44, POSTED),
 608         REG(0x028),
 609         REG(0x09c),
 610         REG(0x0c0),
 611         REG(0x178),
 612         REG(0x17c),
 613         REG16(0x358),
 614         REG(0x170),
 615         REG(0x150),
 616         REG(0x154),
 617         REG(0x158),
 618         REG16(0x41c),
 619         REG16(0x600),
 620         REG16(0x604),
 621         REG16(0x608),
 622         REG16(0x60c),
 623         REG16(0x610),
 624         REG16(0x614),
 625         REG16(0x618),
 626         REG16(0x61c),
 627         REG16(0x620),
 628         REG16(0x624),
 629         REG16(0x628),
 630         REG16(0x62c),
 631         REG16(0x630),
 632         REG16(0x634),
 633         REG16(0x638),
 634         REG16(0x63c),
 635         REG16(0x640),
 636         REG16(0x644),
 637         REG16(0x648),
 638         REG16(0x64c),
 639         REG16(0x650),
 640         REG16(0x654),
 641         REG16(0x658),
 642         REG16(0x65c),
 643         REG16(0x660),
 644         REG16(0x664),
 645         REG16(0x668),
 646         REG16(0x66c),
 647         REG16(0x670),
 648         REG16(0x674),
 649         REG16(0x678),
 650         REG16(0x67c),
 651         REG(0x068),
 652
 653         END(),
 654 };
 655
 656 static const u8 gen12_xcs_offsets[] = {
 657         NOP(1),
 658         LRI(13, POSTED),
 659         REG16(0x244),
 660         REG(0x034),
 661         REG(0x030),
 662         REG(0x038),
 663         REG(0x03c),
 664         REG(0x168),
 665         REG(0x140),
 666         REG(0x110),
 667         REG(0x1c0),
 668         REG(0x1c4),
 669         REG(0x1c8),
 670         REG(0x180),
 671         REG16(0x2b4),
 672
 673         NOP(5),
 674         LRI(9, POSTED),
 675         REG16(0x3a8),
 676         REG16(0x28c),
 677         REG16(0x288),
 678         REG16(0x284),
 679         REG16(0x280),
 680         REG16(0x27c),
 681         REG16(0x278),
 682         REG16(0x274),
 683         REG16(0x270),
 684
 685         NOP(13),
 686         LRI(2, POSTED),
 687         REG16(0x200),
 688         REG16(0x204),
 689
 690         NOP(11),
 691         LRI(50, POSTED),
 692         REG16(0x588),
 693         REG16(0x588),
 694         REG16(0x588),
 695         REG16(0x588),
 696         REG16(0x588),
 697         REG16(0x588),
 698         REG(0x028),
 699         REG(0x09c),
 700         REG(0x0c0),
 701         REG(0x178),
 702         REG(0x17c),
 703         REG16(0x358),
 704         REG(0x170),
 705         REG(0x150),
 706         REG(0x154),
 707         REG(0x158),
 708         REG16(0x41c),
 709         REG16(0x600),
 710         REG16(0x604),
 711         REG16(0x608),
 712         REG16(0x60c),
 713         REG16(0x610),
 714         REG16(0x614),
 715         REG16(0x618),
 716         REG16(0x61c),
 717         REG16(0x620),
 718         REG16(0x624),
 719         REG16(0x628),
 720         REG16(0x62c),
 721         REG16(0x630),
 722         REG16(0x634),
 723         REG16(0x638),
 724         REG16(0x63c),
 725         REG16(0x640),
 726         REG16(0x644),
 727         REG16(0x648),
 728         REG16(0x64c),
 729         REG16(0x650),
 730         REG16(0x654),
 731         REG16(0x658),
 732         REG16(0x65c),
 733         REG16(0x660),
 734         REG16(0x664),
 735         REG16(0x668),
 736         REG16(0x66c),
 737         REG16(0x670),
 738         REG16(0x674),
 739         REG16(0x678),
 740         REG16(0x67c),
 741         REG(0x068),
 742
 743         END(),
 744 };
 745
 746 static const u8 gen8_rcs_offsets[] = {
 747         NOP(1),
 748         LRI(14, POSTED),
 749         REG16(0x244),
 750         REG(0x034),
 751         REG(0x030),
 752         REG(0x038),
 753         REG(0x03c),
 754         REG(0x168),
 755         REG(0x140),
 756         REG(0x110),
 757         REG(0x11c),
 758         REG(0x114),
 759         REG(0x118),
 760         REG(0x1c0),
 761         REG(0x1c4),
 762         REG(0x1c8),
 763
 764         NOP(3),
 765         LRI(9, POSTED),
 766         REG16(0x3a8),
 767         REG16(0x28c),
 768         REG16(0x288),
 769         REG16(0x284),
 770         REG16(0x280),
 771         REG16(0x27c),
 772         REG16(0x278),
 773         REG16(0x274),
 774         REG16(0x270),
 775
 776         NOP(13),
 777         LRI(1, 0),
 778         REG(0x0c8),
 779
 780         END(),
 781 };
 782
 783 static const u8 gen11_rcs_offsets[] = {
 784         NOP(1),
 785         LRI(15, POSTED),
 786         REG16(0x244),
 787         REG(0x034),
 788         REG(0x030),
 789         REG(0x038),
 790         REG(0x03c),
 791         REG(0x168),
 792         REG(0x140),
 793         REG(0x110),
 794         REG(0x11c),
 795         REG(0x114),
 796         REG(0x118),
 797         REG(0x1c0),
 798         REG(0x1c4),
 799         REG(0x1c8),
 800         REG(0x180),
 801
 802         NOP(1),
 803         LRI(9, POSTED),
 804         REG16(0x3a8),
 805         REG16(0x28c),
 806         REG16(0x288),
 807         REG16(0x284),
 808         REG16(0x280),
 809         REG16(0x27c),
 810         REG16(0x278),
 811         REG16(0x274),
 812         REG16(0x270),
 813
 814         LRI(1, POSTED),
 815         REG(0x1b0),
 816
 817         NOP(10),
 818         LRI(1, 0),
 819         REG(0x0c8),
 820
 821         END(),
 822 };
 823
 824 static const u8 gen12_rcs_offsets[] = {
 825         NOP(1),
 826         LRI(13, POSTED),
 827         REG16(0x244),
 828         REG(0x034),
 829         REG(0x030),
 830         REG(0x038),
 831         REG(0x03c),
 832         REG(0x168),
 833         REG(0x140),
 834         REG(0x110),
 835         REG(0x1c0),
 836         REG(0x1c4),
 837         REG(0x1c8),
 838         REG(0x180),
 839         REG16(0x2b4),
 840
 841         NOP(5),
 842         LRI(9, POSTED),
 843         REG16(0x3a8),
 844         REG16(0x28c),
 845         REG16(0x288),
 846         REG16(0x284),
 847         REG16(0x280),
 848         REG16(0x27c),
 849         REG16(0x278),
 850         REG16(0x274),
 851         REG16(0x270),
 852
 853         LRI(3, POSTED),
 854         REG(0x1b0),
 855         REG16(0x5a8),
 856         REG16(0x5ac),
 857
 858         NOP(6),
 859         LRI(1, 0),
 860         REG(0x0c8),
 861
 862         END(),
 863 };
 864
 865 #undef END
 866 #undef REG16
 867 #undef REG
 868 #undef LRI
 869 #undef NOP
 870
 871 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 872 {
 873         if (engine->class == RENDER_CLASS) {
 874                 if (INTEL_GEN(engine->i915) >= 12)
 875                         return gen12_rcs_offsets;
 876                 else if (INTEL_GEN(engine->i915) >= 11)
 877                         return gen11_rcs_offsets;
 878                 else
 879                         return gen8_rcs_offsets;
 880         } else {
 881                 if (INTEL_GEN(engine->i915) >= 12)
 882                         return gen12_xcs_offsets;
 883                 else if (INTEL_GEN(engine->i915) >= 9)
 884                         return gen9_xcs_offsets;
 885                 else
 886                         return gen8_xcs_offsets;
 887         }
 888 }
 889
 890 static void unwind_wa_tail(struct i915_request *rq)
 891 {
 892         rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
 893         assert_ring_tail_valid(rq->ring, rq->tail);
 894 }
 895
 896 static struct i915_request *
 897 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 898 {
 899         struct i915_request *rq, *rn, *active = NULL;
 900         struct list_head *uninitialized_var(pl);
 901         int prio = I915_PRIORITY_INVALID;
 902
 903         lockdep_assert_held(&engine->active.lock);
 904
 905         list_for_each_entry_safe_reverse(rq, rn,
 906                                          &engine->active.requests,
 907                                          sched.link) {
 908                 struct intel_engine_cs *owner;
 909
 910                 if (i915_request_completed(rq))
 911                         continue; /* XXX */
 912
 913                 __i915_request_unsubmit(rq);
 914                 unwind_wa_tail(rq);
 915
 916                 /*
 917                  * Push the request back into the queue for later resubmission.
 918                  * If this request is not native to this physical engine (i.e.
 919                  * it came from a virtual source), push it back onto the virtual
 920                  * engine so that it can be moved across onto another physical
 921                  * engine as load dictates.
 922                  */
 923                 owner = rq->hw_context->engine;
 924                 if (likely(owner == engine)) {
 925                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 926                         if (rq_prio(rq) != prio) {
 927                                 prio = rq_prio(rq);
 928                                 pl = i915_sched_lookup_priolist(engine, prio);
 929                         }
 930                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 931
 932                         list_move(&rq->sched.link, pl);
 933                         active = rq;
 934                 } else {
 935                         /*
 936                          * Decouple the virtual breadcrumb before moving it
 937                          * back to the virtual engine -- we don't want the
 938                          * request to complete in the background and try
 939                          * and cancel the breadcrumb on the virtual engine
 940                          * (instead of the old engine where it is linked)!
 941                          */
 942                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 943                                      &rq->fence.flags)) {
 944                                 spin_lock(&rq->lock);
 945                                 i915_request_cancel_breadcrumb(rq);
 946                                 spin_unlock(&rq->lock);
 947                         }
 948                         rq->engine = owner;
 949                         owner->submit_request(rq);
 950                         active = NULL;
 951                 }
 952         }
 953
 954         return active;
 955 }
 956
 957 struct i915_request *
 958 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 959 {
 960         struct intel_engine_cs *engine =
 961                 container_of(execlists, typeof(*engine), execlists);
 962
 963         return __unwind_incomplete_requests(engine);
 964 }
 965
 966 static inline void
 967 execlists_context_status_change(struct i915_request *rq, unsigned long status)
 968 {
 969         /*
 970          * Only used when GVT-g is enabled now. When GVT-g is disabled,
 971          * The compiler should eliminate this function as dead-code.
 972          */
 973         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
 974                 return;
 975
 976         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
 977                                    status, rq);
 978 }
 979
 980 static inline struct intel_engine_cs *
 981 __execlists_schedule_in(struct i915_request *rq)
 982 {
 983         struct intel_engine_cs * const engine = rq->engine;
 984         struct intel_context * const ce = rq->hw_context;
 985
 986         intel_context_get(ce);
 987
 988         intel_gt_pm_get(engine->gt);
 989         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 990         intel_engine_context_in(engine);
 991
 992         return engine;
 993 }
 994
 995 static inline struct i915_request *
 996 execlists_schedule_in(struct i915_request *rq, int idx)
 997 {
 998         struct intel_context * const ce = rq->hw_context;
 999         struct intel_engine_cs *old;
1000
1001         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1002         trace_i915_request_in(rq, idx);
1003
1004         old = READ_ONCE(ce->inflight);
1005         do {
1006                 if (!old) {
1007                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1008                         break;
1009                 }
1010         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1011
1012         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1013         return i915_request_get(rq);
1014 }
1015
1016 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1017 {
1018         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1019         struct i915_request *next = READ_ONCE(ve->request);
1020
1021         if (next && next->execution_mask & ~rq->execution_mask)
1022                 tasklet_schedule(&ve->base.execlists.tasklet);
1023 }
1024
1025 static inline void
1026 __execlists_schedule_out(struct i915_request *rq,
1027                          struct intel_engine_cs * const engine)
1028 {
1029         struct intel_context * const ce = rq->hw_context;
1030
1031         intel_engine_context_out(engine);
1032         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1033         intel_gt_pm_put(engine->gt);
1034
1035         /*
1036          * If this is part of a virtual engine, its next request may
1037          * have been blocked waiting for access to the active context.
1038          * We have to kick all the siblings again in case we need to
1039          * switch (e.g. the next request is not runnable on this
1040          * engine). Hopefully, we will already have submitted the next
1041          * request before the tasklet runs and do not need to rebuild
1042          * each virtual tree and kick everyone again.
1043          */
1044         if (ce->engine != engine)
1045                 kick_siblings(rq, ce);
1046
1047         intel_context_put(ce);
1048 }
1049
1050 static inline void
1051 execlists_schedule_out(struct i915_request *rq)
1052 {
1053         struct intel_context * const ce = rq->hw_context;
1054         struct intel_engine_cs *cur, *old;
1055
1056         trace_i915_request_out(rq);
1057
1058         old = READ_ONCE(ce->inflight);
1059         do
1060                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1061         while (!try_cmpxchg(&ce->inflight, &old, cur));
1062         if (!cur)
1063                 __execlists_schedule_out(rq, old);
1064
1065         i915_request_put(rq);
1066 }
1067
1068 static u64 execlists_update_context(const struct i915_request *rq)
1069 {
1070         struct intel_context *ce = rq->hw_context;
1071         u64 desc;
1072
1073         ce->lrc_reg_state[CTX_RING_TAIL] =
1074                 intel_ring_set_tail(rq->ring, rq->tail);
1075
1076         /*
1077          * Make sure the context image is complete before we submit it to HW.
1078          *
1079          * Ostensibly, writes (including the WCB) should be flushed prior to
1080          * an uncached write such as our mmio register access, the empirical
1081          * evidence (esp. on Braswell) suggests that the WC write into memory
1082          * may not be visible to the HW prior to the completion of the UC
1083          * register write and that we may begin execution from the context
1084          * before its image is complete leading to invalid PD chasing.
1085          *
1086          * Furthermore, Braswell, at least, wants a full mb to be sure that
1087          * the writes are coherent in memory (visible to the GPU) prior to
1088          * execution, and not just visible to other CPUs (as is the result of
1089          * wmb).
1090          */
1091         mb();
1092
1093         desc = ce->lrc_desc;
1094         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1095
1096         return desc;
1097 }
1098
1099 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1100 {
1101         if (execlists->ctrl_reg) {
1102                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1103                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1104         } else {
1105                 writel(upper_32_bits(desc), execlists->submit_reg);
1106                 writel(lower_32_bits(desc), execlists->submit_reg);
1107         }
1108 }
1109
1110 static __maybe_unused void
1111 trace_ports(const struct intel_engine_execlists *execlists,
1112             const char *msg,
1113             struct i915_request * const *ports)
1114 {
1115         const struct intel_engine_cs *engine =
1116                 container_of(execlists, typeof(*engine), execlists);
1117
1118         if (!ports[0])
1119                 return;
1120
1121         GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
1122                   engine->name, msg,
1123                   ports[0]->fence.context,
1124                   ports[0]->fence.seqno,
1125                   i915_request_completed(ports[0]) ? "!" :
1126                   i915_request_started(ports[0]) ? "*" :
1127                   "",
1128                   ports[1] ? ports[1]->fence.context : 0,
1129                   ports[1] ? ports[1]->fence.seqno : 0);
1130 }
1131
1132 static __maybe_unused bool
1133 assert_pending_valid(const struct intel_engine_execlists *execlists,
1134                      const char *msg)
1135 {
1136         struct i915_request * const *port, *rq;
1137         struct intel_context *ce = NULL;
1138
1139         trace_ports(execlists, msg, execlists->pending);
1140
1141         if (!execlists->pending[0])
1142                 return false;
1143
1144         if (execlists->pending[execlists_num_ports(execlists)])
1145                 return false;
1146
1147         for (port = execlists->pending; (rq = *port); port++) {
1148                 if (ce == rq->hw_context)
1149                         return false;
1150
1151                 ce = rq->hw_context;
1152                 if (i915_request_completed(rq))
1153                         continue;
1154
1155                 if (i915_active_is_idle(&ce->active))
1156                         return false;
1157
1158                 if (!i915_vma_is_pinned(ce->state))
1159                         return false;
1160         }
1161
1162         return ce;
1163 }
1164
1165 static void execlists_submit_ports(struct intel_engine_cs *engine)
1166 {
1167         struct intel_engine_execlists *execlists = &engine->execlists;
1168         unsigned int n;
1169
1170         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1171
1172         /*
1173          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1174          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1175          * not be relinquished until the device is idle (see
1176          * i915_gem_idle_work_handler()). As a precaution, we make sure
1177          * that all ELSP are drained i.e. we have processed the CSB,
1178          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1179          */
1180         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1181
1182         /*
1183          * ELSQ note: the submit queue is not cleared after being submitted
1184          * to the HW so we need to make sure we always clean it up. This is
1185          * currently ensured by the fact that we always write the same number
1186          * of elsq entries, keep this in mind before changing the loop below.
1187          */
1188         for (n = execlists_num_ports(execlists); n--; ) {
1189                 struct i915_request *rq = execlists->pending[n];
1190
1191                 write_desc(execlists,
1192                            rq ? execlists_update_context(rq) : 0,
1193                            n);
1194         }
1195
1196         /* we need to manually load the submit queue */
1197         if (execlists->ctrl_reg)
1198                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1199 }
1200
1201 static bool ctx_single_port_submission(const struct intel_context *ce)
1202 {
1203         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1204                 i915_gem_context_force_single_submission(ce->gem_context));
1205 }
1206
1207 static bool can_merge_ctx(const struct intel_context *prev,
1208                           const struct intel_context *next)
1209 {
1210         if (prev != next)
1211                 return false;
1212
1213         if (ctx_single_port_submission(prev))
1214                 return false;
1215
1216         return true;
1217 }
1218
1219 static bool can_merge_rq(const struct i915_request *prev,
1220                          const struct i915_request *next)
1221 {
1222         GEM_BUG_ON(prev == next);
1223         GEM_BUG_ON(!assert_priority_queue(prev, next));
1224
1225         /*
1226          * We do not submit known completed requests. Therefore if the next
1227          * request is already completed, we can pretend to merge it in
1228          * with the previous context (and we will skip updating the ELSP
1229          * and tracking). Thus hopefully keeping the ELSP full with active
1230          * contexts, despite the best efforts of preempt-to-busy to confuse
1231          * us.
1232          */
1233         if (i915_request_completed(next))
1234                 return true;
1235
1236         if (!can_merge_ctx(prev->hw_context, next->hw_context))
1237                 return false;
1238
1239         return true;
1240 }
1241
1242 static void virtual_update_register_offsets(u32 *regs,
1243                                             struct intel_engine_cs *engine)
1244 {
1245         set_offsets(regs, reg_offsets(engine), engine);
1246 }
1247
1248 static bool virtual_matches(const struct virtual_engine *ve,
1249                             const struct i915_request *rq,
1250                             const struct intel_engine_cs *engine)
1251 {
1252         const struct intel_engine_cs *inflight;
1253
1254         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1255                 return false;
1256
1257         /*
1258          * We track when the HW has completed saving the context image
1259          * (i.e. when we have seen the final CS event switching out of
1260          * the context) and must not overwrite the context image before
1261          * then. This restricts us to only using the active engine
1262          * while the previous virtualized request is inflight (so
1263          * we reuse the register offsets). This is a very small
1264          * hystersis on the greedy seelction algorithm.
1265          */
1266         inflight = intel_context_inflight(&ve->context);
1267         if (inflight && inflight != engine)
1268                 return false;
1269
1270         return true;
1271 }
1272
1273 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1274                                      struct intel_engine_cs *engine)
1275 {
1276         struct intel_engine_cs *old = ve->siblings[0];
1277
1278         /* All unattached (rq->engine == old) must already be completed */
1279
1280         spin_lock(&old->breadcrumbs.irq_lock);
1281         if (!list_empty(&ve->context.signal_link)) {
1282                 list_move_tail(&ve->context.signal_link,
1283                                &engine->breadcrumbs.signalers);
1284                 intel_engine_queue_breadcrumbs(engine);
1285         }
1286         spin_unlock(&old->breadcrumbs.irq_lock);
1287 }
1288
1289 static struct i915_request *
1290 last_active(const struct intel_engine_execlists *execlists)
1291 {
1292         struct i915_request * const *last = execlists->active;
1293
1294         while (*last && i915_request_completed(*last))
1295                 last++;
1296
1297         return *last;
1298 }
1299
1300 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1301 {
1302         LIST_HEAD(list);
1303
1304         /*
1305          * We want to move the interrupted request to the back of
1306          * the round-robin list (i.e. its priority level), but
1307          * in doing so, we must then move all requests that were in
1308          * flight and were waiting for the interrupted request to
1309          * be run after it again.
1310          */
1311         do {
1312                 struct i915_dependency *p;
1313
1314                 GEM_BUG_ON(i915_request_is_active(rq));
1315                 list_move_tail(&rq->sched.link, pl);
1316
1317                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
1318                         struct i915_request *w =
1319                                 container_of(p->waiter, typeof(*w), sched);
1320
1321                         /* Leave semaphores spinning on the other engines */
1322                         if (w->engine != rq->engine)
1323                                 continue;
1324
1325                         /* No waiter should start before its signaler */
1326                         GEM_BUG_ON(i915_request_started(w) &&
1327                                    !i915_request_completed(rq));
1328
1329                         GEM_BUG_ON(i915_request_is_active(w));
1330                         if (list_empty(&w->sched.link))
1331                                 continue; /* Not yet submitted; unready */
1332
1333                         if (rq_prio(w) < rq_prio(rq))
1334                                 continue;
1335
1336                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1337                         list_move_tail(&w->sched.link, &list);
1338                 }
1339
1340                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1341         } while (rq);
1342 }
1343
1344 static void defer_active(struct intel_engine_cs *engine)
1345 {
1346         struct i915_request *rq;
1347
1348         rq = __unwind_incomplete_requests(engine);
1349         if (!rq)
1350                 return;
1351
1352         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1353 }
1354
1355 static bool
1356 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1357 {
1358         int hint;
1359
1360         if (!intel_engine_has_semaphores(engine))
1361                 return false;
1362
1363         if (list_is_last(&rq->sched.link, &engine->active.requests))
1364                 return false;
1365
1366         hint = max(rq_prio(list_next_entry(rq, sched.link)),
1367                    engine->execlists.queue_priority_hint);
1368
1369         return hint >= effective_prio(rq);
1370 }
1371
1372 static int
1373 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1374 {
1375         if (list_is_last(&rq->sched.link, &engine->active.requests))
1376                 return INT_MIN;
1377
1378         return rq_prio(list_next_entry(rq, sched.link));
1379 }
1380
1381 static bool
1382 enable_timeslice(const struct intel_engine_execlists *execlists)
1383 {
1384         const struct i915_request *rq = *execlists->active;
1385
1386         if (i915_request_completed(rq))
1387                 return false;
1388
1389         return execlists->switch_priority_hint >= effective_prio(rq);
1390 }
1391
1392 static void record_preemption(struct intel_engine_execlists *execlists)
1393 {
1394         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1395 }
1396
1397 static void execlists_dequeue(struct intel_engine_cs *engine)
1398 {
1399         struct intel_engine_execlists * const execlists = &engine->execlists;
1400         struct i915_request **port = execlists->pending;
1401         struct i915_request ** const last_port = port + execlists->port_mask;
1402         struct i915_request *last;
1403         struct rb_node *rb;
1404         bool submit = false;
1405
1406         /*
1407          * Hardware submission is through 2 ports. Conceptually each port
1408          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1409          * static for a context, and unique to each, so we only execute
1410          * requests belonging to a single context from each ring. RING_HEAD
1411          * is maintained by the CS in the context image, it marks the place
1412          * where it got up to last time, and through RING_TAIL we tell the CS
1413          * where we want to execute up to this time.
1414          *
1415          * In this list the requests are in order of execution. Consecutive
1416          * requests from the same context are adjacent in the ringbuffer. We
1417          * can combine these requests into a single RING_TAIL update:
1418          *
1419          *              RING_HEAD...req1...req2
1420          *                                    ^- RING_TAIL
1421          * since to execute req2 the CS must first execute req1.
1422          *
1423          * Our goal then is to point each port to the end of a consecutive
1424          * sequence of requests as being the most optimal (fewest wake ups
1425          * and context switches) submission.
1426          */
1427
1428         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1429                 struct virtual_engine *ve =
1430                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1431                 struct i915_request *rq = READ_ONCE(ve->request);
1432
1433                 if (!rq) { /* lazily cleanup after another engine handled rq */
1434                         rb_erase_cached(rb, &execlists->virtual);
1435                         RB_CLEAR_NODE(rb);
1436                         rb = rb_first_cached(&execlists->virtual);
1437                         continue;
1438                 }
1439
1440                 if (!virtual_matches(ve, rq, engine)) {
1441                         rb = rb_next(rb);
1442                         continue;
1443                 }
1444
1445                 break;
1446         }
1447
1448         /*
1449          * If the queue is higher priority than the last
1450          * request in the currently active context, submit afresh.
1451          * We will resubmit again afterwards in case we need to split
1452          * the active context to interject the preemption request,
1453          * i.e. we will retrigger preemption following the ack in case
1454          * of trouble.
1455          */
1456         last = last_active(execlists);
1457         if (last) {
1458                 if (need_preempt(engine, last, rb)) {
1459                         GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
1460                                   engine->name,
1461                                   last->fence.context,
1462                                   last->fence.seqno,
1463                                   last->sched.attr.priority,
1464                                   execlists->queue_priority_hint);
1465                         record_preemption(execlists);
1466
1467                         /*
1468                          * Don't let the RING_HEAD advance past the breadcrumb
1469                          * as we unwind (and until we resubmit) so that we do
1470                          * not accidentally tell it to go backwards.
1471                          */
1472                         ring_set_paused(engine, 1);
1473
1474                         /*
1475                          * Note that we have not stopped the GPU at this point,
1476                          * so we are unwinding the incomplete requests as they
1477                          * remain inflight and so by the time we do complete
1478                          * the preemption, some of the unwound requests may
1479                          * complete!
1480                          */
1481                         __unwind_incomplete_requests(engine);
1482
1483                         /*
1484                          * If we need to return to the preempted context, we
1485                          * need to skip the lite-restore and force it to
1486                          * reload the RING_TAIL. Otherwise, the HW has a
1487                          * tendency to ignore us rewinding the TAIL to the
1488                          * end of an earlier request.
1489                          */
1490                         last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1491                         last = NULL;
1492                 } else if (need_timeslice(engine, last) &&
1493                            !timer_pending(&engine->execlists.timer)) {
1494                         GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
1495                                   engine->name,
1496                                   last->fence.context,
1497                                   last->fence.seqno,
1498                                   last->sched.attr.priority,
1499                                   execlists->queue_priority_hint);
1500
1501                         ring_set_paused(engine, 1);
1502                         defer_active(engine);
1503
1504                         /*
1505                          * Unlike for preemption, if we rewind and continue
1506                          * executing the same context as previously active,
1507                          * the order of execution will remain the same and
1508                          * the tail will only advance. We do not need to
1509                          * force a full context restore, as a lite-restore
1510                          * is sufficient to resample the monotonic TAIL.
1511                          *
1512                          * If we switch to any other context, similarly we
1513                          * will not rewind TAIL of current context, and
1514                          * normal save/restore will preserve state and allow
1515                          * us to later continue executing the same request.
1516                          */
1517                         last = NULL;
1518                 } else {
1519                         /*
1520                          * Otherwise if we already have a request pending
1521                          * for execution after the current one, we can
1522                          * just wait until the next CS event before
1523                          * queuing more. In either case we will force a
1524                          * lite-restore preemption event, but if we wait
1525                          * we hopefully coalesce several updates into a single
1526                          * submission.
1527                          */
1528                         if (!list_is_last(&last->sched.link,
1529                                           &engine->active.requests))
1530                                 return;
1531
1532                         /*
1533                          * WaIdleLiteRestore:bdw,skl
1534                          * Apply the wa NOOPs to prevent
1535                          * ring:HEAD == rq:TAIL as we resubmit the
1536                          * request. See gen8_emit_fini_breadcrumb() for
1537                          * where we prepare the padding after the
1538                          * end of the request.
1539                          */
1540                         last->tail = last->wa_tail;
1541                 }
1542         }
1543
1544         while (rb) { /* XXX virtual is always taking precedence */
1545                 struct virtual_engine *ve =
1546                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1547                 struct i915_request *rq;
1548
1549                 spin_lock(&ve->base.active.lock);
1550
1551                 rq = ve->request;
1552                 if (unlikely(!rq)) { /* lost the race to a sibling */
1553                         spin_unlock(&ve->base.active.lock);
1554                         rb_erase_cached(rb, &execlists->virtual);
1555                         RB_CLEAR_NODE(rb);
1556                         rb = rb_first_cached(&execlists->virtual);
1557                         continue;
1558                 }
1559
1560                 GEM_BUG_ON(rq != ve->request);
1561                 GEM_BUG_ON(rq->engine != &ve->base);
1562                 GEM_BUG_ON(rq->hw_context != &ve->context);
1563
1564                 if (rq_prio(rq) >= queue_prio(execlists)) {
1565                         if (!virtual_matches(ve, rq, engine)) {
1566                                 spin_unlock(&ve->base.active.lock);
1567                                 rb = rb_next(rb);
1568                                 continue;
1569                         }
1570
1571                         if (last && !can_merge_rq(last, rq)) {
1572                                 spin_unlock(&ve->base.active.lock);
1573                                 return; /* leave this for another */
1574                         }
1575
1576                         GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
1577                                   engine->name,
1578                                   rq->fence.context,
1579                                   rq->fence.seqno,
1580                                   i915_request_completed(rq) ? "!" :
1581                                   i915_request_started(rq) ? "*" :
1582                                   "",
1583                                   yesno(engine != ve->siblings[0]));
1584
1585                         ve->request = NULL;
1586                         ve->base.execlists.queue_priority_hint = INT_MIN;
1587                         rb_erase_cached(rb, &execlists->virtual);
1588                         RB_CLEAR_NODE(rb);
1589
1590                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1591                         rq->engine = engine;
1592
1593                         if (engine != ve->siblings[0]) {
1594                                 u32 *regs = ve->context.lrc_reg_state;
1595                                 unsigned int n;
1596
1597                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1598
1599                                 if (!intel_engine_has_relative_mmio(engine))
1600                                         virtual_update_register_offsets(regs,
1601                                                                         engine);
1602
1603                                 if (!list_empty(&ve->context.signals))
1604                                         virtual_xfer_breadcrumbs(ve, engine);
1605
1606                                 /*
1607                                  * Move the bound engine to the top of the list
1608                                  * for future execution. We then kick this
1609                                  * tasklet first before checking others, so that
1610                                  * we preferentially reuse this set of bound
1611                                  * registers.
1612                                  */
1613                                 for (n = 1; n < ve->num_siblings; n++) {
1614                                         if (ve->siblings[n] == engine) {
1615                                                 swap(ve->siblings[n],
1616                                                      ve->siblings[0]);
1617                                                 break;
1618                                         }
1619                                 }
1620
1621                                 GEM_BUG_ON(ve->siblings[0] != engine);
1622                         }
1623
1624                         if (__i915_request_submit(rq)) {
1625                                 submit = true;
1626                                 last = rq;
1627                         }
1628                         i915_request_put(rq);
1629
1630                         /*
1631                          * Hmm, we have a bunch of virtual engine requests,
1632                          * but the first one was already completed (thanks
1633                          * preempt-to-busy!). Keep looking at the veng queue
1634                          * until we have no more relevant requests (i.e.
1635                          * the normal submit queue has higher priority).
1636                          */
1637                         if (!submit) {
1638                                 spin_unlock(&ve->base.active.lock);
1639                                 rb = rb_first_cached(&execlists->virtual);
1640                                 continue;
1641                         }
1642                 }
1643
1644                 spin_unlock(&ve->base.active.lock);
1645                 break;
1646         }
1647
1648         while ((rb = rb_first_cached(&execlists->queue))) {
1649                 struct i915_priolist *p = to_priolist(rb);
1650                 struct i915_request *rq, *rn;
1651                 int i;
1652
1653                 priolist_for_each_request_consume(rq, rn, p, i) {
1654                         bool merge = true;
1655
1656                         /*
1657                          * Can we combine this request with the current port?
1658                          * It has to be the same context/ringbuffer and not
1659                          * have any exceptions (e.g. GVT saying never to
1660                          * combine contexts).
1661                          *
1662                          * If we can combine the requests, we can execute both
1663                          * by updating the RING_TAIL to point to the end of the
1664                          * second request, and so we never need to tell the
1665                          * hardware about the first.
1666                          */
1667                         if (last && !can_merge_rq(last, rq)) {
1668                                 /*
1669                                  * If we are on the second port and cannot
1670                                  * combine this request with the last, then we
1671                                  * are done.
1672                                  */
1673                                 if (port == last_port)
1674                                         goto done;
1675
1676                                 /*
1677                                  * We must not populate both ELSP[] with the
1678                                  * same LRCA, i.e. we must submit 2 different
1679                                  * contexts if we submit 2 ELSP.
1680                                  */
1681                                 if (last->hw_context == rq->hw_context)
1682                                         goto done;
1683
1684                                 /*
1685                                  * If GVT overrides us we only ever submit
1686                                  * port[0], leaving port[1] empty. Note that we
1687                                  * also have to be careful that we don't queue
1688                                  * the same context (even though a different
1689                                  * request) to the second port.
1690                                  */
1691                                 if (ctx_single_port_submission(last->hw_context) ||
1692                                     ctx_single_port_submission(rq->hw_context))
1693                                         goto done;
1694
1695                                 merge = false;
1696                         }
1697
1698                         if (__i915_request_submit(rq)) {
1699                                 if (!merge) {
1700                                         *port = execlists_schedule_in(last, port - execlists->pending);
1701                                         port++;
1702                                         last = NULL;
1703                                 }
1704
1705                                 GEM_BUG_ON(last &&
1706                                            !can_merge_ctx(last->hw_context,
1707                                                           rq->hw_context));
1708
1709                                 submit = true;
1710                                 last = rq;
1711                         }
1712                 }
1713
1714                 rb_erase_cached(&p->node, &execlists->queue);
1715                 i915_priolist_free(p);
1716         }
1717
1718 done:
1719         /*
1720          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
1721          *
1722          * We choose the priority hint such that if we add a request of greater
1723          * priority than this, we kick the submission tasklet to decide on
1724          * the right order of submitting the requests to hardware. We must
1725          * also be prepared to reorder requests as they are in-flight on the
1726          * HW. We derive the priority hint then as the first "hole" in
1727          * the HW submission ports and if there are no available slots,
1728          * the priority of the lowest executing request, i.e. last.
1729          *
1730          * When we do receive a higher priority request ready to run from the
1731          * user, see queue_request(), the priority hint is bumped to that
1732          * request triggering preemption on the next dequeue (or subsequent
1733          * interrupt for secondary ports).
1734          */
1735         execlists->queue_priority_hint = queue_prio(execlists);
1736         GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
1737                   engine->name, execlists->queue_priority_hint,
1738                   yesno(submit));
1739
1740         if (submit) {
1741                 *port = execlists_schedule_in(last, port - execlists->pending);
1742                 execlists->switch_priority_hint =
1743                         switch_prio(engine, *execlists->pending);
1744
1745                 /*
1746                  * Skip if we ended up with exactly the same set of requests,
1747                  * e.g. trying to timeslice a pair of ordered contexts
1748                  */
1749                 if (!memcmp(execlists->active, execlists->pending,
1750                             (port - execlists->pending + 1) * sizeof(*port))) {
1751                         do
1752                                 execlists_schedule_out(fetch_and_zero(port));
1753                         while (port-- != execlists->pending);
1754
1755                         goto skip_submit;
1756                 }
1757
1758                 memset(port + 1, 0, (last_port - port) * sizeof(*port));
1759                 execlists_submit_ports(engine);
1760         } else {
1761 skip_submit:
1762                 ring_set_paused(engine, 0);
1763         }
1764 }
1765
1766 static void
1767 cancel_port_requests(struct intel_engine_execlists * const execlists)
1768 {
1769         struct i915_request * const *port, *rq;
1770
1771         for (port = execlists->pending; (rq = *port); port++)
1772                 execlists_schedule_out(rq);
1773         memset(execlists->pending, 0, sizeof(execlists->pending));
1774
1775         for (port = execlists->active; (rq = *port); port++)
1776                 execlists_schedule_out(rq);
1777         execlists->active =
1778                 memset(execlists->inflight, 0, sizeof(execlists->inflight));
1779 }
1780
1781 static inline void
1782 invalidate_csb_entries(const u32 *first, const u32 *last)
1783 {
1784         clflush((void *)first);
1785         clflush((void *)last);
1786 }
1787
1788 static inline bool
1789 reset_in_progress(const struct intel_engine_execlists *execlists)
1790 {
1791         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1792 }
1793
1794 /*
1795  * Starting with Gen12, the status has a new format:
1796  *
1797  *     bit  0:     switched to new queue
1798  *     bit  1:     reserved
1799  *     bit  2:     semaphore wait mode (poll or signal), only valid when
1800  *                 switch detail is set to "wait on semaphore"
1801  *     bits 3-5:   engine class
1802  *     bits 6-11:  engine instance
1803  *     bits 12-14: reserved
1804  *     bits 15-25: sw context id of the lrc the GT switched to
1805  *     bits 26-31: sw counter of the lrc the GT switched to
1806  *     bits 32-35: context switch detail
1807  *                  - 0: ctx complete
1808  *                  - 1: wait on sync flip
1809  *                  - 2: wait on vblank
1810  *                  - 3: wait on scanline
1811  *                  - 4: wait on semaphore
1812  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
1813  *                       WAIT_FOR_EVENT)
1814  *     bit  36:    reserved
1815  *     bits 37-43: wait detail (for switch detail 1 to 4)
1816  *     bits 44-46: reserved
1817  *     bits 47-57: sw context id of the lrc the GT switched away from
1818  *     bits 58-63: sw counter of the lrc the GT switched away from
1819  */
1820 static inline bool
1821 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
1822 {
1823         u32 lower_dw = csb[0];
1824         u32 upper_dw = csb[1];
1825         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
1826         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
1827         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
1828
1829         /*
1830          * The context switch detail is not guaranteed to be 5 when a preemption
1831          * occurs, so we can't just check for that. The check below works for
1832          * all the cases we care about, including preemptions of WAIT
1833          * instructions and lite-restore. Preempt-to-idle via the CTRL register
1834          * would require some extra handling, but we don't support that.
1835          */
1836         if (!ctx_away_valid || new_queue) {
1837                 GEM_BUG_ON(!ctx_to_valid);
1838                 return true;
1839         }
1840
1841         /*
1842          * switch detail = 5 is covered by the case above and we do not expect a
1843          * context switch on an unsuccessful wait instruction since we always
1844          * use polling mode.
1845          */
1846         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
1847         return false;
1848 }
1849
1850 static inline bool
1851 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
1852 {
1853         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
1854 }
1855
1856 static void process_csb(struct intel_engine_cs *engine)
1857 {
1858         struct intel_engine_execlists * const execlists = &engine->execlists;
1859         const u32 * const buf = execlists->csb_status;
1860         const u8 num_entries = execlists->csb_size;
1861         u8 head, tail;
1862
1863         GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
1864
1865         /*
1866          * Note that csb_write, csb_status may be either in HWSP or mmio.
1867          * When reading from the csb_write mmio register, we have to be
1868          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
1869          * the low 4bits. As it happens we know the next 4bits are always
1870          * zero and so we can simply masked off the low u8 of the register
1871          * and treat it identically to reading from the HWSP (without having
1872          * to use explicit shifting and masking, and probably bifurcating
1873          * the code to handle the legacy mmio read).
1874          */
1875         head = execlists->csb_head;
1876         tail = READ_ONCE(*execlists->csb_write);
1877         GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
1878         if (unlikely(head == tail))
1879                 return;
1880
1881         /*
1882          * Hopefully paired with a wmb() in HW!
1883          *
1884          * We must complete the read of the write pointer before any reads
1885          * from the CSB, so that we do not see stale values. Without an rmb
1886          * (lfence) the HW may speculatively perform the CSB[] reads *before*
1887          * we perform the READ_ONCE(*csb_write).
1888          */
1889         rmb();
1890
1891         do {
1892                 bool promote;
1893
1894                 if (++head == num_entries)
1895                         head = 0;
1896
1897                 /*
1898                  * We are flying near dragons again.
1899                  *
1900                  * We hold a reference to the request in execlist_port[]
1901                  * but no more than that. We are operating in softirq
1902                  * context and so cannot hold any mutex or sleep. That
1903                  * prevents us stopping the requests we are processing
1904                  * in port[] from being retired simultaneously (the
1905                  * breadcrumb will be complete before we see the
1906                  * context-switch). As we only hold the reference to the
1907                  * request, any pointer chasing underneath the request
1908                  * is subject to a potential use-after-free. Thus we
1909                  * store all of the bookkeeping within port[] as
1910                  * required, and avoid using unguarded pointers beneath
1911                  * request itself. The same applies to the atomic
1912                  * status notifier.
1913                  */
1914
1915                 GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
1916                           engine->name, head,
1917                           buf[2 * head + 0], buf[2 * head + 1]);
1918
1919                 if (INTEL_GEN(engine->i915) >= 12)
1920                         promote = gen12_csb_parse(execlists, buf + 2 * head);
1921                 else
1922                         promote = gen8_csb_parse(execlists, buf + 2 * head);
1923                 if (promote) {
1924                         /* cancel old inflight, prepare for switch */
1925                         trace_ports(execlists, "preempted", execlists->active);
1926                         while (*execlists->active)
1927                                 execlists_schedule_out(*execlists->active++);
1928
1929                         /* switch pending to inflight */
1930                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
1931                         execlists->active =
1932                                 memcpy(execlists->inflight,
1933                                        execlists->pending,
1934                                        execlists_num_ports(execlists) *
1935                                        sizeof(*execlists->pending));
1936
1937                         if (enable_timeslice(execlists))
1938                                 mod_timer(&execlists->timer, jiffies + 1);
1939
1940                         if (!inject_preempt_hang(execlists))
1941                                 ring_set_paused(engine, 0);
1942
1943                         WRITE_ONCE(execlists->pending[0], NULL);
1944                 } else {
1945                         GEM_BUG_ON(!*execlists->active);
1946
1947                         /* port0 completed, advanced to port1 */
1948                         trace_ports(execlists, "completed", execlists->active);
1949
1950                         /*
1951                          * We rely on the hardware being strongly
1952                          * ordered, that the breadcrumb write is
1953                          * coherent (visible from the CPU) before the
1954                          * user interrupt and CSB is processed.
1955                          */
1956                         GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
1957                                    !reset_in_progress(execlists));
1958                         execlists_schedule_out(*execlists->active++);
1959
1960                         GEM_BUG_ON(execlists->active - execlists->inflight >
1961                                    execlists_num_ports(execlists));
1962                 }
1963         } while (head != tail);
1964
1965         execlists->csb_head = head;
1966
1967         /*
1968          * Gen11 has proven to fail wrt global observation point between
1969          * entry and tail update, failing on the ordering and thus
1970          * we see an old entry in the context status buffer.
1971          *
1972          * Forcibly evict out entries for the next gpu csb update,
1973          * to increase the odds that we get a fresh entries with non
1974          * working hardware. The cost for doing so comes out mostly with
1975          * the wash as hardware, working or not, will need to do the
1976          * invalidation before.
1977          */
1978         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
1979 }
1980
1981 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
1982 {
1983         lockdep_assert_held(&engine->active.lock);
1984         if (!engine->execlists.pending[0])
1985                 execlists_dequeue(engine);
1986 }
1987
1988 /*
1989  * Check the unread Context Status Buffers and manage the submission of new
1990  * contexts to the ELSP accordingly.
1991  */
1992 static void execlists_submission_tasklet(unsigned long data)
1993 {
1994         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
1995         unsigned long flags;
1996
1997         process_csb(engine);
1998         if (!READ_ONCE(engine->execlists.pending[0])) {
1999                 spin_lock_irqsave(&engine->active.lock, flags);
2000                 __execlists_submission_tasklet(engine);
2001                 spin_unlock_irqrestore(&engine->active.lock, flags);
2002         }
2003 }
2004
2005 static void execlists_submission_timer(struct timer_list *timer)
2006 {
2007         struct intel_engine_cs *engine =
2008                 from_timer(engine, timer, execlists.timer);
2009
2010         /* Kick the tasklet for some interrupt coalescing and reset handling */
2011         tasklet_hi_schedule(&engine->execlists.tasklet);
2012 }
2013
2014 static void queue_request(struct intel_engine_cs *engine,
2015                           struct i915_sched_node *node,
2016                           int prio)
2017 {
2018         GEM_BUG_ON(!list_empty(&node->link));
2019         list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
2020 }
2021
2022 static void __submit_queue_imm(struct intel_engine_cs *engine)
2023 {
2024         struct intel_engine_execlists * const execlists = &engine->execlists;
2025
2026         if (reset_in_progress(execlists))
2027                 return; /* defer until we restart the engine following reset */
2028
2029         if (execlists->tasklet.func == execlists_submission_tasklet)
2030                 __execlists_submission_tasklet(engine);
2031         else
2032                 tasklet_hi_schedule(&execlists->tasklet);
2033 }
2034
2035 static void submit_queue(struct intel_engine_cs *engine,
2036                          const struct i915_request *rq)
2037 {
2038         struct intel_engine_execlists *execlists = &engine->execlists;
2039
2040         if (rq_prio(rq) <= execlists->queue_priority_hint)
2041                 return;
2042
2043         execlists->queue_priority_hint = rq_prio(rq);
2044         __submit_queue_imm(engine);
2045 }
2046
2047 static void execlists_submit_request(struct i915_request *request)
2048 {
2049         struct intel_engine_cs *engine = request->engine;
2050         unsigned long flags;
2051
2052         /* Will be called from irq-context when using foreign fences. */
2053         spin_lock_irqsave(&engine->active.lock, flags);
2054
2055         queue_request(engine, &request->sched, rq_prio(request));
2056
2057         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2058         GEM_BUG_ON(list_empty(&request->sched.link));
2059
2060         submit_queue(engine, request);
2061
2062         spin_unlock_irqrestore(&engine->active.lock, flags);
2063 }
2064
2065 static void __execlists_context_fini(struct intel_context *ce)
2066 {
2067         intel_ring_put(ce->ring);
2068         i915_vma_put(ce->state);
2069 }
2070
2071 static void execlists_context_destroy(struct kref *kref)
2072 {
2073         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2074
2075         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2076         GEM_BUG_ON(intel_context_is_pinned(ce));
2077
2078         if (ce->state)
2079                 __execlists_context_fini(ce);
2080
2081         intel_context_fini(ce);
2082         intel_context_free(ce);
2083 }
2084
2085 static void
2086 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2087 {
2088         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2089                 return;
2090
2091         vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
2092         vaddr += engine->context_size;
2093
2094         memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
2095 }
2096
2097 static void
2098 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2099 {
2100         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2101                 return;
2102
2103         vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
2104         vaddr += engine->context_size;
2105
2106         if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
2107                 dev_err_once(engine->i915->drm.dev,
2108                              "%s context redzone overwritten!\n",
2109                              engine->name);
2110 }
2111
2112 static void execlists_context_unpin(struct intel_context *ce)
2113 {
2114         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2115                       ce->engine);
2116
2117         i915_gem_context_unpin_hw_id(ce->gem_context);
2118         i915_gem_object_unpin_map(ce->state->obj);
2119         intel_ring_reset(ce->ring, ce->ring->tail);
2120 }
2121
2122 static void
2123 __execlists_update_reg_state(const struct intel_context *ce,
2124                              const struct intel_engine_cs *engine)
2125 {
2126         struct intel_ring *ring = ce->ring;
2127         u32 *regs = ce->lrc_reg_state;
2128
2129         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
2130         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2131
2132         regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
2133         regs[CTX_RING_HEAD] = ring->head;
2134         regs[CTX_RING_TAIL] = ring->tail;
2135
2136         /* RPCS */
2137         if (engine->class == RENDER_CLASS) {
2138                 regs[CTX_R_PWR_CLK_STATE] =
2139                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2140
2141                 i915_oa_init_reg_state(ce, engine);
2142         }
2143 }
2144
2145 static int
2146 __execlists_context_pin(struct intel_context *ce,
2147                         struct intel_engine_cs *engine)
2148 {
2149         void *vaddr;
2150         int ret;
2151
2152         GEM_BUG_ON(!ce->state);
2153
2154         ret = intel_context_active_acquire(ce);
2155         if (ret)
2156                 goto err;
2157         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2158
2159         vaddr = i915_gem_object_pin_map(ce->state->obj,
2160                                         i915_coherent_map_type(engine->i915) |
2161                                         I915_MAP_OVERRIDE);
2162         if (IS_ERR(vaddr)) {
2163                 ret = PTR_ERR(vaddr);
2164                 goto unpin_active;
2165         }
2166
2167         ret = i915_gem_context_pin_hw_id(ce->gem_context);
2168         if (ret)
2169                 goto unpin_map;
2170
2171         ce->lrc_desc = lrc_descriptor(ce, engine);
2172         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2173         __execlists_update_reg_state(ce, engine);
2174
2175         return 0;
2176
2177 unpin_map:
2178         i915_gem_object_unpin_map(ce->state->obj);
2179 unpin_active:
2180         intel_context_active_release(ce);
2181 err:
2182         return ret;
2183 }
2184
2185 static int execlists_context_pin(struct intel_context *ce)
2186 {
2187         return __execlists_context_pin(ce, ce->engine);
2188 }
2189
2190 static int execlists_context_alloc(struct intel_context *ce)
2191 {
2192         return __execlists_context_alloc(ce, ce->engine);
2193 }
2194
2195 static void execlists_context_reset(struct intel_context *ce)
2196 {
2197         /*
2198          * Because we emit WA_TAIL_DWORDS there may be a disparity
2199          * between our bookkeeping in ce->ring->head and ce->ring->tail and
2200          * that stored in context. As we only write new commands from
2201          * ce->ring->tail onwards, everything before that is junk. If the GPU
2202          * starts reading from its RING_HEAD from the context, it may try to
2203          * execute that junk and die.
2204          *
2205          * The contexts that are stilled pinned on resume belong to the
2206          * kernel, and are local to each engine. All other contexts will
2207          * have their head/tail sanitized upon pinning before use, so they
2208          * will never see garbage,
2209          *
2210          * So to avoid that we reset the context images upon resume. For
2211          * simplicity, we just zero everything out.
2212          */
2213         intel_ring_reset(ce->ring, 0);
2214         __execlists_update_reg_state(ce, ce->engine);
2215 }
2216
2217 static const struct intel_context_ops execlists_context_ops = {
2218         .alloc = execlists_context_alloc,
2219
2220         .pin = execlists_context_pin,
2221         .unpin = execlists_context_unpin,
2222
2223         .enter = intel_context_enter_engine,
2224         .exit = intel_context_exit_engine,
2225
2226         .reset = execlists_context_reset,
2227         .destroy = execlists_context_destroy,
2228 };
2229
2230 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
2231 {
2232         u32 *cs;
2233
2234         GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
2235
2236         cs = intel_ring_begin(rq, 6);
2237         if (IS_ERR(cs))
2238                 return PTR_ERR(cs);
2239
2240         /*
2241          * Check if we have been preempted before we even get started.
2242          *
2243          * After this point i915_request_started() reports true, even if
2244          * we get preempted and so are no longer running.
2245          */
2246         *cs++ = MI_ARB_CHECK;
2247         *cs++ = MI_NOOP;
2248
2249         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2250         *cs++ = i915_request_timeline(rq)->hwsp_offset;
2251         *cs++ = 0;
2252         *cs++ = rq->fence.seqno - 1;
2253
2254         intel_ring_advance(rq, cs);
2255
2256         /* Record the updated position of the request's payload */
2257         rq->infix = intel_ring_offset(rq, cs);
2258
2259         return 0;
2260 }
2261
2262 static int execlists_request_alloc(struct i915_request *request)
2263 {
2264         int ret;
2265
2266         GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
2267
2268         /*
2269          * Flush enough space to reduce the likelihood of waiting after
2270          * we start building the request - in which case we will just
2271          * have to repeat work.
2272          */
2273         request->reserved_space += EXECLISTS_REQUEST_SIZE;
2274
2275         /*
2276          * Note that after this point, we have committed to using
2277          * this request as it is being used to both track the
2278          * state of engine initialisation and liveness of the
2279          * golden renderstate above. Think twice before you try
2280          * to cancel/unwind this request now.
2281          */
2282
2283         /* Unconditionally invalidate GPU caches and TLBs. */
2284         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
2285         if (ret)
2286                 return ret;
2287
2288         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
2289         return 0;
2290 }
2291
2292 /*
2293  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
2294  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
2295  * but there is a slight complication as this is applied in WA batch where the
2296  * values are only initialized once so we cannot take register value at the
2297  * beginning and reuse it further; hence we save its value to memory, upload a
2298  * constant value with bit21 set and then we restore it back with the saved value.
2299  * To simplify the WA, a constant value is formed by using the default value
2300  * of this register. This shouldn't be a problem because we are only modifying
2301  * it for a short period and this batch in non-premptible. We can ofcourse
2302  * use additional instructions that read the actual value of the register
2303  * at that time and set our bit of interest but it makes the WA complicated.
2304  *
2305  * This WA is also required for Gen9 so extracting as a function avoids
2306  * code duplication.
2307  */
2308 static u32 *
2309 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
2310 {
2311         /* NB no one else is allowed to scribble over scratch + 256! */
2312         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
2313         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2314         *batch++ = intel_gt_scratch_offset(engine->gt,
2315                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
2316         *batch++ = 0;
2317
2318         *batch++ = MI_LOAD_REGISTER_IMM(1);
2319         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2320         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
2321
2322         batch = gen8_emit_pipe_control(batch,
2323                                        PIPE_CONTROL_CS_STALL |
2324                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
2325                                        0);
2326
2327         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
2328         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
2329         *batch++ = intel_gt_scratch_offset(engine->gt,
2330                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
2331         *batch++ = 0;
2332
2333         return batch;
2334 }
2335
2336 /*
2337  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
2338  * initialized at the beginning and shared across all contexts but this field
2339  * helps us to have multiple batches at different offsets and select them based
2340  * on a criteria. At the moment this batch always start at the beginning of the page
2341  * and at this point we don't have multiple wa_ctx batch buffers.
2342  *
2343  * The number of WA applied are not known at the beginning; we use this field
2344  * to return the no of DWORDS written.
2345  *
2346  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
2347  * so it adds NOOPs as padding to make it cacheline aligned.
2348  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
2349  * makes a complete batch buffer.
2350  */
2351 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2352 {
2353         /* WaDisableCtxRestoreArbitration:bdw,chv */
2354         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2355
2356         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
2357         if (IS_BROADWELL(engine->i915))
2358                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
2359
2360         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
2361         /* Actual scratch location is at 128 bytes offset */
2362         batch = gen8_emit_pipe_control(batch,
2363                                        PIPE_CONTROL_FLUSH_L3 |
2364                                        PIPE_CONTROL_STORE_DATA_INDEX |
2365                                        PIPE_CONTROL_CS_STALL |
2366                                        PIPE_CONTROL_QW_WRITE,
2367                                        LRC_PPHWSP_SCRATCH_ADDR);
2368
2369         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2370
2371         /* Pad to end of cacheline */
2372         while ((unsigned long)batch % CACHELINE_BYTES)
2373                 *batch++ = MI_NOOP;
2374
2375         /*
2376          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
2377          * execution depends on the length specified in terms of cache lines
2378          * in the register CTX_RCS_INDIRECT_CTX
2379          */
2380
2381         return batch;
2382 }
2383
2384 struct lri {
2385         i915_reg_t reg;
2386         u32 value;
2387 };
2388
2389 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
2390 {
2391         GEM_BUG_ON(!count || count > 63);
2392
2393         *batch++ = MI_LOAD_REGISTER_IMM(count);
2394         do {
2395                 *batch++ = i915_mmio_reg_offset(lri->reg);
2396                 *batch++ = lri->value;
2397         } while (lri++, --count);
2398         *batch++ = MI_NOOP;
2399
2400         return batch;
2401 }
2402
2403 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2404 {
2405         static const struct lri lri[] = {
2406                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
2407                 {
2408                         COMMON_SLICE_CHICKEN2,
2409                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
2410                                        0),
2411                 },
2412
2413                 /* BSpec: 11391 */
2414                 {
2415                         FF_SLICE_CHICKEN,
2416                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
2417                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
2418                 },
2419
2420                 /* BSpec: 11299 */
2421                 {
2422                         _3D_CHICKEN3,
2423                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
2424                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
2425                 }
2426         };
2427
2428         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
2429
2430         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
2431         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
2432
2433         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
2434
2435         /* WaMediaPoolStateCmdInWABB:bxt,glk */
2436         if (HAS_POOLED_EU(engine->i915)) {
2437                 /*
2438                  * EU pool configuration is setup along with golden context
2439                  * during context initialization. This value depends on
2440                  * device type (2x6 or 3x6) and needs to be updated based
2441                  * on which subslice is disabled especially for 2x6
2442                  * devices, however it is safe to load default
2443                  * configuration of 3x6 device instead of masking off
2444                  * corresponding bits because HW ignores bits of a disabled
2445                  * subslice and drops down to appropriate config. Please
2446                  * see render_state_setup() in i915_gem_render_state.c for
2447                  * possible configurations, to avoid duplication they are
2448                  * not shown here again.
2449                  */
2450                 *batch++ = GEN9_MEDIA_POOL_STATE;
2451                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
2452                 *batch++ = 0x00777000;
2453                 *batch++ = 0;
2454                 *batch++ = 0;
2455                 *batch++ = 0;
2456         }
2457
2458         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2459
2460         /* Pad to end of cacheline */
2461         while ((unsigned long)batch % CACHELINE_BYTES)
2462                 *batch++ = MI_NOOP;
2463
2464         return batch;
2465 }
2466
2467 static u32 *
2468 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
2469 {
2470         int i;
2471
2472         /*
2473          * WaPipeControlBefore3DStateSamplePattern: cnl
2474          *
2475          * Ensure the engine is idle prior to programming a
2476          * 3DSTATE_SAMPLE_PATTERN during a context restore.
2477          */
2478         batch = gen8_emit_pipe_control(batch,
2479                                        PIPE_CONTROL_CS_STALL,
2480                                        0);
2481         /*
2482          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
2483          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
2484          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
2485          * confusing. Since gen8_emit_pipe_control() already advances the
2486          * batch by 6 dwords, we advance the other 10 here, completing a
2487          * cacheline. It's not clear if the workaround requires this padding
2488          * before other commands, or if it's just the regular padding we would
2489          * already have for the workaround bb, so leave it here for now.
2490          */
2491         for (i = 0; i < 10; i++)
2492                 *batch++ = MI_NOOP;
2493
2494         /* Pad to end of cacheline */
2495         while ((unsigned long)batch % CACHELINE_BYTES)
2496                 *batch++ = MI_NOOP;
2497
2498         return batch;
2499 }
2500
2501 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
2502
2503 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
2504 {
2505         struct drm_i915_gem_object *obj;
2506         struct i915_vma *vma;
2507         int err;
2508
2509         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
2510         if (IS_ERR(obj))
2511                 return PTR_ERR(obj);
2512
2513         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
2514         if (IS_ERR(vma)) {
2515                 err = PTR_ERR(vma);
2516                 goto err;
2517         }
2518
2519         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
2520         if (err)
2521                 goto err;
2522
2523         engine->wa_ctx.vma = vma;
2524         return 0;
2525
2526 err:
2527         i915_gem_object_put(obj);
2528         return err;
2529 }
2530
2531 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
2532 {
2533         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
2534 }
2535
2536 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
2537
2538 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
2539 {
2540         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
2541         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
2542                                             &wa_ctx->per_ctx };
2543         wa_bb_func_t wa_bb_fn[2];
2544         struct page *page;
2545         void *batch, *batch_ptr;
2546         unsigned int i;
2547         int ret;
2548
2549         if (engine->class != RENDER_CLASS)
2550                 return 0;
2551
2552         switch (INTEL_GEN(engine->i915)) {
2553         case 12:
2554         case 11:
2555                 return 0;
2556         case 10:
2557                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
2558                 wa_bb_fn[1] = NULL;
2559                 break;
2560         case 9:
2561                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
2562                 wa_bb_fn[1] = NULL;
2563                 break;
2564         case 8:
2565                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
2566                 wa_bb_fn[1] = NULL;
2567                 break;
2568         default:
2569                 MISSING_CASE(INTEL_GEN(engine->i915));
2570                 return 0;
2571         }
2572
2573         ret = lrc_setup_wa_ctx(engine);
2574         if (ret) {
2575                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
2576                 return ret;
2577         }
2578
2579         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
2580         batch = batch_ptr = kmap_atomic(page);
2581
2582         /*
2583          * Emit the two workaround batch buffers, recording the offset from the
2584          * start of the workaround batch buffer object for each and their
2585          * respective sizes.
2586          */
2587         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
2588                 wa_bb[i]->offset = batch_ptr - batch;
2589                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
2590                                                   CACHELINE_BYTES))) {
2591                         ret = -EINVAL;
2592                         break;
2593                 }
2594                 if (wa_bb_fn[i])
2595                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
2596                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
2597         }
2598
2599         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
2600
2601         kunmap_atomic(batch);
2602         if (ret)
2603                 lrc_destroy_wa_ctx(engine);
2604
2605         return ret;
2606 }
2607
2608 static void enable_execlists(struct intel_engine_cs *engine)
2609 {
2610         u32 mode;
2611
2612         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2613
2614         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
2615
2616         if (INTEL_GEN(engine->i915) >= 11)
2617                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
2618         else
2619                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
2620         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
2621
2622         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2623
2624         ENGINE_WRITE_FW(engine,
2625                         RING_HWS_PGA,
2626                         i915_ggtt_offset(engine->status_page.vma));
2627         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
2628 }
2629
2630 static bool unexpected_starting_state(struct intel_engine_cs *engine)
2631 {
2632         bool unexpected = false;
2633
2634         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
2635                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
2636                 unexpected = true;
2637         }
2638
2639         return unexpected;
2640 }
2641
2642 static int execlists_resume(struct intel_engine_cs *engine)
2643 {
2644         intel_engine_apply_workarounds(engine);
2645         intel_engine_apply_whitelist(engine);
2646
2647         intel_mocs_init_engine(engine);
2648
2649         intel_engine_reset_breadcrumbs(engine);
2650
2651         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
2652                 struct drm_printer p = drm_debug_printer(__func__);
2653
2654                 intel_engine_dump(engine, &p, NULL);
2655         }
2656
2657         enable_execlists(engine);
2658
2659         return 0;
2660 }
2661
2662 static void execlists_reset_prepare(struct intel_engine_cs *engine)
2663 {
2664         struct intel_engine_execlists * const execlists = &engine->execlists;
2665         unsigned long flags;
2666
2667         GEM_TRACE("%s: depth<-%d\n", engine->name,
2668                   atomic_read(&execlists->tasklet.count));
2669
2670         /*
2671          * Prevent request submission to the hardware until we have
2672          * completed the reset in i915_gem_reset_finish(). If a request
2673          * is completed by one engine, it may then queue a request
2674          * to a second via its execlists->tasklet *just* as we are
2675          * calling engine->resume() and also writing the ELSP.
2676          * Turning off the execlists->tasklet until the reset is over
2677          * prevents the race.
2678          */
2679         __tasklet_disable_sync_once(&execlists->tasklet);
2680         GEM_BUG_ON(!reset_in_progress(execlists));
2681
2682         /* And flush any current direct submission. */
2683         spin_lock_irqsave(&engine->active.lock, flags);
2684         spin_unlock_irqrestore(&engine->active.lock, flags);
2685
2686         /*
2687          * We stop engines, otherwise we might get failed reset and a
2688          * dead gpu (on elk). Also as modern gpu as kbl can suffer
2689          * from system hang if batchbuffer is progressing when
2690          * the reset is issued, regardless of READY_TO_RESET ack.
2691          * Thus assume it is best to stop engines on all gens
2692          * where we have a gpu reset.
2693          *
2694          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
2695          *
2696          * FIXME: Wa for more modern gens needs to be validated
2697          */
2698         intel_engine_stop_cs(engine);
2699 }
2700
2701 static void reset_csb_pointers(struct intel_engine_cs *engine)
2702 {
2703         struct intel_engine_execlists * const execlists = &engine->execlists;
2704         const unsigned int reset_value = execlists->csb_size - 1;
2705
2706         ring_set_paused(engine, 0);
2707
2708         /*
2709          * After a reset, the HW starts writing into CSB entry [0]. We
2710          * therefore have to set our HEAD pointer back one entry so that
2711          * the *first* entry we check is entry 0. To complicate this further,
2712          * as we don't wait for the first interrupt after reset, we have to
2713          * fake the HW write to point back to the last entry so that our
2714          * inline comparison of our cached head position against the last HW
2715          * write works even before the first interrupt.
2716          */
2717         execlists->csb_head = reset_value;
2718         WRITE_ONCE(*execlists->csb_write, reset_value);
2719         wmb(); /* Make sure this is visible to HW (paranoia?) */
2720
2721         invalidate_csb_entries(&execlists->csb_status[0],
2722                                &execlists->csb_status[reset_value]);
2723 }
2724
2725 static struct i915_request *active_request(struct i915_request *rq)
2726 {
2727         const struct intel_context * const ce = rq->hw_context;
2728         struct i915_request *active = NULL;
2729         struct list_head *list;
2730
2731         if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
2732                 return rq;
2733
2734         list = &i915_request_active_timeline(rq)->requests;
2735         list_for_each_entry_from_reverse(rq, list, link) {
2736                 if (i915_request_completed(rq))
2737                         break;
2738
2739                 if (rq->hw_context != ce)
2740                         break;
2741
2742                 active = rq;
2743         }
2744
2745         return active;
2746 }
2747
2748 static void __execlists_reset_reg_state(const struct intel_context *ce,
2749                                         const struct intel_engine_cs *engine)
2750 {
2751         u32 *regs = ce->lrc_reg_state;
2752
2753         if (INTEL_GEN(engine->i915) >= 9) {
2754                 regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING;
2755                 regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16;
2756         }
2757 }
2758
2759 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
2760 {
2761         struct intel_engine_execlists * const execlists = &engine->execlists;
2762         struct intel_context *ce;
2763         struct i915_request *rq;
2764         u32 *regs;
2765
2766         mb(); /* paranoia: read the CSB pointers from after the reset */
2767         clflush(execlists->csb_write);
2768         mb();
2769
2770         process_csb(engine); /* drain preemption events */
2771
2772         /* Following the reset, we need to reload the CSB read/write pointers */
2773         reset_csb_pointers(engine);
2774
2775         /*
2776          * Save the currently executing context, even if we completed
2777          * its request, it was still running at the time of the
2778          * reset and will have been clobbered.
2779          */
2780         rq = execlists_active(execlists);
2781         if (!rq)
2782                 goto unwind;
2783
2784         ce = rq->hw_context;
2785         GEM_BUG_ON(i915_active_is_idle(&ce->active));
2786         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2787
2788         /* Proclaim we have exclusive access to the context image! */
2789         __context_pin_acquire(ce);
2790
2791         rq = active_request(rq);
2792         if (!rq) {
2793                 ce->ring->head = ce->ring->tail;
2794                 goto out_replay;
2795         }
2796
2797         ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
2798
2799         /*
2800          * If this request hasn't started yet, e.g. it is waiting on a
2801          * semaphore, we need to avoid skipping the request or else we
2802          * break the signaling chain. However, if the context is corrupt
2803          * the request will not restart and we will be stuck with a wedged
2804          * device. It is quite often the case that if we issue a reset
2805          * while the GPU is loading the context image, that the context
2806          * image becomes corrupt.
2807          *
2808          * Otherwise, if we have not started yet, the request should replay
2809          * perfectly and we do not need to flag the result as being erroneous.
2810          */
2811         if (!i915_request_started(rq))
2812                 goto out_replay;
2813
2814         /*
2815          * If the request was innocent, we leave the request in the ELSP
2816          * and will try to replay it on restarting. The context image may
2817          * have been corrupted by the reset, in which case we may have
2818          * to service a new GPU hang, but more likely we can continue on
2819          * without impact.
2820          *
2821          * If the request was guilty, we presume the context is corrupt
2822          * and have to at least restore the RING register in the context
2823          * image back to the expected values to skip over the guilty request.
2824          */
2825         __i915_request_reset(rq, stalled);
2826         if (!stalled)
2827                 goto out_replay;
2828
2829         /*
2830          * We want a simple context + ring to execute the breadcrumb update.
2831          * We cannot rely on the context being intact across the GPU hang,
2832          * so clear it and rebuild just what we need for the breadcrumb.
2833          * All pending requests for this context will be zapped, and any
2834          * future request will be after userspace has had the opportunity
2835          * to recreate its own state.
2836          */
2837         GEM_BUG_ON(!intel_context_is_pinned(ce));
2838         regs = ce->lrc_reg_state;
2839         if (engine->pinned_default_state) {
2840                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
2841                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
2842                        engine->context_size - PAGE_SIZE);
2843         }
2844         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
2845
2846 out_replay:
2847         GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
2848                   engine->name, ce->ring->head, ce->ring->tail);
2849         intel_ring_update_space(ce->ring);
2850         __execlists_reset_reg_state(ce, engine);
2851         __execlists_update_reg_state(ce, engine);
2852         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
2853         __context_pin_release(ce);
2854
2855 unwind:
2856         /* Push back any incomplete requests for replay after the reset. */
2857         cancel_port_requests(execlists);
2858         __unwind_incomplete_requests(engine);
2859 }
2860
2861 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
2862 {
2863         unsigned long flags;
2864
2865         GEM_TRACE("%s\n", engine->name);
2866
2867         spin_lock_irqsave(&engine->active.lock, flags);
2868
2869         __execlists_reset(engine, stalled);
2870
2871         spin_unlock_irqrestore(&engine->active.lock, flags);
2872 }
2873
2874 static void nop_submission_tasklet(unsigned long data)
2875 {
2876         /* The driver is wedged; don't process any more events. */
2877 }
2878
2879 static void execlists_cancel_requests(struct intel_engine_cs *engine)
2880 {
2881         struct intel_engine_execlists * const execlists = &engine->execlists;
2882         struct i915_request *rq, *rn;
2883         struct rb_node *rb;
2884         unsigned long flags;
2885
2886         GEM_TRACE("%s\n", engine->name);
2887
2888         /*
2889          * Before we call engine->cancel_requests(), we should have exclusive
2890          * access to the submission state. This is arranged for us by the
2891          * caller disabling the interrupt generation, the tasklet and other
2892          * threads that may then access the same state, giving us a free hand
2893          * to reset state. However, we still need to let lockdep be aware that
2894          * we know this state may be accessed in hardirq context, so we
2895          * disable the irq around this manipulation and we want to keep
2896          * the spinlock focused on its duties and not accidentally conflate
2897          * coverage to the submission's irq state. (Similarly, although we
2898          * shouldn't need to disable irq around the manipulation of the
2899          * submission's irq state, we also wish to remind ourselves that
2900          * it is irq state.)
2901          */
2902         spin_lock_irqsave(&engine->active.lock, flags);
2903
2904         __execlists_reset(engine, true);
2905
2906         /* Mark all executing requests as skipped. */
2907         list_for_each_entry(rq, &engine->active.requests, sched.link)
2908                 mark_eio(rq);
2909
2910         /* Flush the queued requests to the timeline list (for retiring). */
2911         while ((rb = rb_first_cached(&execlists->queue))) {
2912                 struct i915_priolist *p = to_priolist(rb);
2913                 int i;
2914
2915                 priolist_for_each_request_consume(rq, rn, p, i) {
2916                         mark_eio(rq);
2917                         __i915_request_submit(rq);
2918                 }
2919
2920                 rb_erase_cached(&p->node, &execlists->queue);
2921                 i915_priolist_free(p);
2922         }
2923
2924         /* Cancel all attached virtual engines */
2925         while ((rb = rb_first_cached(&execlists->virtual))) {
2926                 struct virtual_engine *ve =
2927                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2928
2929                 rb_erase_cached(rb, &execlists->virtual);
2930                 RB_CLEAR_NODE(rb);
2931
2932                 spin_lock(&ve->base.active.lock);
2933                 rq = fetch_and_zero(&ve->request);
2934                 if (rq) {
2935                         mark_eio(rq);
2936
2937                         rq->engine = engine;
2938                         __i915_request_submit(rq);
2939                         i915_request_put(rq);
2940
2941                         ve->base.execlists.queue_priority_hint = INT_MIN;
2942                 }
2943                 spin_unlock(&ve->base.active.lock);
2944         }
2945
2946         /* Remaining _unready_ requests will be nop'ed when submitted */
2947
2948         execlists->queue_priority_hint = INT_MIN;
2949         execlists->queue = RB_ROOT_CACHED;
2950
2951         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
2952         execlists->tasklet.func = nop_submission_tasklet;
2953
2954         spin_unlock_irqrestore(&engine->active.lock, flags);
2955 }
2956
2957 static void execlists_reset_finish(struct intel_engine_cs *engine)
2958 {
2959         struct intel_engine_execlists * const execlists = &engine->execlists;
2960
2961         /*
2962          * After a GPU reset, we may have requests to replay. Do so now while
2963          * we still have the forcewake to be sure that the GPU is not allowed
2964          * to sleep before we restart and reload a context.
2965          */
2966         GEM_BUG_ON(!reset_in_progress(execlists));
2967         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
2968                 execlists->tasklet.func(execlists->tasklet.data);
2969
2970         if (__tasklet_enable(&execlists->tasklet))
2971                 /* And kick in case we missed a new request submission. */
2972                 tasklet_hi_schedule(&execlists->tasklet);
2973         GEM_TRACE("%s: depth->%d\n", engine->name,
2974                   atomic_read(&execlists->tasklet.count));
2975 }
2976
2977 static int gen8_emit_bb_start(struct i915_request *rq,
2978                               u64 offset, u32 len,
2979                               const unsigned int flags)
2980 {
2981         u32 *cs;
2982
2983         cs = intel_ring_begin(rq, 4);
2984         if (IS_ERR(cs))
2985                 return PTR_ERR(cs);
2986
2987         /*
2988          * WaDisableCtxRestoreArbitration:bdw,chv
2989          *
2990          * We don't need to perform MI_ARB_ENABLE as often as we do (in
2991          * particular all the gen that do not need the w/a at all!), if we
2992          * took care to make sure that on every switch into this context
2993          * (both ordinary and for preemption) that arbitrartion was enabled
2994          * we would be fine.  However, for gen8 there is another w/a that
2995          * requires us to not preempt inside GPGPU execution, so we keep
2996          * arbitration disabled for gen8 batches. Arbitration will be
2997          * re-enabled before we close the request
2998          * (engine->emit_fini_breadcrumb).
2999          */
3000         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3001
3002         /* FIXME(BDW+): Address space and security selectors. */
3003         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3004                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3005         *cs++ = lower_32_bits(offset);
3006         *cs++ = upper_32_bits(offset);
3007
3008         intel_ring_advance(rq, cs);
3009
3010         return 0;
3011 }
3012
3013 static int gen9_emit_bb_start(struct i915_request *rq,
3014                               u64 offset, u32 len,
3015                               const unsigned int flags)
3016 {
3017         u32 *cs;
3018
3019         cs = intel_ring_begin(rq, 6);
3020         if (IS_ERR(cs))
3021                 return PTR_ERR(cs);
3022
3023         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3024
3025         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3026                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3027         *cs++ = lower_32_bits(offset);
3028         *cs++ = upper_32_bits(offset);
3029
3030         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3031         *cs++ = MI_NOOP;
3032
3033         intel_ring_advance(rq, cs);
3034
3035         return 0;
3036 }
3037
3038 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3039 {
3040         ENGINE_WRITE(engine, RING_IMR,
3041                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3042         ENGINE_POSTING_READ(engine, RING_IMR);
3043 }
3044
3045 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3046 {
3047         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3048 }
3049
3050 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3051 {
3052         u32 cmd, *cs;
3053
3054         cs = intel_ring_begin(request, 4);
3055         if (IS_ERR(cs))
3056                 return PTR_ERR(cs);
3057
3058         cmd = MI_FLUSH_DW + 1;
3059
3060         /* We always require a command barrier so that subsequent
3061          * commands, such as breadcrumb interrupts, are strictly ordered
3062          * wrt the contents of the write cache being flushed to memory
3063          * (and thus being coherent from the CPU).
3064          */
3065         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3066
3067         if (mode & EMIT_INVALIDATE) {
3068                 cmd |= MI_INVALIDATE_TLB;
3069                 if (request->engine->class == VIDEO_DECODE_CLASS)
3070                         cmd |= MI_INVALIDATE_BSD;
3071         }
3072
3073         *cs++ = cmd;
3074         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3075         *cs++ = 0; /* upper addr */
3076         *cs++ = 0; /* value */
3077         intel_ring_advance(request, cs);
3078
3079         return 0;
3080 }
3081
3082 static int gen8_emit_flush_render(struct i915_request *request,
3083                                   u32 mode)
3084 {
3085         bool vf_flush_wa = false, dc_flush_wa = false;
3086         u32 *cs, flags = 0;
3087         int len;
3088
3089         flags |= PIPE_CONTROL_CS_STALL;
3090
3091         if (mode & EMIT_FLUSH) {
3092                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3093                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3094                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3095                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3096         }
3097
3098         if (mode & EMIT_INVALIDATE) {
3099                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3100                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3101                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3102                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3103                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3104                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3105                 flags |= PIPE_CONTROL_QW_WRITE;
3106                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3107
3108                 /*
3109                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3110                  * pipe control.
3111                  */
3112                 if (IS_GEN(request->i915, 9))
3113                         vf_flush_wa = true;
3114
3115                 /* WaForGAMHang:kbl */
3116                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3117                         dc_flush_wa = true;
3118         }
3119
3120         len = 6;
3121
3122         if (vf_flush_wa)
3123                 len += 6;
3124
3125         if (dc_flush_wa)
3126                 len += 12;
3127
3128         cs = intel_ring_begin(request, len);
3129         if (IS_ERR(cs))
3130                 return PTR_ERR(cs);
3131
3132         if (vf_flush_wa)
3133                 cs = gen8_emit_pipe_control(cs, 0, 0);
3134
3135         if (dc_flush_wa)
3136                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3137                                             0);
3138
3139         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3140
3141         if (dc_flush_wa)
3142                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3143
3144         intel_ring_advance(request, cs);
3145
3146         return 0;
3147 }
3148
3149 static int gen11_emit_flush_render(struct i915_request *request,
3150                                    u32 mode)
3151 {
3152         if (mode & EMIT_FLUSH) {
3153                 u32 *cs;
3154                 u32 flags = 0;
3155
3156                 flags |= PIPE_CONTROL_CS_STALL;
3157
3158                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3159                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3160                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3161                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3162                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3163                 flags |= PIPE_CONTROL_QW_WRITE;
3164                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3165
3166                 cs = intel_ring_begin(request, 6);
3167                 if (IS_ERR(cs))
3168                         return PTR_ERR(cs);
3169
3170                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3171                 intel_ring_advance(request, cs);
3172         }
3173
3174         if (mode & EMIT_INVALIDATE) {
3175                 u32 *cs;
3176                 u32 flags = 0;
3177
3178                 flags |= PIPE_CONTROL_CS_STALL;
3179
3180                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3181                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3182                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3183                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3184                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3185                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3186                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3187                 flags |= PIPE_CONTROL_QW_WRITE;
3188                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3189
3190                 cs = intel_ring_begin(request, 6);
3191                 if (IS_ERR(cs))
3192                         return PTR_ERR(cs);
3193
3194                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3195                 intel_ring_advance(request, cs);
3196         }
3197
3198         return 0;
3199 }
3200
3201 static u32 preparser_disable(bool state)
3202 {
3203         return MI_ARB_CHECK | 1 << 8 | state;
3204 }
3205
3206 static int gen12_emit_flush_render(struct i915_request *request,
3207                                    u32 mode)
3208 {
3209         if (mode & EMIT_FLUSH) {
3210                 u32 flags = 0;
3211                 u32 *cs;
3212
3213                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3214                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3215                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3216                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3217                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3218
3219                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3220                 flags |= PIPE_CONTROL_QW_WRITE;
3221
3222                 flags |= PIPE_CONTROL_CS_STALL;
3223
3224                 cs = intel_ring_begin(request, 6);
3225                 if (IS_ERR(cs))
3226                         return PTR_ERR(cs);
3227
3228                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3229                 intel_ring_advance(request, cs);
3230         }
3231
3232         if (mode & EMIT_INVALIDATE) {
3233                 u32 flags = 0;
3234                 u32 *cs;
3235
3236                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3237                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3238                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3239                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3240                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3241                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3242                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3243
3244                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3245                 flags |= PIPE_CONTROL_QW_WRITE;
3246
3247                 flags |= PIPE_CONTROL_CS_STALL;
3248
3249                 cs = intel_ring_begin(request, 8);
3250                 if (IS_ERR(cs))
3251                         return PTR_ERR(cs);
3252
3253                 /*
3254                  * Prevent the pre-parser from skipping past the TLB
3255                  * invalidate and loading a stale page for the batch
3256                  * buffer / request payload.
3257                  */
3258                 *cs++ = preparser_disable(true);
3259
3260                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3261
3262                 *cs++ = preparser_disable(false);
3263                 intel_ring_advance(request, cs);
3264         }
3265
3266         return 0;
3267 }
3268
3269 /*
3270  * Reserve space for 2 NOOPs at the end of each request to be
3271  * used as a workaround for not being allowed to do lite
3272  * restore with HEAD==TAIL (WaIdleLiteRestore).
3273  */
3274 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
3275 {
3276         /* Ensure there's always at least one preemption point per-request. */
3277         *cs++ = MI_ARB_CHECK;
3278         *cs++ = MI_NOOP;
3279         request->wa_tail = intel_ring_offset(request, cs);
3280
3281         return cs;
3282 }
3283
3284 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
3285 {
3286         *cs++ = MI_SEMAPHORE_WAIT |
3287                 MI_SEMAPHORE_GLOBAL_GTT |
3288                 MI_SEMAPHORE_POLL |
3289                 MI_SEMAPHORE_SAD_EQ_SDD;
3290         *cs++ = 0;
3291         *cs++ = intel_hws_preempt_address(request->engine);
3292         *cs++ = 0;
3293
3294         return cs;
3295 }
3296
3297 static __always_inline u32*
3298 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
3299                                  u32 *cs)
3300 {
3301         *cs++ = MI_USER_INTERRUPT;
3302
3303         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3304         if (intel_engine_has_semaphores(request->engine))
3305                 cs = emit_preempt_busywait(request, cs);
3306
3307         request->tail = intel_ring_offset(request, cs);
3308         assert_ring_tail_valid(request->ring, request->tail);
3309
3310         return gen8_emit_wa_tail(request, cs);
3311 }
3312
3313 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
3314 {
3315         cs = gen8_emit_ggtt_write(cs,
3316                                   request->fence.seqno,
3317                                   i915_request_active_timeline(request)->hwsp_offset,
3318                                   0);
3319
3320         return gen8_emit_fini_breadcrumb_footer(request, cs);
3321 }
3322
3323 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3324 {
3325         cs = gen8_emit_pipe_control(cs,
3326                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3327                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3328                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
3329                                     0);
3330
3331         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
3332         cs = gen8_emit_ggtt_write_rcs(cs,
3333                                       request->fence.seqno,
3334                                       i915_request_active_timeline(request)->hwsp_offset,
3335                                       PIPE_CONTROL_FLUSH_ENABLE |
3336                                       PIPE_CONTROL_CS_STALL);
3337
3338         return gen8_emit_fini_breadcrumb_footer(request, cs);
3339 }
3340
3341 static u32 *
3342 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3343 {
3344         cs = gen8_emit_ggtt_write_rcs(cs,
3345                                       request->fence.seqno,
3346                                       i915_request_active_timeline(request)->hwsp_offset,
3347                                       PIPE_CONTROL_CS_STALL |
3348                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
3349                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3350                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3351                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
3352                                       PIPE_CONTROL_FLUSH_ENABLE);
3353
3354         return gen8_emit_fini_breadcrumb_footer(request, cs);
3355 }
3356
3357 /*
3358  * Note that the CS instruction pre-parser will not stall on the breadcrumb
3359  * flush and will continue pre-fetching the instructions after it before the
3360  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
3361  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
3362  * of the next request before the memory has been flushed, we're guaranteed that
3363  * we won't access the batch itself too early.
3364  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
3365  * so, if the current request is modifying an instruction in the next request on
3366  * the same intel_context, we might pre-fetch and then execute the pre-update
3367  * instruction. To avoid this, the users of self-modifying code should either
3368  * disable the parser around the code emitting the memory writes, via a new flag
3369  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
3370  * the in-kernel use-cases we've opted to use a separate context, see
3371  * reloc_gpu() as an example.
3372  * All the above applies only to the instructions themselves. Non-inline data
3373  * used by the instructions is not pre-fetched.
3374  */
3375
3376 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
3377 {
3378         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
3379                 MI_SEMAPHORE_GLOBAL_GTT |
3380                 MI_SEMAPHORE_POLL |
3381                 MI_SEMAPHORE_SAD_EQ_SDD;
3382         *cs++ = 0;
3383         *cs++ = intel_hws_preempt_address(request->engine);
3384         *cs++ = 0;
3385         *cs++ = 0;
3386         *cs++ = MI_NOOP;
3387
3388         return cs;
3389 }
3390
3391 static __always_inline u32*
3392 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
3393 {
3394         *cs++ = MI_USER_INTERRUPT;
3395
3396         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3397         if (intel_engine_has_semaphores(request->engine))
3398                 cs = gen12_emit_preempt_busywait(request, cs);
3399
3400         request->tail = intel_ring_offset(request, cs);
3401         assert_ring_tail_valid(request->ring, request->tail);
3402
3403         return gen8_emit_wa_tail(request, cs);
3404 }
3405
3406 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
3407 {
3408         cs = gen8_emit_ggtt_write(cs,
3409                                   request->fence.seqno,
3410                                   i915_request_active_timeline(request)->hwsp_offset,
3411                                   0);
3412
3413         return gen12_emit_fini_breadcrumb_footer(request, cs);
3414 }
3415
3416 static u32 *
3417 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
3418 {
3419         cs = gen8_emit_ggtt_write_rcs(cs,
3420                                       request->fence.seqno,
3421                                       i915_request_active_timeline(request)->hwsp_offset,
3422                                       PIPE_CONTROL_CS_STALL |
3423                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
3424                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
3425                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3426                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
3427                                       PIPE_CONTROL_FLUSH_ENABLE);
3428
3429         return gen12_emit_fini_breadcrumb_footer(request, cs);
3430 }
3431
3432 static void execlists_park(struct intel_engine_cs *engine)
3433 {
3434         del_timer(&engine->execlists.timer);
3435 }
3436
3437 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
3438 {
3439         engine->submit_request = execlists_submit_request;
3440         engine->cancel_requests = execlists_cancel_requests;
3441         engine->schedule = i915_schedule;
3442         engine->execlists.tasklet.func = execlists_submission_tasklet;
3443
3444         engine->reset.prepare = execlists_reset_prepare;
3445         engine->reset.reset = execlists_reset;
3446         engine->reset.finish = execlists_reset_finish;
3447
3448         engine->park = execlists_park;
3449         engine->unpark = NULL;
3450
3451         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
3452         if (!intel_vgpu_active(engine->i915)) {
3453                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
3454                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
3455                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
3456         }
3457
3458         if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
3459                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
3460 }
3461
3462 static void execlists_destroy(struct intel_engine_cs *engine)
3463 {
3464         intel_engine_cleanup_common(engine);
3465         lrc_destroy_wa_ctx(engine);
3466         kfree(engine);
3467 }
3468
3469 static void
3470 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
3471 {
3472         /* Default vfuncs which can be overriden by each engine. */
3473
3474         engine->destroy = execlists_destroy;
3475         engine->resume = execlists_resume;
3476
3477         engine->reset.prepare = execlists_reset_prepare;
3478         engine->reset.reset = execlists_reset;
3479         engine->reset.finish = execlists_reset_finish;
3480
3481         engine->cops = &execlists_context_ops;
3482         engine->request_alloc = execlists_request_alloc;
3483
3484         engine->emit_flush = gen8_emit_flush;
3485         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3486         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
3487         if (INTEL_GEN(engine->i915) >= 12)
3488                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
3489
3490         engine->set_default_submission = intel_execlists_set_default_submission;
3491
3492         if (INTEL_GEN(engine->i915) < 11) {
3493                 engine->irq_enable = gen8_logical_ring_enable_irq;
3494                 engine->irq_disable = gen8_logical_ring_disable_irq;
3495         } else {
3496                 /*
3497                  * TODO: On Gen11 interrupt masks need to be clear
3498                  * to allow C6 entry. Keep interrupts enabled at
3499                  * and take the hit of generating extra interrupts
3500                  * until a more refined solution exists.
3501                  */
3502         }
3503         if (IS_GEN(engine->i915, 8))
3504                 engine->emit_bb_start = gen8_emit_bb_start;
3505         else
3506                 engine->emit_bb_start = gen9_emit_bb_start;
3507 }
3508
3509 static inline void
3510 logical_ring_default_irqs(struct intel_engine_cs *engine)
3511 {
3512         unsigned int shift = 0;
3513
3514         if (INTEL_GEN(engine->i915) < 11) {
3515                 const u8 irq_shifts[] = {
3516                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
3517                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
3518                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
3519                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
3520                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
3521                 };
3522
3523                 shift = irq_shifts[engine->id];
3524         }
3525
3526         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
3527         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
3528 }
3529
3530 static void rcs_submission_override(struct intel_engine_cs *engine)
3531 {
3532         switch (INTEL_GEN(engine->i915)) {
3533         case 12:
3534                 engine->emit_flush = gen12_emit_flush_render;
3535                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3536                 break;
3537         case 11:
3538                 engine->emit_flush = gen11_emit_flush_render;
3539                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3540                 break;
3541         default:
3542                 engine->emit_flush = gen8_emit_flush_render;
3543                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3544                 break;
3545         }
3546 }
3547
3548 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
3549 {
3550         tasklet_init(&engine->execlists.tasklet,
3551                      execlists_submission_tasklet, (unsigned long)engine);
3552         timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
3553
3554         logical_ring_default_vfuncs(engine);
3555         logical_ring_default_irqs(engine);
3556
3557         if (engine->class == RENDER_CLASS)
3558                 rcs_submission_override(engine);
3559
3560         return 0;
3561 }
3562
3563 int intel_execlists_submission_init(struct intel_engine_cs *engine)
3564 {
3565         struct intel_engine_execlists * const execlists = &engine->execlists;
3566         struct drm_i915_private *i915 = engine->i915;
3567         struct intel_uncore *uncore = engine->uncore;
3568         u32 base = engine->mmio_base;
3569         int ret;
3570
3571         ret = intel_engine_init_common(engine);
3572         if (ret)
3573                 return ret;
3574
3575         if (intel_init_workaround_bb(engine))
3576                 /*
3577                  * We continue even if we fail to initialize WA batch
3578                  * because we only expect rare glitches but nothing
3579                  * critical to prevent us from using GPU
3580                  */
3581                 DRM_ERROR("WA batch buffer initialization failed\n");
3582
3583         if (HAS_LOGICAL_RING_ELSQ(i915)) {
3584                 execlists->submit_reg = uncore->regs +
3585                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
3586                 execlists->ctrl_reg = uncore->regs +
3587                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
3588         } else {
3589                 execlists->submit_reg = uncore->regs +
3590                         i915_mmio_reg_offset(RING_ELSP(base));
3591         }
3592
3593         execlists->csb_status =
3594                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
3595
3596         execlists->csb_write =
3597                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
3598
3599         if (INTEL_GEN(i915) < 11)
3600                 execlists->csb_size = GEN8_CSB_ENTRIES;
3601         else
3602                 execlists->csb_size = GEN11_CSB_ENTRIES;
3603
3604         reset_csb_pointers(engine);
3605
3606         return 0;
3607 }
3608
3609 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
3610 {
3611         u32 indirect_ctx_offset;
3612
3613         switch (INTEL_GEN(engine->i915)) {
3614         default:
3615                 MISSING_CASE(INTEL_GEN(engine->i915));
3616                 /* fall through */
3617         case 12:
3618                 indirect_ctx_offset =
3619                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3620                 break;
3621         case 11:
3622                 indirect_ctx_offset =
3623                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3624                 break;
3625         case 10:
3626                 indirect_ctx_offset =
3627                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3628                 break;
3629         case 9:
3630                 indirect_ctx_offset =
3631                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3632                 break;
3633         case 8:
3634                 indirect_ctx_offset =
3635                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
3636                 break;
3637         }
3638
3639         return indirect_ctx_offset;
3640 }
3641
3642
3643 static void init_common_reg_state(u32 * const regs,
3644                                   const struct intel_engine_cs *engine,
3645                                   const struct intel_ring *ring)
3646 {
3647         regs[CTX_CONTEXT_CONTROL] =
3648                 _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
3649                 _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
3650         if (INTEL_GEN(engine->i915) < 11)
3651                 regs[CTX_CONTEXT_CONTROL] |=
3652                         _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
3653                                             CTX_CTRL_RS_CTX_ENABLE);
3654
3655         regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3656         regs[CTX_BB_STATE] = RING_BB_PPGTT;
3657 }
3658
3659 static void init_wa_bb_reg_state(u32 * const regs,
3660                                  const struct intel_engine_cs *engine,
3661                                  u32 pos_bb_per_ctx)
3662 {
3663         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
3664
3665         if (wa_ctx->per_ctx.size) {
3666                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
3667
3668                 regs[pos_bb_per_ctx] =
3669                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
3670         }
3671
3672         if (wa_ctx->indirect_ctx.size) {
3673                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
3674
3675                 regs[pos_bb_per_ctx + 2] =
3676                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
3677                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
3678
3679                 regs[pos_bb_per_ctx + 4] =
3680                         intel_lr_indirect_ctx_offset(engine) << 6;
3681         }
3682 }
3683
3684 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
3685 {
3686         if (i915_vm_is_4lvl(&ppgtt->vm)) {
3687                 /* 64b PPGTT (48bit canonical)
3688                  * PDP0_DESCRIPTOR contains the base address to PML4 and
3689                  * other PDP Descriptors are ignored.
3690                  */
3691                 ASSIGN_CTX_PML4(ppgtt, regs);
3692         } else {
3693                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
3694                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
3695                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
3696                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
3697         }
3698 }
3699
3700 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
3701 {
3702         if (i915_is_ggtt(vm))
3703                 return i915_vm_to_ggtt(vm)->alias;
3704         else
3705                 return i915_vm_to_ppgtt(vm);
3706 }
3707
3708 static void execlists_init_reg_state(u32 *regs,
3709                                      const struct intel_context *ce,
3710                                      const struct intel_engine_cs *engine,
3711                                      const struct intel_ring *ring,
3712                                      bool close)
3713 {
3714         /*
3715          * A context is actually a big batch buffer with several
3716          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
3717          * values we are setting here are only for the first context restore:
3718          * on a subsequent save, the GPU will recreate this batchbuffer with new
3719          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
3720          * we are not initializing here).
3721          *
3722          * Must keep consistent with virtual_update_register_offsets().
3723          */
3724         u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
3725
3726         if (close) { /* Close the batch; used mainly by live_lrc_layout() */
3727                 *bbe = MI_BATCH_BUFFER_END;
3728                 if (INTEL_GEN(engine->i915) >= 10)
3729                         *bbe |= BIT(0);
3730         }
3731
3732         init_common_reg_state(regs, engine, ring);
3733         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
3734
3735         init_wa_bb_reg_state(regs, engine,
3736                              INTEL_GEN(engine->i915) >= 12 ?
3737                              GEN12_CTX_BB_PER_CTX_PTR :
3738                              CTX_BB_PER_CTX_PTR);
3739 }
3740
3741 static int
3742 populate_lr_context(struct intel_context *ce,
3743                     struct drm_i915_gem_object *ctx_obj,
3744                     struct intel_engine_cs *engine,
3745                     struct intel_ring *ring)
3746 {
3747         bool inhibit = true;
3748         void *vaddr;
3749         u32 *regs;
3750         int ret;
3751
3752         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
3753         if (IS_ERR(vaddr)) {
3754                 ret = PTR_ERR(vaddr);
3755                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
3756                 return ret;
3757         }
3758
3759         set_redzone(vaddr, engine);
3760
3761         if (engine->default_state) {
3762                 /*
3763                  * We only want to copy over the template context state;
3764                  * skipping over the headers reserved for GuC communication,
3765                  * leaving those as zero.
3766                  */
3767                 const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
3768                 void *defaults;
3769
3770                 defaults = i915_gem_object_pin_map(engine->default_state,
3771                                                    I915_MAP_WB);
3772                 if (IS_ERR(defaults)) {
3773                         ret = PTR_ERR(defaults);
3774                         goto err_unpin_ctx;
3775                 }
3776
3777                 memcpy(vaddr + start, defaults + start, engine->context_size);
3778                 i915_gem_object_unpin_map(engine->default_state);
3779                 inhibit = false;
3780         }
3781
3782         /* The second page of the context object contains some fields which must
3783          * be set up prior to the first execution. */
3784         regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
3785         execlists_init_reg_state(regs, ce, engine, ring, inhibit);
3786         if (inhibit)
3787                 regs[CTX_CONTEXT_CONTROL] |=
3788                         _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
3789
3790         ret = 0;
3791 err_unpin_ctx:
3792         __i915_gem_object_flush_map(ctx_obj,
3793                                     LRC_HEADER_PAGES * PAGE_SIZE,
3794                                     engine->context_size);
3795         i915_gem_object_unpin_map(ctx_obj);
3796         return ret;
3797 }
3798
3799 static int __execlists_context_alloc(struct intel_context *ce,
3800                                      struct intel_engine_cs *engine)
3801 {
3802         struct drm_i915_gem_object *ctx_obj;
3803         struct intel_ring *ring;
3804         struct i915_vma *vma;
3805         u32 context_size;
3806         int ret;
3807
3808         GEM_BUG_ON(ce->state);
3809         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
3810
3811         /*
3812          * Before the actual start of the context image, we insert a few pages
3813          * for our own use and for sharing with the GuC.
3814          */
3815         context_size += LRC_HEADER_PAGES * PAGE_SIZE;
3816         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3817                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
3818
3819         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
3820         if (IS_ERR(ctx_obj))
3821                 return PTR_ERR(ctx_obj);
3822
3823         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
3824         if (IS_ERR(vma)) {
3825                 ret = PTR_ERR(vma);
3826                 goto error_deref_obj;
3827         }
3828
3829         if (!ce->timeline) {
3830                 struct intel_timeline *tl;
3831
3832                 tl = intel_timeline_create(engine->gt, NULL);
3833                 if (IS_ERR(tl)) {
3834                         ret = PTR_ERR(tl);
3835                         goto error_deref_obj;
3836                 }
3837
3838                 ce->timeline = tl;
3839         }
3840
3841         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
3842         if (IS_ERR(ring)) {
3843                 ret = PTR_ERR(ring);
3844                 goto error_deref_obj;
3845         }
3846
3847         ret = populate_lr_context(ce, ctx_obj, engine, ring);
3848         if (ret) {
3849                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
3850                 goto error_ring_free;
3851         }
3852
3853         ce->ring = ring;
3854         ce->state = vma;
3855
3856         return 0;
3857
3858 error_ring_free:
3859         intel_ring_put(ring);
3860 error_deref_obj:
3861         i915_gem_object_put(ctx_obj);
3862         return ret;
3863 }
3864
3865 static struct list_head *virtual_queue(struct virtual_engine *ve)
3866 {
3867         return &ve->base.execlists.default_priolist.requests[0];
3868 }
3869
3870 static void virtual_context_destroy(struct kref *kref)
3871 {
3872         struct virtual_engine *ve =
3873                 container_of(kref, typeof(*ve), context.ref);
3874         unsigned int n;
3875
3876         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3877         GEM_BUG_ON(ve->request);
3878         GEM_BUG_ON(ve->context.inflight);
3879
3880         for (n = 0; n < ve->num_siblings; n++) {
3881                 struct intel_engine_cs *sibling = ve->siblings[n];
3882                 struct rb_node *node = &ve->nodes[sibling->id].rb;
3883
3884                 if (RB_EMPTY_NODE(node))
3885                         continue;
3886
3887                 spin_lock_irq(&sibling->active.lock);
3888
3889                 /* Detachment is lazily performed in the execlists tasklet */
3890                 if (!RB_EMPTY_NODE(node))
3891                         rb_erase_cached(node, &sibling->execlists.virtual);
3892
3893                 spin_unlock_irq(&sibling->active.lock);
3894         }
3895         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
3896
3897         if (ve->context.state)
3898                 __execlists_context_fini(&ve->context);
3899         intel_context_fini(&ve->context);
3900
3901         kfree(ve->bonds);
3902         kfree(ve);
3903 }
3904
3905 static void virtual_engine_initial_hint(struct virtual_engine *ve)
3906 {
3907         int swp;
3908
3909         /*
3910          * Pick a random sibling on starting to help spread the load around.
3911          *
3912          * New contexts are typically created with exactly the same order
3913          * of siblings, and often started in batches. Due to the way we iterate
3914          * the array of sibling when submitting requests, sibling[0] is
3915          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
3916          * randomised across the system, we also help spread the load by the
3917          * first engine we inspect being different each time.
3918          *
3919          * NB This does not force us to execute on this engine, it will just
3920          * typically be the first we inspect for submission.
3921          */
3922         swp = prandom_u32_max(ve->num_siblings);
3923         if (!swp)
3924                 return;
3925
3926         swap(ve->siblings[swp], ve->siblings[0]);
3927         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
3928                 virtual_update_register_offsets(ve->context.lrc_reg_state,
3929                                                 ve->siblings[0]);
3930 }
3931
3932 static int virtual_context_pin(struct intel_context *ce)
3933 {
3934         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3935         int err;
3936
3937         /* Note: we must use a real engine class for setting up reg state */
3938         err = __execlists_context_pin(ce, ve->siblings[0]);
3939         if (err)
3940                 return err;
3941
3942         virtual_engine_initial_hint(ve);
3943         return 0;
3944 }
3945
3946 static void virtual_context_enter(struct intel_context *ce)
3947 {
3948         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3949         unsigned int n;
3950
3951         for (n = 0; n < ve->num_siblings; n++)
3952                 intel_engine_pm_get(ve->siblings[n]);
3953
3954         intel_timeline_enter(ce->timeline);
3955 }
3956
3957 static void virtual_context_exit(struct intel_context *ce)
3958 {
3959         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3960         unsigned int n;
3961
3962         intel_timeline_exit(ce->timeline);
3963
3964         for (n = 0; n < ve->num_siblings; n++)
3965                 intel_engine_pm_put(ve->siblings[n]);
3966 }
3967
3968 static const struct intel_context_ops virtual_context_ops = {
3969         .pin = virtual_context_pin,
3970         .unpin = execlists_context_unpin,
3971
3972         .enter = virtual_context_enter,
3973         .exit = virtual_context_exit,
3974
3975         .destroy = virtual_context_destroy,
3976 };
3977
3978 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3979 {
3980         struct i915_request *rq;
3981         intel_engine_mask_t mask;
3982
3983         rq = READ_ONCE(ve->request);
3984         if (!rq)
3985                 return 0;
3986
3987         /* The rq is ready for submission; rq->execution_mask is now stable. */
3988         mask = rq->execution_mask;
3989         if (unlikely(!mask)) {
3990                 /* Invalid selection, submit to a random engine in error */
3991                 i915_request_skip(rq, -ENODEV);
3992                 mask = ve->siblings[0]->mask;
3993         }
3994
3995         GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
3996                   ve->base.name,
3997                   rq->fence.context, rq->fence.seqno,
3998                   mask, ve->base.execlists.queue_priority_hint);
3999
4000         return mask;
4001 }
4002
4003 static void virtual_submission_tasklet(unsigned long data)
4004 {
4005         struct virtual_engine * const ve = (struct virtual_engine *)data;
4006         const int prio = ve->base.execlists.queue_priority_hint;
4007         intel_engine_mask_t mask;
4008         unsigned int n;
4009
4010         rcu_read_lock();
4011         mask = virtual_submission_mask(ve);
4012         rcu_read_unlock();
4013         if (unlikely(!mask))
4014                 return;
4015
4016         local_irq_disable();
4017         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4018                 struct intel_engine_cs *sibling = ve->siblings[n];
4019                 struct ve_node * const node = &ve->nodes[sibling->id];
4020                 struct rb_node **parent, *rb;
4021                 bool first;
4022
4023                 if (unlikely(!(mask & sibling->mask))) {
4024                         if (!RB_EMPTY_NODE(&node->rb)) {
4025                                 spin_lock(&sibling->active.lock);
4026                                 rb_erase_cached(&node->rb,
4027                                                 &sibling->execlists.virtual);
4028                                 RB_CLEAR_NODE(&node->rb);
4029                                 spin_unlock(&sibling->active.lock);
4030                         }
4031                         continue;
4032                 }
4033
4034                 spin_lock(&sibling->active.lock);
4035
4036                 if (!RB_EMPTY_NODE(&node->rb)) {
4037                         /*
4038                          * Cheat and avoid rebalancing the tree if we can
4039                          * reuse this node in situ.
4040                          */
4041                         first = rb_first_cached(&sibling->execlists.virtual) ==
4042                                 &node->rb;
4043                         if (prio == node->prio || (prio > node->prio && first))
4044                                 goto submit_engine;
4045
4046                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4047                 }
4048
4049                 rb = NULL;
4050                 first = true;
4051                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4052                 while (*parent) {
4053                         struct ve_node *other;
4054
4055                         rb = *parent;
4056                         other = rb_entry(rb, typeof(*other), rb);
4057                         if (prio > other->prio) {
4058                                 parent = &rb->rb_left;
4059                         } else {
4060                                 parent = &rb->rb_right;
4061                                 first = false;
4062                         }
4063                 }
4064
4065                 rb_link_node(&node->rb, rb, parent);
4066                 rb_insert_color_cached(&node->rb,
4067                                        &sibling->execlists.virtual,
4068                                        first);
4069
4070 submit_engine:
4071                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4072                 node->prio = prio;
4073                 if (first && prio > sibling->execlists.queue_priority_hint) {
4074                         sibling->execlists.queue_priority_hint = prio;
4075                         tasklet_hi_schedule(&sibling->execlists.tasklet);
4076                 }
4077
4078                 spin_unlock(&sibling->active.lock);
4079         }
4080         local_irq_enable();
4081 }
4082
4083 static void virtual_submit_request(struct i915_request *rq)
4084 {
4085         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4086         struct i915_request *old;
4087         unsigned long flags;
4088
4089         GEM_TRACE("%s: rq=%llx:%lld\n",
4090                   ve->base.name,
4091                   rq->fence.context,
4092                   rq->fence.seqno);
4093
4094         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4095
4096         spin_lock_irqsave(&ve->base.active.lock, flags);
4097
4098         old = ve->request;
4099         if (old) { /* background completion event from preempt-to-busy */
4100                 GEM_BUG_ON(!i915_request_completed(old));
4101                 __i915_request_submit(old);
4102                 i915_request_put(old);
4103         }
4104
4105         if (i915_request_completed(rq)) {
4106                 __i915_request_submit(rq);
4107
4108                 ve->base.execlists.queue_priority_hint = INT_MIN;
4109                 ve->request = NULL;
4110         } else {
4111                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
4112                 ve->request = i915_request_get(rq);
4113
4114                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4115                 list_move_tail(&rq->sched.link, virtual_queue(ve));
4116
4117                 tasklet_schedule(&ve->base.execlists.tasklet);
4118         }
4119
4120         spin_unlock_irqrestore(&ve->base.active.lock, flags);
4121 }
4122
4123 static struct ve_bond *
4124 virtual_find_bond(struct virtual_engine *ve,
4125                   const struct intel_engine_cs *master)
4126 {
4127         int i;
4128
4129         for (i = 0; i < ve->num_bonds; i++) {
4130                 if (ve->bonds[i].master == master)
4131                         return &ve->bonds[i];
4132         }
4133
4134         return NULL;
4135 }
4136
4137 static void
4138 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
4139 {
4140         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4141         intel_engine_mask_t allowed, exec;
4142         struct ve_bond *bond;
4143
4144         allowed = ~to_request(signal)->engine->mask;
4145
4146         bond = virtual_find_bond(ve, to_request(signal)->engine);
4147         if (bond)
4148                 allowed &= bond->sibling_mask;
4149
4150         /* Restrict the bonded request to run on only the available engines */
4151         exec = READ_ONCE(rq->execution_mask);
4152         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
4153                 ;
4154
4155         /* Prevent the master from being re-run on the bonded engines */
4156         to_request(signal)->execution_mask &= ~allowed;
4157 }
4158
4159 struct intel_context *
4160 intel_execlists_create_virtual(struct i915_gem_context *ctx,
4161                                struct intel_engine_cs **siblings,
4162                                unsigned int count)
4163 {
4164         struct virtual_engine *ve;
4165         unsigned int n;
4166         int err;
4167
4168         if (count == 0)
4169                 return ERR_PTR(-EINVAL);
4170
4171         if (count == 1)
4172                 return intel_context_create(ctx, siblings[0]);
4173
4174         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
4175         if (!ve)
4176                 return ERR_PTR(-ENOMEM);
4177
4178         ve->base.i915 = ctx->i915;
4179         ve->base.gt = siblings[0]->gt;
4180         ve->base.id = -1;
4181         ve->base.class = OTHER_CLASS;
4182         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4183         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4184
4185         /*
4186          * The decision on whether to submit a request using semaphores
4187          * depends on the saturated state of the engine. We only compute
4188          * this during HW submission of the request, and we need for this
4189          * state to be globally applied to all requests being submitted
4190          * to this engine. Virtual engines encompass more than one physical
4191          * engine and so we cannot accurately tell in advance if one of those
4192          * engines is already saturated and so cannot afford to use a semaphore
4193          * and be pessimized in priority for doing so -- if we are the only
4194          * context using semaphores after all other clients have stopped, we
4195          * will be starved on the saturated system. Such a global switch for
4196          * semaphores is less than ideal, but alas is the current compromise.
4197          */
4198         ve->base.saturated = ALL_ENGINES;
4199
4200         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4201
4202         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
4203         intel_engine_init_breadcrumbs(&ve->base);
4204
4205         intel_engine_init_execlists(&ve->base);
4206
4207         ve->base.cops = &virtual_context_ops;
4208         ve->base.request_alloc = execlists_request_alloc;
4209
4210         ve->base.schedule = i915_schedule;
4211         ve->base.submit_request = virtual_submit_request;
4212         ve->base.bond_execute = virtual_bond_execute;
4213
4214         INIT_LIST_HEAD(virtual_queue(ve));
4215         ve->base.execlists.queue_priority_hint = INT_MIN;
4216         tasklet_init(&ve->base.execlists.tasklet,
4217                      virtual_submission_tasklet,
4218                      (unsigned long)ve);
4219
4220         intel_context_init(&ve->context, ctx, &ve->base);
4221
4222         for (n = 0; n < count; n++) {
4223                 struct intel_engine_cs *sibling = siblings[n];
4224
4225                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4226                 if (sibling->mask & ve->base.mask) {
4227                         DRM_DEBUG("duplicate %s entry in load balancer\n",
4228                                   sibling->name);
4229                         err = -EINVAL;
4230                         goto err_put;
4231                 }
4232
4233                 /*
4234                  * The virtual engine implementation is tightly coupled to
4235                  * the execlists backend -- we push out request directly
4236                  * into a tree inside each physical engine. We could support
4237                  * layering if we handle cloning of the requests and
4238                  * submitting a copy into each backend.
4239                  */
4240                 if (sibling->execlists.tasklet.func !=
4241                     execlists_submission_tasklet) {
4242                         err = -ENODEV;
4243                         goto err_put;
4244                 }
4245
4246                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
4247                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
4248
4249                 ve->siblings[ve->num_siblings++] = sibling;
4250                 ve->base.mask |= sibling->mask;
4251
4252                 /*
4253                  * All physical engines must be compatible for their emission
4254                  * functions (as we build the instructions during request
4255                  * construction and do not alter them before submission
4256                  * on the physical engine). We use the engine class as a guide
4257                  * here, although that could be refined.
4258                  */
4259                 if (ve->base.class != OTHER_CLASS) {
4260                         if (ve->base.class != sibling->class) {
4261                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4262                                           sibling->class, ve->base.class);
4263                                 err = -EINVAL;
4264                                 goto err_put;
4265                         }
4266                         continue;
4267                 }
4268
4269                 ve->base.class = sibling->class;
4270                 ve->base.uabi_class = sibling->uabi_class;
4271                 snprintf(ve->base.name, sizeof(ve->base.name),
4272                          "v%dx%d", ve->base.class, count);
4273                 ve->base.context_size = sibling->context_size;
4274
4275                 ve->base.emit_bb_start = sibling->emit_bb_start;
4276                 ve->base.emit_flush = sibling->emit_flush;
4277                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
4278                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
4279                 ve->base.emit_fini_breadcrumb_dw =
4280                         sibling->emit_fini_breadcrumb_dw;
4281
4282                 ve->base.flags = sibling->flags;
4283         }
4284
4285         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
4286
4287         err = __execlists_context_alloc(&ve->context, siblings[0]);
4288         if (err)
4289                 goto err_put;
4290
4291         __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
4292
4293         return &ve->context;
4294
4295 err_put:
4296         intel_context_put(&ve->context);
4297         return ERR_PTR(err);
4298 }
4299
4300 struct intel_context *
4301 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
4302                               struct intel_engine_cs *src)
4303 {
4304         struct virtual_engine *se = to_virtual_engine(src);
4305         struct intel_context *dst;
4306
4307         dst = intel_execlists_create_virtual(ctx,
4308                                              se->siblings,
4309                                              se->num_siblings);
4310         if (IS_ERR(dst))
4311                 return dst;
4312
4313         if (se->num_bonds) {
4314                 struct virtual_engine *de = to_virtual_engine(dst->engine);
4315
4316                 de->bonds = kmemdup(se->bonds,
4317                                     sizeof(*se->bonds) * se->num_bonds,
4318                                     GFP_KERNEL);
4319                 if (!de->bonds) {
4320                         intel_context_put(dst);
4321                         return ERR_PTR(-ENOMEM);
4322                 }
4323
4324                 de->num_bonds = se->num_bonds;
4325         }
4326
4327         return dst;
4328 }
4329
4330 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
4331                                      const struct intel_engine_cs *master,
4332                                      const struct intel_engine_cs *sibling)
4333 {
4334         struct virtual_engine *ve = to_virtual_engine(engine);
4335         struct ve_bond *bond;
4336         int n;
4337
4338         /* Sanity check the sibling is part of the virtual engine */
4339         for (n = 0; n < ve->num_siblings; n++)
4340                 if (sibling == ve->siblings[n])
4341                         break;
4342         if (n == ve->num_siblings)
4343                 return -EINVAL;
4344
4345         bond = virtual_find_bond(ve, master);
4346         if (bond) {
4347                 bond->sibling_mask |= sibling->mask;
4348                 return 0;
4349         }
4350
4351         bond = krealloc(ve->bonds,
4352                         sizeof(*bond) * (ve->num_bonds + 1),
4353                         GFP_KERNEL);
4354         if (!bond)
4355                 return -ENOMEM;
4356
4357         bond[ve->num_bonds].master = master;
4358         bond[ve->num_bonds].sibling_mask = sibling->mask;
4359
4360         ve->bonds = bond;
4361         ve->num_bonds++;
4362
4363         return 0;
4364 }
4365
4366 struct intel_engine_cs *
4367 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
4368                                  unsigned int sibling)
4369 {
4370         struct virtual_engine *ve = to_virtual_engine(engine);
4371
4372         if (sibling >= ve->num_siblings)
4373                 return NULL;
4374
4375         return ve->siblings[sibling];
4376 }
4377
4378 void intel_execlists_show_requests(struct intel_engine_cs *engine,
4379                                    struct drm_printer *m,
4380                                    void (*show_request)(struct drm_printer *m,
4381                                                         struct i915_request *rq,
4382                                                         const char *prefix),
4383                                    unsigned int max)
4384 {
4385         const struct intel_engine_execlists *execlists = &engine->execlists;
4386         struct i915_request *rq, *last;
4387         unsigned long flags;
4388         unsigned int count;
4389         struct rb_node *rb;
4390
4391         spin_lock_irqsave(&engine->active.lock, flags);
4392
4393         last = NULL;
4394         count = 0;
4395         list_for_each_entry(rq, &engine->active.requests, sched.link) {
4396                 if (count++ < max - 1)
4397                         show_request(m, rq, "\t\tE ");
4398                 else
4399                         last = rq;
4400         }
4401         if (last) {
4402                 if (count > max) {
4403                         drm_printf(m,
4404                                    "\t\t...skipping %d executing requests...\n",
4405                                    count - max);
4406                 }
4407                 show_request(m, last, "\t\tE ");
4408         }
4409
4410         last = NULL;
4411         count = 0;
4412         if (execlists->queue_priority_hint != INT_MIN)
4413                 drm_printf(m, "\t\tQueue priority hint: %d\n",
4414                            execlists->queue_priority_hint);
4415         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
4416                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
4417                 int i;
4418
4419                 priolist_for_each_request(rq, p, i) {
4420                         if (count++ < max - 1)
4421                                 show_request(m, rq, "\t\tQ ");
4422                         else
4423                                 last = rq;
4424                 }
4425         }
4426         if (last) {
4427                 if (count > max) {
4428                         drm_printf(m,
4429                                    "\t\t...skipping %d queued requests...\n",
4430                                    count - max);
4431                 }
4432                 show_request(m, last, "\t\tQ ");
4433         }
4434
4435         last = NULL;
4436         count = 0;
4437         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
4438                 struct virtual_engine *ve =
4439                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4440                 struct i915_request *rq = READ_ONCE(ve->request);
4441
4442                 if (rq) {
4443                         if (count++ < max - 1)
4444                                 show_request(m, rq, "\t\tV ");
4445                         else
4446                                 last = rq;
4447                 }
4448         }
4449         if (last) {
4450                 if (count > max) {
4451                         drm_printf(m,
4452                                    "\t\t...skipping %d virtual requests...\n",
4453                                    count - max);
4454                 }
4455                 show_request(m, last, "\t\tV ");
4456         }
4457
4458         spin_unlock_irqrestore(&engine->active.lock, flags);
4459 }
4460
4461 void intel_lr_context_reset(struct intel_engine_cs *engine,
4462                             struct intel_context *ce,
4463                             u32 head,
4464                             bool scrub)
4465 {
4466         GEM_BUG_ON(!intel_context_is_pinned(ce));
4467         __context_pin_acquire(ce);
4468
4469         /*
4470          * We want a simple context + ring to execute the breadcrumb update.
4471          * We cannot rely on the context being intact across the GPU hang,
4472          * so clear it and rebuild just what we need for the breadcrumb.
4473          * All pending requests for this context will be zapped, and any
4474          * future request will be after userspace has had the opportunity
4475          * to recreate its own state.
4476          */
4477         if (scrub) {
4478                 u32 *regs = ce->lrc_reg_state;
4479
4480                 if (engine->pinned_default_state) {
4481                         memcpy(regs, /* skip restoring the vanilla PPHWSP */
4482                                engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
4483                                engine->context_size - PAGE_SIZE);
4484                 }
4485                 execlists_init_reg_state(regs, ce, engine, ce->ring, false);
4486         }
4487
4488         /* Rerun the request; its payload has been neutered (if guilty). */
4489         ce->ring->head = head;
4490         intel_ring_update_space(ce->ring);
4491
4492         __execlists_update_reg_state(ce, engine);
4493         __context_pin_release(ce);
4494 }
4495
4496 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4497 #include "selftest_lrc.c"
4498 #endif