drivers/gpu/drm/i915/gt/intel_lrc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before), we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
134 #include <linux/interrupt.h>
135
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_context.h"
141 #include "intel_engine_pm.h"
142 #include "intel_gt.h"
143 #include "intel_gt_pm.h"
144 #include "intel_gt_requests.h"
145 #include "intel_lrc_reg.h"
146 #include "intel_mocs.h"
147 #include "intel_reset.h"
148 #include "intel_ring.h"
149 #include "intel_workarounds.h"
150
151 #define RING_EXECLIST_QFULL             (1 << 0x2)
152 #define RING_EXECLIST1_VALID            (1 << 0x3)
153 #define RING_EXECLIST0_VALID            (1 << 0x4)
154 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
155 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
156 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
157
158 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
159 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
160 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
161 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
162 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
163 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
164
165 #define GEN8_CTX_STATUS_COMPLETED_MASK \
166          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
167
168 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
169
170 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
171 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
172 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
173 #define GEN12_IDLE_CTX_ID               0x7FF
174 #define GEN12_CSB_CTX_VALID(csb_dw) \
175         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
176
177 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
178 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
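/*
 * WaIdleLiteRestore: padding appended after each request (see the
 * WaIdleLiteRestore note in execlists_update_context() below) so that
 * RING_TAIL can always be advanced on a lite-restore resubmission.
 */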
179 #define WA_TAIL_DWORDS 2
180 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
181
182 struct virtual_engine {
183         struct intel_engine_cs base;
184         struct intel_context context;
185
186         /*
187          * We allow only a single request through the virtual engine at a time
188          * (each request in the timeline waits for the completion fence of
189          * the previous before being submitted). By restricting ourselves to
190          * only submitting a single request, each request is placed on to a
191          * physical engine to maximise load spreading (by virtue of the late greedy
192          * scheduling -- each real engine takes the next available request
193          * upon idling).
194          */
195         struct i915_request *request;
196
197         /*
198          * We keep a rbtree of available virtual engines inside each physical
199          * engine, sorted by priority. Here we preallocate the nodes we need
200          * for the virtual engine, indexed by physical_engine->id.
201          */
202         struct ve_node {
203                 struct rb_node rb;
204                 int prio;
205         } nodes[I915_NUM_ENGINES];
206
207         /*
208          * Keep track of bonded pairs -- restrictions upon our selection
209          * of physical engines any particular request may be submitted to.
210          * If we receive a submit-fence from a master engine, we will only
211          * use one of sibling_mask physical engines.
212          */
213         struct ve_bond {
214                 const struct intel_engine_cs *master;
215                 intel_engine_mask_t sibling_mask;
216         } *bonds;
217         unsigned int num_bonds;
218
219         /* And finally, which physical engines this virtual engine maps onto. */
220         unsigned int num_siblings;
221         struct intel_engine_cs *siblings[0];
222 };
223
224 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
225 {
226         GEM_BUG_ON(!intel_engine_is_virtual(engine));
227         return container_of(engine, struct virtual_engine, base);
228 }
229
230 static int __execlists_context_alloc(struct intel_context *ce,
231                                      struct intel_engine_cs *engine);
232
233 static void execlists_init_reg_state(u32 *reg_state,
234                                      const struct intel_context *ce,
235                                      const struct intel_engine_cs *engine,
236                                      const struct intel_ring *ring,
237                                      bool close);
238 static void
239 __execlists_update_reg_state(const struct intel_context *ce,
240                              const struct intel_engine_cs *engine,
241                              u32 head);
242
243 static void mark_eio(struct i915_request *rq)
244 {
245         if (i915_request_completed(rq))
246                 return;
247
248         GEM_BUG_ON(i915_request_signaled(rq));
249
250         dma_fence_set_error(&rq->fence, -EIO);
251         i915_request_mark_complete(rq);
252 }
253
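/*
 * Walk back along the timeline from @rq and return the oldest request
 * that has not yet completed; callers use this to find the point from
 * which execution should resume (e.g. when rewinding a hung context).
 */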
254 static struct i915_request *
255 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
256 {
257         struct i915_request *active = rq;
258
259         rcu_read_lock();
260         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
261                 if (i915_request_completed(rq))
262                         break;
263
264                 active = rq;
265         }
266         rcu_read_unlock();
267
268         return active;
269 }
270
271 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
272 {
273         return (i915_ggtt_offset(engine->status_page.vma) +
274                 I915_GEM_HWS_PREEMPT_ADDR);
275 }
276
277 static inline void
278 ring_set_paused(const struct intel_engine_cs *engine, int state)
279 {
280         /*
281          * We inspect HWS_PREEMPT with a semaphore inside
282          * engine->emit_fini_breadcrumb. If the dword is true,
283          * the ring is paused as the semaphore will busywait
284          * until the dword is false.
285          */
286         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
287         if (state)
288                 wmb();
289 }
290
291 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
292 {
293         return rb_entry(rb, struct i915_priolist, node);
294 }
295
296 static inline int rq_prio(const struct i915_request *rq)
297 {
298         return rq->sched.attr.priority;
299 }
300
301 static int effective_prio(const struct i915_request *rq)
302 {
303         int prio = rq_prio(rq);
304
305         /*
306          * If this request is special and must not be interrupted at any
307          * cost, so be it. Note we are only checking the most recent request
308          * in the context and so may be masking an earlier vip request. It
309          * is hoped that under the conditions where nopreempt is used, this
310          * will not matter (i.e. all requests to that context will be
311          * nopreempt for as long as desired).
312          */
313         if (i915_request_has_nopreempt(rq))
314                 prio = I915_PRIORITY_UNPREEMPTABLE;
315
316         /*
317          * On unwinding the active request, we give it a priority bump
318          * if it has completed waiting on any semaphore. If we know that
319          * the request has already started, we can prevent an unwanted
320          * preempt-to-idle cycle by taking that into account now.
321          */
322         if (__i915_request_has_started(rq))
323                 prio |= I915_PRIORITY_NOSEMAPHORE;
324
325         /* Restrict mere WAIT boosts from triggering preemption */
326         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
327         return prio | __NO_PREEMPTION;
328 }
329
330 static int queue_prio(const struct intel_engine_execlists *execlists)
331 {
332         struct i915_priolist *p;
333         struct rb_node *rb;
334
335         rb = rb_first_cached(&execlists->queue);
336         if (!rb)
337                 return INT_MIN;
338
339         /*
340          * As the priolist[] are inverted, with the highest priority in [0],
341          * we have to flip the index value to become priority.
342          */
343         p = to_priolist(rb);
344         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
345 }
346
347 static inline bool need_preempt(const struct intel_engine_cs *engine,
348                                 const struct i915_request *rq,
349                                 struct rb_node *rb)
350 {
351         int last_prio;
352
353         if (!intel_engine_has_semaphores(engine))
354                 return false;
355
356         /*
357          * Check if the current priority hint merits a preemption attempt.
358          *
359          * We record the highest value priority we saw during rescheduling
360          * prior to this dequeue, therefore we know that if it is strictly
361          * less than the current tail of ELSP[0], we do not need to force
362          * a preempt-to-idle cycle.
363          *
364          * However, the priority hint is a mere hint that we may need to
365          * preempt. If that hint is stale or we may be trying to preempt
366          * ourselves, ignore the request.
367          *
368          * More naturally we would write
369          *      prio >= max(0, last);
370          * except that we wish to prevent triggering preemption at the same
371          * priority level: the task that is running should remain running
372          * to preserve FIFO ordering of dependencies.
373          */
374         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
375         if (engine->execlists.queue_priority_hint <= last_prio)
376                 return false;
377
378         /*
379          * Check against the first request in ELSP[1]; it will, thanks to the
380          * power of PI, be the highest priority of that context.
381          */
382         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
383             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
384                 return true;
385
386         if (rb) {
387                 struct virtual_engine *ve =
388                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
389                 bool preempt = false;
390
391                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
392                         struct i915_request *next;
393
394                         rcu_read_lock();
395                         next = READ_ONCE(ve->request);
396                         if (next)
397                                 preempt = rq_prio(next) > last_prio;
398                         rcu_read_unlock();
399                 }
400
401                 if (preempt)
402                         return preempt;
403         }
404
405         /*
406          * If the inflight context did not trigger the preemption, then maybe
407          * it was the set of queued requests? Pick the highest priority in
408          * the queue (the first active priolist) and see if it deserves to be
409          * running instead of ELSP[0].
410          *
411          * The highest priority request in the queue cannot be either
412          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
413          * context, its priority would not exceed ELSP[0] aka last_prio.
414          */
415         return queue_prio(&engine->execlists) > last_prio;
416 }
417
418 __maybe_unused static inline bool
419 assert_priority_queue(const struct i915_request *prev,
420                       const struct i915_request *next)
421 {
422         /*
423          * Without preemption, the prev may refer to the still active element
424          * which we refuse to let go.
425          *
426          * Even with preemption, there are times when we think it is better not
427          * to preempt and leave an ostensibly lower priority request in flight.
428          */
429         if (i915_request_is_active(prev))
430                 return true;
431
432         return rq_prio(prev) >= rq_prio(next);
433 }
434
435 /*
436  * The context descriptor encodes various attributes of a context,
437  * including its GTT address and some flags. Because it's fairly
438  * expensive to calculate, we'll just do it once and cache the result,
439  * which remains valid until the context is unpinned.
440  *
441  * This is what a descriptor looks like, from LSB to MSB::
442  *
443  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
444  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
445  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
446  *      bits 53-54:    mbz, reserved for use by hardware
447  *      bits 55-63:    group ID, currently unused and set to 0
448  *
449  * Starting from Gen11, the upper dword of the descriptor has a new format:
450  *
451  *      bits 32-36:    reserved
452  *      bits 37-47:    SW context ID
453  *      bits 48-53:    engine instance
454  *      bit 54:        mbz, reserved for use by hardware
455  *      bits 55-60:    SW counter
456  *      bits 61-63:    engine class
457  *
458  * engine info, SW context ID and SW counter need to form a unique number
459  * (Context ID) per lrc.
460  */
461 static u64
462 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
463 {
464         u64 desc;
465
466         desc = INTEL_LEGACY_32B_CONTEXT;
467         if (i915_vm_is_4lvl(ce->vm))
468                 desc = INTEL_LEGACY_64B_CONTEXT;
469         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
470
471         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
472         if (IS_GEN(engine->i915, 8))
473                 desc |= GEN8_CTX_L3LLC_COHERENT;
474
475         desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
476         /*
477          * The following 32 bits are copied into the OA reports (dword 2).
478          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
479          * anything below.
480          */
481         if (INTEL_GEN(engine->i915) >= 11) {
482                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
483                                                                 /* bits 48-53 */
484
485                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
486                                                                 /* bits 61-63 */
487         }
488
489         return desc;
490 }
491
492 static inline unsigned int dword_in_page(void *addr)
493 {
494         return offset_in_page(addr) / sizeof(u32);
495 }
496
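/*
 * The reg_offsets() tables below are a compact byte stream decoded by
 * set_offsets(): NOP(x) skips x dwords (filled with MI_NOOP when
 * clearing), LRI(count, flags) emits an MI_LOAD_REGISTER_IMM header,
 * REG()/REG16() encode a register offset (relative to engine->mmio_base)
 * in 7-bit chunks with BIT(7) as a continuation marker, and END(x)
 * terminates the table, recording the number of state dwords to pad out
 * with MI_NOOP.
 */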
497 static void set_offsets(u32 *regs,
498                         const u8 *data,
499                         const struct intel_engine_cs *engine,
500                         bool clear)
501 #define NOP(x) (BIT(7) | (x))
502 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
503 #define POSTED BIT(0)
504 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
505 #define REG16(x) \
506         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
507         (((x) >> 2) & 0x7f)
508 #define END(x) 0, (x)
509 {
510         const u32 base = engine->mmio_base;
511
512         while (*data) {
513                 u8 count, flags;
514
515                 if (*data & BIT(7)) { /* skip */
516                         count = *data++ & ~BIT(7);
517                         if (clear)
518                                 memset32(regs, MI_NOOP, count);
519                         regs += count;
520                         continue;
521                 }
522
523                 count = *data & 0x3f;
524                 flags = *data >> 6;
525                 data++;
526
527                 *regs = MI_LOAD_REGISTER_IMM(count);
528                 if (flags & POSTED)
529                         *regs |= MI_LRI_FORCE_POSTED;
530                 if (INTEL_GEN(engine->i915) >= 11)
531                         *regs |= MI_LRI_CS_MMIO;
532                 regs++;
533
534                 GEM_BUG_ON(!count);
535                 do {
536                         u32 offset = 0;
537                         u8 v;
538
539                         do {
540                                 v = *data++;
541                                 offset <<= 7;
542                                 offset |= v & ~BIT(7);
543                         } while (v & BIT(7));
544
545                         regs[0] = base + (offset << 2);
546                         if (clear)
547                                 regs[1] = 0;
548                         regs += 2;
549                 } while (--count);
550         }
551
552         if (clear) {
553                 u8 count = *++data;
554
555                 /* Clear past the tail for HW access */
556                 GEM_BUG_ON(dword_in_page(regs) > count);
557                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
558
559                 /* Close the batch; used mainly by live_lrc_layout() */
560                 *regs = MI_BATCH_BUFFER_END;
561                 if (INTEL_GEN(engine->i915) >= 10)
562                         *regs |= BIT(0);
563         }
564 }
565
566 static const u8 gen8_xcs_offsets[] = {
567         NOP(1),
568         LRI(11, 0),
569         REG16(0x244),
570         REG(0x034),
571         REG(0x030),
572         REG(0x038),
573         REG(0x03c),
574         REG(0x168),
575         REG(0x140),
576         REG(0x110),
577         REG(0x11c),
578         REG(0x114),
579         REG(0x118),
580
581         NOP(9),
582         LRI(9, 0),
583         REG16(0x3a8),
584         REG16(0x28c),
585         REG16(0x288),
586         REG16(0x284),
587         REG16(0x280),
588         REG16(0x27c),
589         REG16(0x278),
590         REG16(0x274),
591         REG16(0x270),
592
593         NOP(13),
594         LRI(2, 0),
595         REG16(0x200),
596         REG(0x028),
597
598         END(80)
599 };
600
601 static const u8 gen9_xcs_offsets[] = {
602         NOP(1),
603         LRI(14, POSTED),
604         REG16(0x244),
605         REG(0x034),
606         REG(0x030),
607         REG(0x038),
608         REG(0x03c),
609         REG(0x168),
610         REG(0x140),
611         REG(0x110),
612         REG(0x11c),
613         REG(0x114),
614         REG(0x118),
615         REG(0x1c0),
616         REG(0x1c4),
617         REG(0x1c8),
618
619         NOP(3),
620         LRI(9, POSTED),
621         REG16(0x3a8),
622         REG16(0x28c),
623         REG16(0x288),
624         REG16(0x284),
625         REG16(0x280),
626         REG16(0x27c),
627         REG16(0x278),
628         REG16(0x274),
629         REG16(0x270),
630
631         NOP(13),
632         LRI(1, POSTED),
633         REG16(0x200),
634
635         NOP(13),
636         LRI(44, POSTED),
637         REG(0x028),
638         REG(0x09c),
639         REG(0x0c0),
640         REG(0x178),
641         REG(0x17c),
642         REG16(0x358),
643         REG(0x170),
644         REG(0x150),
645         REG(0x154),
646         REG(0x158),
647         REG16(0x41c),
648         REG16(0x600),
649         REG16(0x604),
650         REG16(0x608),
651         REG16(0x60c),
652         REG16(0x610),
653         REG16(0x614),
654         REG16(0x618),
655         REG16(0x61c),
656         REG16(0x620),
657         REG16(0x624),
658         REG16(0x628),
659         REG16(0x62c),
660         REG16(0x630),
661         REG16(0x634),
662         REG16(0x638),
663         REG16(0x63c),
664         REG16(0x640),
665         REG16(0x644),
666         REG16(0x648),
667         REG16(0x64c),
668         REG16(0x650),
669         REG16(0x654),
670         REG16(0x658),
671         REG16(0x65c),
672         REG16(0x660),
673         REG16(0x664),
674         REG16(0x668),
675         REG16(0x66c),
676         REG16(0x670),
677         REG16(0x674),
678         REG16(0x678),
679         REG16(0x67c),
680         REG(0x068),
681
682         END(176)
683 };
684
685 static const u8 gen12_xcs_offsets[] = {
686         NOP(1),
687         LRI(13, POSTED),
688         REG16(0x244),
689         REG(0x034),
690         REG(0x030),
691         REG(0x038),
692         REG(0x03c),
693         REG(0x168),
694         REG(0x140),
695         REG(0x110),
696         REG(0x1c0),
697         REG(0x1c4),
698         REG(0x1c8),
699         REG(0x180),
700         REG16(0x2b4),
701
702         NOP(5),
703         LRI(9, POSTED),
704         REG16(0x3a8),
705         REG16(0x28c),
706         REG16(0x288),
707         REG16(0x284),
708         REG16(0x280),
709         REG16(0x27c),
710         REG16(0x278),
711         REG16(0x274),
712         REG16(0x270),
713
714         END(80)
715 };
716
717 static const u8 gen8_rcs_offsets[] = {
718         NOP(1),
719         LRI(14, POSTED),
720         REG16(0x244),
721         REG(0x034),
722         REG(0x030),
723         REG(0x038),
724         REG(0x03c),
725         REG(0x168),
726         REG(0x140),
727         REG(0x110),
728         REG(0x11c),
729         REG(0x114),
730         REG(0x118),
731         REG(0x1c0),
732         REG(0x1c4),
733         REG(0x1c8),
734
735         NOP(3),
736         LRI(9, POSTED),
737         REG16(0x3a8),
738         REG16(0x28c),
739         REG16(0x288),
740         REG16(0x284),
741         REG16(0x280),
742         REG16(0x27c),
743         REG16(0x278),
744         REG16(0x274),
745         REG16(0x270),
746
747         NOP(13),
748         LRI(1, 0),
749         REG(0x0c8),
750
751         END(80)
752 };
753
754 static const u8 gen9_rcs_offsets[] = {
755         NOP(1),
756         LRI(14, POSTED),
757         REG16(0x244),
758         REG(0x34),
759         REG(0x30),
760         REG(0x38),
761         REG(0x3c),
762         REG(0x168),
763         REG(0x140),
764         REG(0x110),
765         REG(0x11c),
766         REG(0x114),
767         REG(0x118),
768         REG(0x1c0),
769         REG(0x1c4),
770         REG(0x1c8),
771
772         NOP(3),
773         LRI(9, POSTED),
774         REG16(0x3a8),
775         REG16(0x28c),
776         REG16(0x288),
777         REG16(0x284),
778         REG16(0x280),
779         REG16(0x27c),
780         REG16(0x278),
781         REG16(0x274),
782         REG16(0x270),
783
784         NOP(13),
785         LRI(1, 0),
786         REG(0xc8),
787
788         NOP(13),
789         LRI(44, POSTED),
790         REG(0x28),
791         REG(0x9c),
792         REG(0xc0),
793         REG(0x178),
794         REG(0x17c),
795         REG16(0x358),
796         REG(0x170),
797         REG(0x150),
798         REG(0x154),
799         REG(0x158),
800         REG16(0x41c),
801         REG16(0x600),
802         REG16(0x604),
803         REG16(0x608),
804         REG16(0x60c),
805         REG16(0x610),
806         REG16(0x614),
807         REG16(0x618),
808         REG16(0x61c),
809         REG16(0x620),
810         REG16(0x624),
811         REG16(0x628),
812         REG16(0x62c),
813         REG16(0x630),
814         REG16(0x634),
815         REG16(0x638),
816         REG16(0x63c),
817         REG16(0x640),
818         REG16(0x644),
819         REG16(0x648),
820         REG16(0x64c),
821         REG16(0x650),
822         REG16(0x654),
823         REG16(0x658),
824         REG16(0x65c),
825         REG16(0x660),
826         REG16(0x664),
827         REG16(0x668),
828         REG16(0x66c),
829         REG16(0x670),
830         REG16(0x674),
831         REG16(0x678),
832         REG16(0x67c),
833         REG(0x68),
834
835         END(176)
836 };
837
838 static const u8 gen11_rcs_offsets[] = {
839         NOP(1),
840         LRI(15, POSTED),
841         REG16(0x244),
842         REG(0x034),
843         REG(0x030),
844         REG(0x038),
845         REG(0x03c),
846         REG(0x168),
847         REG(0x140),
848         REG(0x110),
849         REG(0x11c),
850         REG(0x114),
851         REG(0x118),
852         REG(0x1c0),
853         REG(0x1c4),
854         REG(0x1c8),
855         REG(0x180),
856
857         NOP(1),
858         LRI(9, POSTED),
859         REG16(0x3a8),
860         REG16(0x28c),
861         REG16(0x288),
862         REG16(0x284),
863         REG16(0x280),
864         REG16(0x27c),
865         REG16(0x278),
866         REG16(0x274),
867         REG16(0x270),
868
869         LRI(1, POSTED),
870         REG(0x1b0),
871
872         NOP(10),
873         LRI(1, 0),
874         REG(0x0c8),
875
876         END(80)
877 };
878
879 static const u8 gen12_rcs_offsets[] = {
880         NOP(1),
881         LRI(13, POSTED),
882         REG16(0x244),
883         REG(0x034),
884         REG(0x030),
885         REG(0x038),
886         REG(0x03c),
887         REG(0x168),
888         REG(0x140),
889         REG(0x110),
890         REG(0x1c0),
891         REG(0x1c4),
892         REG(0x1c8),
893         REG(0x180),
894         REG16(0x2b4),
895
896         NOP(5),
897         LRI(9, POSTED),
898         REG16(0x3a8),
899         REG16(0x28c),
900         REG16(0x288),
901         REG16(0x284),
902         REG16(0x280),
903         REG16(0x27c),
904         REG16(0x278),
905         REG16(0x274),
906         REG16(0x270),
907
908         LRI(3, POSTED),
909         REG(0x1b0),
910         REG16(0x5a8),
911         REG16(0x5ac),
912
913         NOP(6),
914         LRI(1, 0),
915         REG(0x0c8),
916
917         END(80)
918 };
919
920 #undef END
921 #undef REG16
922 #undef REG
923 #undef LRI
924 #undef NOP
925
926 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
927 {
928         /*
929          * The gen12+ lists only have the registers we program in the basic
930          * default state. We rely on the context image using relative
931          * addressing to automatically fix up the register state between the
932          * physical engines for the virtual engine.
933          */
934         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
935                    !intel_engine_has_relative_mmio(engine));
936
937         if (engine->class == RENDER_CLASS) {
938                 if (INTEL_GEN(engine->i915) >= 12)
939                         return gen12_rcs_offsets;
940                 else if (INTEL_GEN(engine->i915) >= 11)
941                         return gen11_rcs_offsets;
942                 else if (INTEL_GEN(engine->i915) >= 9)
943                         return gen9_rcs_offsets;
944                 else
945                         return gen8_rcs_offsets;
946         } else {
947                 if (INTEL_GEN(engine->i915) >= 12)
948                         return gen12_xcs_offsets;
949                 else if (INTEL_GEN(engine->i915) >= 9)
950                         return gen9_xcs_offsets;
951                 else
952                         return gen8_xcs_offsets;
953         }
954 }
955
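/*
 * Unwind the requests submitted to HW that have not yet completed,
 * pushing them back onto the priority queue (or back to their virtual
 * engine of origin) so that they can be resubmitted later.
 */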
956 static struct i915_request *
957 __unwind_incomplete_requests(struct intel_engine_cs *engine)
958 {
959         struct i915_request *rq, *rn, *active = NULL;
960         struct list_head *uninitialized_var(pl);
961         int prio = I915_PRIORITY_INVALID;
962
963         lockdep_assert_held(&engine->active.lock);
964
965         list_for_each_entry_safe_reverse(rq, rn,
966                                          &engine->active.requests,
967                                          sched.link) {
968                 if (i915_request_completed(rq))
969                         continue; /* XXX */
970
971                 __i915_request_unsubmit(rq);
972
973                 /*
974                  * Push the request back into the queue for later resubmission.
975                  * If this request is not native to this physical engine (i.e.
976                  * it came from a virtual source), push it back onto the virtual
977                  * engine so that it can be moved across onto another physical
978                  * engine as load dictates.
979                  */
980                 if (likely(rq->execution_mask == engine->mask)) {
981                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
982                         if (rq_prio(rq) != prio) {
983                                 prio = rq_prio(rq);
984                                 pl = i915_sched_lookup_priolist(engine, prio);
985                         }
986                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
987
988                         list_move(&rq->sched.link, pl);
989                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
990
991                         active = rq;
992                 } else {
993                         struct intel_engine_cs *owner = rq->context->engine;
994
995                         /*
996                          * Decouple the virtual breadcrumb before moving it
997                          * back to the virtual engine -- we don't want the
998                          * request to complete in the background and try
999                          * and cancel the breadcrumb on the virtual engine
1000                          * (instead of the old engine where it is linked)!
1001                          */
1002                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1003                                      &rq->fence.flags)) {
1004                                 spin_lock_nested(&rq->lock,
1005                                                  SINGLE_DEPTH_NESTING);
1006                                 i915_request_cancel_breadcrumb(rq);
1007                                 spin_unlock(&rq->lock);
1008                         }
1009                         rq->engine = owner;
1010                         owner->submit_request(rq);
1011                         active = NULL;
1012                 }
1013         }
1014
1015         return active;
1016 }
1017
1018 struct i915_request *
1019 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1020 {
1021         struct intel_engine_cs *engine =
1022                 container_of(execlists, typeof(*engine), execlists);
1023
1024         return __unwind_incomplete_requests(engine);
1025 }
1026
1027 static inline void
1028 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1029 {
1030         /*
1031          * Currently only used when GVT-g is enabled. When GVT-g is disabled,
1032          * the compiler should eliminate this function as dead code.
1033          */
1034         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1035                 return;
1036
1037         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1038                                    status, rq);
1039 }
1040
1041 static void intel_engine_context_in(struct intel_engine_cs *engine)
1042 {
1043         unsigned long flags;
1044
1045         if (READ_ONCE(engine->stats.enabled) == 0)
1046                 return;
1047
1048         write_seqlock_irqsave(&engine->stats.lock, flags);
1049
1050         if (engine->stats.enabled > 0) {
1051                 if (engine->stats.active++ == 0)
1052                         engine->stats.start = ktime_get();
1053                 GEM_BUG_ON(engine->stats.active == 0);
1054         }
1055
1056         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1057 }
1058
1059 static void intel_engine_context_out(struct intel_engine_cs *engine)
1060 {
1061         unsigned long flags;
1062
1063         if (READ_ONCE(engine->stats.enabled) == 0)
1064                 return;
1065
1066         write_seqlock_irqsave(&engine->stats.lock, flags);
1067
1068         if (engine->stats.enabled > 0) {
1069                 ktime_t last;
1070
1071                 if (engine->stats.active && --engine->stats.active == 0) {
1072                         /*
1073                          * Decrement the active context count and, in case the
1074                          * GPU is now idle, add the elapsed time to the running total.
1075                          */
1076                         last = ktime_sub(ktime_get(), engine->stats.start);
1077
1078                         engine->stats.total = ktime_add(engine->stats.total,
1079                                                         last);
1080                 } else if (engine->stats.active == 0) {
1081                         /*
1082                          * After turning on engine stats, context out might be
1083                          * the first event, in which case we account from the
1084                          * time stats gathering was turned on.
1085                          */
1086                         last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1087
1088                         engine->stats.total = ktime_add(engine->stats.total,
1089                                                         last);
1090                 }
1091         }
1092
1093         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1094 }
1095
1096 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1097 {
1098         if (INTEL_GEN(engine->i915) >= 12)
1099                 return 0x60;
1100         else if (INTEL_GEN(engine->i915) >= 9)
1101                 return 0x54;
1102         else if (engine->class == RENDER_CLASS)
1103                 return 0x58;
1104         else
1105                 return -1;
1106 }
1107
1108 static void
1109 execlists_check_context(const struct intel_context *ce,
1110                         const struct intel_engine_cs *engine)
1111 {
1112         const struct intel_ring *ring = ce->ring;
1113         u32 *regs = ce->lrc_reg_state;
1114         bool valid = true;
1115         int x;
1116
1117         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1118                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1119                        engine->name,
1120                        regs[CTX_RING_START],
1121                        i915_ggtt_offset(ring->vma));
1122                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1123                 valid = false;
1124         }
1125
1126         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1127             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1128                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1129                        engine->name,
1130                        regs[CTX_RING_CTL],
1131                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1132                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1133                 valid = false;
1134         }
1135
1136         x = lrc_ring_mi_mode(engine);
1137         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1138                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1139                        engine->name, regs[x + 1]);
1140                 regs[x + 1] &= ~STOP_RING;
1141                 regs[x + 1] |= STOP_RING << 16;
1142                 valid = false;
1143         }
1144
1145         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1146 }
1147
1148 static void restore_default_state(struct intel_context *ce,
1149                                   struct intel_engine_cs *engine)
1150 {
1151         u32 *regs = ce->lrc_reg_state;
1152
1153         if (engine->pinned_default_state)
1154                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
1155                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1156                        engine->context_size - PAGE_SIZE);
1157
1158         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1159 }
1160
1161 static void reset_active(struct i915_request *rq,
1162                          struct intel_engine_cs *engine)
1163 {
1164         struct intel_context * const ce = rq->context;
1165         u32 head;
1166
1167         /*
1168          * The executing context has been cancelled. We want to prevent
1169          * further execution along this context and propagate the error on
1170          * to anything depending on its results.
1171          *
1172          * In __i915_request_submit(), we apply the -EIO and remove the
1173          * requests' payloads for any banned requests. But first, we must
1174          * rewind the context back to the start of the incomplete request so
1175          * that we do not jump back into the middle of the batch.
1176          *
1177          * We preserve the breadcrumbs and semaphores of the incomplete
1178          * requests so that inter-timeline dependencies (i.e. other timelines)
1179          * remain correctly ordered. And we defer to __i915_request_submit()
1180          * so that all asynchronous waits are correctly handled.
1181          */
1182         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1183                      rq->fence.context, rq->fence.seqno);
1184
1185         /* On resubmission of the active request, payload will be scrubbed */
1186         if (i915_request_completed(rq))
1187                 head = rq->tail;
1188         else
1189                 head = active_request(ce->timeline, rq)->head;
1190         head = intel_ring_wrap(ce->ring, head);
1191
1192         /* Scrub the context image to prevent replaying the previous batch */
1193         restore_default_state(ce, engine);
1194         __execlists_update_reg_state(ce, engine, head);
1195
1196         /* We've switched away, so this should be a no-op, but intent matters */
1197         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1198 }
1199
1200 static inline struct intel_engine_cs *
1201 __execlists_schedule_in(struct i915_request *rq)
1202 {
1203         struct intel_engine_cs * const engine = rq->engine;
1204         struct intel_context * const ce = rq->context;
1205
1206         intel_context_get(ce);
1207
1208         if (unlikely(intel_context_is_banned(ce)))
1209                 reset_active(rq, engine);
1210
1211         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1212                 execlists_check_context(ce, engine);
1213
1214         if (ce->tag) {
1215                 /* Use a fixed tag for OA and friends */
1216                 ce->lrc_desc |= (u64)ce->tag << 32;
1217         } else {
1218                 /* We don't need a strict matching tag, just different values */
1219                 ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1220                 ce->lrc_desc |=
1221                         (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1222                         GEN11_SW_CTX_ID_SHIFT;
1223                 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1224         }
1225
1226         __intel_gt_pm_get(engine->gt);
1227         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1228         intel_engine_context_in(engine);
1229
1230         return engine;
1231 }
1232
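/*
 * ce->inflight packs the engine on which the context is currently
 * executing together with a small submission count in the unused low
 * bits of the pointer; only the first schedule_in (when the pointer is
 * NULL) does the heavyweight work in __execlists_schedule_in().
 */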
1233 static inline struct i915_request *
1234 execlists_schedule_in(struct i915_request *rq, int idx)
1235 {
1236         struct intel_context * const ce = rq->context;
1237         struct intel_engine_cs *old;
1238
1239         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1240         trace_i915_request_in(rq, idx);
1241
1242         old = READ_ONCE(ce->inflight);
1243         do {
1244                 if (!old) {
1245                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1246                         break;
1247                 }
1248         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1249
1250         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1251         return i915_request_get(rq);
1252 }
1253
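/*
 * If the next request queued on the virtual engine can run on an engine
 * other than the one this request executed on, kick the virtual tasklet
 * so the request can be resubmitted elsewhere.
 */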
1254 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1255 {
1256         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1257         struct i915_request *next = READ_ONCE(ve->request);
1258
1259         if (next && next->execution_mask & ~rq->execution_mask)
1260                 tasklet_schedule(&ve->base.execlists.tasklet);
1261 }
1262
1263 static inline void
1264 __execlists_schedule_out(struct i915_request *rq,
1265                          struct intel_engine_cs * const engine)
1266 {
1267         struct intel_context * const ce = rq->context;
1268
1269         /*
1270          * NB process_csb() is not under the engine->active.lock and hence
1271          * schedule_out can race with schedule_in, meaning that we should
1272          * refrain from doing non-trivial work here.
1273          */
1274
1275         /*
1276          * If we have just completed this context, the engine may now be
1277          * idle and we want to re-enter powersaving.
1278          */
1279         if (list_is_last(&rq->link, &ce->timeline->requests) &&
1280             i915_request_completed(rq))
1281                 intel_engine_add_retire(engine, ce->timeline);
1282
1283         intel_engine_context_out(engine);
1284         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1285         intel_gt_pm_put_async(engine->gt);
1286
1287         /*
1288          * If this is part of a virtual engine, its next request may
1289          * have been blocked waiting for access to the active context.
1290          * We have to kick all the siblings again in case we need to
1291          * switch (e.g. the next request is not runnable on this
1292          * engine). Hopefully, we will already have submitted the next
1293          * request before the tasklet runs and do not need to rebuild
1294          * each virtual tree and kick everyone again.
1295          */
1296         if (ce->engine != engine)
1297                 kick_siblings(rq, ce);
1298
1299         intel_context_put(ce);
1300 }
1301
1302 static inline void
1303 execlists_schedule_out(struct i915_request *rq)
1304 {
1305         struct intel_context * const ce = rq->context;
1306         struct intel_engine_cs *cur, *old;
1307
1308         trace_i915_request_out(rq);
1309
1310         old = READ_ONCE(ce->inflight);
1311         do
1312                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1313         while (!try_cmpxchg(&ce->inflight, &old, cur));
1314         if (!cur)
1315                 __execlists_schedule_out(rq, old);
1316
1317         i915_request_put(rq);
1318 }
1319
1320 static u64 execlists_update_context(struct i915_request *rq)
1321 {
1322         struct intel_context *ce = rq->context;
1323         u64 desc = ce->lrc_desc;
1324         u32 tail, prev;
1325
1326         /*
1327          * WaIdleLiteRestore:bdw,skl
1328          *
1329          * We should never submit the context with the same RING_TAIL twice
1330          * just in case we submit an empty ring, which confuses the HW.
1331          *
1332          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1333          * the normal request to be able to always advance the RING_TAIL on
1334          * subsequent resubmissions (for lite restore). Should that fail us,
1335          * and we try and submit the same tail again, force the context
1336          * reload.
1337          *
1338          * If we need to return to a preempted context, we need to skip the
1339          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1340          * HW has a tendency to ignore us rewinding the TAIL to the end of
1341          * an earlier request.
1342          */
1343         tail = intel_ring_set_tail(rq->ring, rq->tail);
1344         prev = ce->lrc_reg_state[CTX_RING_TAIL];
1345         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1346                 desc |= CTX_DESC_FORCE_RESTORE;
1347         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1348         rq->tail = rq->wa_tail;
1349
1350         /*
1351          * Make sure the context image is complete before we submit it to HW.
1352          *
1353          * Ostensibly, writes (including the WCB) should be flushed prior to
1354          * an uncached write such as our mmio register access, but the empirical
1355          * evidence (esp. on Braswell) suggests that the WC write into memory
1356          * may not be visible to the HW prior to the completion of the UC
1357          * register write and that we may begin execution from the context
1358          * before its image is complete, leading to invalid PD chasing.
1359          */
1360         wmb();
1361
1362         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1363         return desc;
1364 }
1365
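/*
 * Write one context descriptor for the given port. When ctrl_reg is
 * present (the ExecLists Submission Queue on Gen11+), each port takes a
 * lower/upper dword pair in the submit queue; older gens take the
 * descriptor as an upper-then-lower dword write directly to the ELSP.
 */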
1366 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1367 {
1368         if (execlists->ctrl_reg) {
1369                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1370                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1371         } else {
1372                 writel(upper_32_bits(desc), execlists->submit_reg);
1373                 writel(lower_32_bits(desc), execlists->submit_reg);
1374         }
1375 }
1376
1377 static __maybe_unused void
1378 trace_ports(const struct intel_engine_execlists *execlists,
1379             const char *msg,
1380             struct i915_request * const *ports)
1381 {
1382         const struct intel_engine_cs *engine =
1383                 container_of(execlists, typeof(*engine), execlists);
1384
1385         if (!ports[0])
1386                 return;
1387
1388         ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1389                      ports[0]->fence.context,
1390                      ports[0]->fence.seqno,
1391                      i915_request_completed(ports[0]) ? "!" :
1392                      i915_request_started(ports[0]) ? "*" :
1393                      "",
1394                      ports[1] ? ports[1]->fence.context : 0,
1395                      ports[1] ? ports[1]->fence.seqno : 0);
1396 }
1397
1398 static __maybe_unused bool
1399 assert_pending_valid(const struct intel_engine_execlists *execlists,
1400                      const char *msg)
1401 {
1402         struct i915_request * const *port, *rq;
1403         struct intel_context *ce = NULL;
1404
1405         trace_ports(execlists, msg, execlists->pending);
1406
1407         if (!execlists->pending[0]) {
1408                 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1409                 return false;
1410         }
1411
1412         if (execlists->pending[execlists_num_ports(execlists)]) {
1413                 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1414                               execlists_num_ports(execlists));
1415                 return false;
1416         }
1417
1418         for (port = execlists->pending; (rq = *port); port++) {
1419                 unsigned long flags;
1420                 bool ok = true;
1421
1422                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1423                 GEM_BUG_ON(!i915_request_is_active(rq));
1424
1425                 if (ce == rq->context) {
1426                         GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1427                                       ce->timeline->fence_context,
1428                                       port - execlists->pending);
1429                         return false;
1430                 }
1431                 ce = rq->context;
1432
1433                 /* Hold tightly onto the lock to prevent concurrent retires! */
1434                 if (!spin_trylock_irqsave(&rq->lock, flags))
1435                         continue;
1436
1437                 if (i915_request_completed(rq))
1438                         goto unlock;
1439
1440                 if (i915_active_is_idle(&ce->active) &&
1441                     !intel_context_is_barrier(ce)) {
1442                         GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1443                                       ce->timeline->fence_context,
1444                                       port - execlists->pending);
1445                         ok = false;
1446                         goto unlock;
1447                 }
1448
1449                 if (!i915_vma_is_pinned(ce->state)) {
1450                         GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1451                                       ce->timeline->fence_context,
1452                                       port - execlists->pending);
1453                         ok = false;
1454                         goto unlock;
1455                 }
1456
1457                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1458                         GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1459                                       ce->timeline->fence_context,
1460                                       port - execlists->pending);
1461                         ok = false;
1462                         goto unlock;
1463                 }
1464
1465 unlock:
1466                 spin_unlock_irqrestore(&rq->lock, flags);
1467                 if (!ok)
1468                         return false;
1469         }
1470
1471         return ce;
1472 }
1473
1474 static void execlists_submit_ports(struct intel_engine_cs *engine)
1475 {
1476         struct intel_engine_execlists *execlists = &engine->execlists;
1477         unsigned int n;
1478
1479         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1480
1481         /*
1482          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1483          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1484          * not be relinquished until the device is idle (see
1485          * i915_gem_idle_work_handler()). As a precaution, we make sure
1486          * that all ELSP are drained, i.e. we have processed the CSB,
1487          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1488          */
1489         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1490
1491         /*
1492          * ELSQ note: the submit queue is not cleared after being submitted
1493          * to the HW so we need to make sure we always clean it up. This is
1494          * currently ensured by the fact that we always write the same number
1495          * of elsq entries, keep this in mind before changing the loop below.
1496          */
1497         for (n = execlists_num_ports(execlists); n--; ) {
1498                 struct i915_request *rq = execlists->pending[n];
1499
1500                 write_desc(execlists,
1501                            rq ? execlists_update_context(rq) : 0,
1502                            n);
1503         }
1504
1505         /* we need to manually load the submit queue */
1506         if (execlists->ctrl_reg)
1507                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1508 }
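/*
 * A rough sketch of what the loop above emits for the default two ports,
 * given hypothetical requests A and B in pending[] = { A, B, NULL } (desc()
 * here stands in for execlists_update_context()):
 *
 *   write_desc(execlists, desc(B), 1);    highest port written first
 *   write_desc(execlists, desc(A), 0);
 *   writel(EL_CTRL_LOAD, ctrl_reg);       only where a ctrl_reg exists
 *
 * Unused ports are written as 0, so every submission fills the same number
 * of entries, as the ELSQ note above requires.
 */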
1509
1510 static bool ctx_single_port_submission(const struct intel_context *ce)
1511 {
1512         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1513                 intel_context_force_single_submission(ce));
1514 }
1515
1516 static bool can_merge_ctx(const struct intel_context *prev,
1517                           const struct intel_context *next)
1518 {
1519         if (prev != next)
1520                 return false;
1521
1522         if (ctx_single_port_submission(prev))
1523                 return false;
1524
1525         return true;
1526 }
1527
1528 static bool can_merge_rq(const struct i915_request *prev,
1529                          const struct i915_request *next)
1530 {
1531         GEM_BUG_ON(prev == next);
1532         GEM_BUG_ON(!assert_priority_queue(prev, next));
1533
1534         /*
1535          * We do not submit known completed requests. Therefore if the next
1536          * request is already completed, we can pretend to merge it in
1537          * with the previous context (and we will skip updating the ELSP
1538          * and tracking). Thus hopefully keeping the ELSP full with active
1539          * contexts, despite the best efforts of preempt-to-busy to confuse
1540          * us.
1541          */
1542         if (i915_request_completed(next))
1543                 return true;
1544
1545         if (unlikely((prev->fence.flags ^ next->fence.flags) &
1546                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1547                       BIT(I915_FENCE_FLAG_SENTINEL))))
1548                 return false;
1549
1550         if (!can_merge_ctx(prev->context, next->context))
1551                 return false;
1552
1553         return true;
1554 }
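/*
 * For illustration: two consecutive requests from the same context would
 * normally coalesce into a single ELSP entry, but if only one of them
 * carries I915_FENCE_FLAG_SENTINEL (or I915_FENCE_FLAG_NOPREEMPT), the
 * flags XOR above is non-zero and they are kept as separate submissions,
 * so the special request is not silently extended with ordinary work.
 */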
1555
1556 static void virtual_update_register_offsets(u32 *regs,
1557                                             struct intel_engine_cs *engine)
1558 {
1559         set_offsets(regs, reg_offsets(engine), engine, false);
1560 }
1561
1562 static bool virtual_matches(const struct virtual_engine *ve,
1563                             const struct i915_request *rq,
1564                             const struct intel_engine_cs *engine)
1565 {
1566         const struct intel_engine_cs *inflight;
1567
1568         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1569                 return false;
1570
1571         /*
1572          * We track when the HW has completed saving the context image
1573          * (i.e. when we have seen the final CS event switching out of
1574          * the context) and must not overwrite the context image before
1575          * then. This restricts us to only using the active engine
1576          * while the previous virtualized request is inflight (so
1577          * we reuse the register offsets). This is a very small
1578          * hysteresis on the greedy selection algorithm.
1579          */
1580         inflight = intel_context_inflight(&ve->context);
1581         if (inflight && inflight != engine)
1582                 return false;
1583
1584         return true;
1585 }
1586
1587 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1588                                      struct intel_engine_cs *engine)
1589 {
1590         struct intel_engine_cs *old = ve->siblings[0];
1591
1592         /* All unattached (rq->engine == old) must already be completed */
1593
1594         spin_lock(&old->breadcrumbs.irq_lock);
1595         if (!list_empty(&ve->context.signal_link)) {
1596                 list_move_tail(&ve->context.signal_link,
1597                                &engine->breadcrumbs.signalers);
1598                 intel_engine_signal_breadcrumbs(engine);
1599         }
1600         spin_unlock(&old->breadcrumbs.irq_lock);
1601 }
1602
1603 static struct i915_request *
1604 last_active(const struct intel_engine_execlists *execlists)
1605 {
1606         struct i915_request * const *last = READ_ONCE(execlists->active);
1607
1608         while (*last && i915_request_completed(*last))
1609                 last++;
1610
1611         return *last;
1612 }
1613
1614 #define for_each_waiter(p__, rq__) \
1615         list_for_each_entry_lockless(p__, \
1616                                      &(rq__)->sched.waiters_list, \
1617                                      wait_link)
1618
1619 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1620 {
1621         LIST_HEAD(list);
1622
1623         /*
1624          * We want to move the interrupted request to the back of
1625          * the round-robin list (i.e. its priority level), but
1626          * in doing so, we must then move all requests that were in
1627          * flight and were waiting for the interrupted request to
1628          * be run after it again.
1629          */
1630         do {
1631                 struct i915_dependency *p;
1632
1633                 GEM_BUG_ON(i915_request_is_active(rq));
1634                 list_move_tail(&rq->sched.link, pl);
1635
1636                 for_each_waiter(p, rq) {
1637                         struct i915_request *w =
1638                                 container_of(p->waiter, typeof(*w), sched);
1639
1640                         /* Leave semaphores spinning on the other engines */
1641                         if (w->engine != rq->engine)
1642                                 continue;
1643
1644                         /* No waiter should start before its signaler */
1645                         GEM_BUG_ON(i915_request_started(w) &&
1646                                    !i915_request_completed(rq));
1647
1648                         GEM_BUG_ON(i915_request_is_active(w));
1649                         if (!i915_request_is_ready(w))
1650                                 continue;
1651
1652                         if (rq_prio(w) < rq_prio(rq))
1653                                 continue;
1654
1655                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1656                         list_move_tail(&w->sched.link, &list);
1657                 }
1658
1659                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1660         } while (rq);
1661 }
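/*
 * A small worked example of the deferral above (names hypothetical): if the
 * timesliced request A has a ready, equal-priority waiter B on the same
 * engine, A is first moved to the back of its priority list and B is then
 * moved in behind it, so the signaler ordering (A before B) is preserved
 * even though both have been pushed behind the rest of that priority level.
 */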
1662
1663 static void defer_active(struct intel_engine_cs *engine)
1664 {
1665         struct i915_request *rq;
1666
1667         rq = __unwind_incomplete_requests(engine);
1668         if (!rq)
1669                 return;
1670
1671         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1672 }
1673
1674 static bool
1675 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1676 {
1677         int hint;
1678
1679         if (!intel_engine_has_timeslices(engine))
1680                 return false;
1681
1682         hint = engine->execlists.queue_priority_hint;
1683         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1684                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1685
1686         return hint >= effective_prio(rq);
1687 }
1688
1689 static int
1690 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1691 {
1692         if (list_is_last(&rq->sched.link, &engine->active.requests))
1693                 return INT_MIN;
1694
1695         return rq_prio(list_next_entry(rq, sched.link));
1696 }
1697
1698 static inline unsigned long
1699 timeslice(const struct intel_engine_cs *engine)
1700 {
1701         return READ_ONCE(engine->props.timeslice_duration_ms);
1702 }
1703
1704 static unsigned long
1705 active_timeslice(const struct intel_engine_cs *engine)
1706 {
1707         const struct i915_request *rq = *engine->execlists.active;
1708
1709         if (!rq || i915_request_completed(rq))
1710                 return 0;
1711
1712         if (engine->execlists.switch_priority_hint < effective_prio(rq))
1713                 return 0;
1714
1715         return timeslice(engine);
1716 }
1717
1718 static void set_timeslice(struct intel_engine_cs *engine)
1719 {
1720         if (!intel_engine_has_timeslices(engine))
1721                 return;
1722
1723         set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1724 }
1725
1726 static void start_timeslice(struct intel_engine_cs *engine)
1727 {
1728         struct intel_engine_execlists *execlists = &engine->execlists;
1729
1730         execlists->switch_priority_hint = execlists->queue_priority_hint;
1731
1732         if (timer_pending(&execlists->timer))
1733                 return;
1734
1735         set_timer_ms(&execlists->timer, timeslice(engine));
1736 }
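/*
 * From the callers in this file: start_timeslice() is used when dequeue
 * backs off and leaves the currently submitted ELSP untouched with more
 * work queued behind it, whereas set_timeslice() (called from process_csb())
 * re-arms the timer once the active ports have actually changed.
 */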
1737
1738 static void record_preemption(struct intel_engine_execlists *execlists)
1739 {
1740         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1741 }
1742
1743 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
1744 {
1745         struct i915_request *rq;
1746
1747         rq = last_active(&engine->execlists);
1748         if (!rq)
1749                 return 0;
1750
1751         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1752         if (unlikely(intel_context_is_banned(rq->context)))
1753                 return 1;
1754
1755         return READ_ONCE(engine->props.preempt_timeout_ms);
1756 }
1757
1758 static void set_preempt_timeout(struct intel_engine_cs *engine)
1759 {
1760         if (!intel_engine_has_preempt_reset(engine))
1761                 return;
1762
1763         set_timer_ms(&engine->execlists.preempt,
1764                      active_preempt_timeout(engine));
1765 }
1766
1767 static inline void clear_ports(struct i915_request **ports, int count)
1768 {
1769         memset_p((void **)ports, NULL, count);
1770 }
1771
1772 static void execlists_dequeue(struct intel_engine_cs *engine)
1773 {
1774         struct intel_engine_execlists * const execlists = &engine->execlists;
1775         struct i915_request **port = execlists->pending;
1776         struct i915_request ** const last_port = port + execlists->port_mask;
1777         struct i915_request *last;
1778         struct rb_node *rb;
1779         bool submit = false;
1780
1781         /*
1782          * Hardware submission is through 2 ports. Conceptually each port
1783          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1784          * static for a context, and unique to each, so we only execute
1785          * requests belonging to a single context from each ring. RING_HEAD
1786          * is maintained by the CS in the context image, it marks the place
1787          * where it got up to last time, and through RING_TAIL we tell the CS
1788          * where we want to execute up to this time.
1789          *
1790          * In this list the requests are in order of execution. Consecutive
1791          * requests from the same context are adjacent in the ringbuffer. We
1792          * can combine these requests into a single RING_TAIL update:
1793          *
1794          *              RING_HEAD...req1...req2
1795          *                                    ^- RING_TAIL
1796          * since to execute req2 the CS must first execute req1.
1797          *
1798          * Our goal then is to point each port to the end of a consecutive
1799          * sequence of requests as being the optimal (fewest wake-ups
1800          * and context switches) submission.
1801          */
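        /*
         * Continuing the example above with hypothetical requests: if the
         * queue holds A1, A2 (context A) and then B1 (context B), the code
         * below ends up pointing ELSP[0] at context A with RING_TAIL set
         * after A2 (A1 and A2 coalesced into one port) and ELSP[1] at
         * context B, i.e. one port per context rather than one per request.
         */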
1802
1803         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1804                 struct virtual_engine *ve =
1805                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1806                 struct i915_request *rq = READ_ONCE(ve->request);
1807
1808                 if (!rq) { /* lazily cleanup after another engine handled rq */
1809                         rb_erase_cached(rb, &execlists->virtual);
1810                         RB_CLEAR_NODE(rb);
1811                         rb = rb_first_cached(&execlists->virtual);
1812                         continue;
1813                 }
1814
1815                 if (!virtual_matches(ve, rq, engine)) {
1816                         rb = rb_next(rb);
1817                         continue;
1818                 }
1819
1820                 break;
1821         }
1822
1823         /*
1824          * If the queue is higher priority than the last
1825          * request in the currently active context, submit afresh.
1826          * We will resubmit again afterwards in case we need to split
1827          * the active context to interject the preemption request,
1828          * i.e. we will retrigger preemption following the ack in case
1829          * of trouble.
1830          */
1831         last = last_active(execlists);
1832         if (last) {
1833                 if (need_preempt(engine, last, rb)) {
1834                         ENGINE_TRACE(engine,
1835                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1836                                      last->fence.context,
1837                                      last->fence.seqno,
1838                                      last->sched.attr.priority,
1839                                      execlists->queue_priority_hint);
1840                         record_preemption(execlists);
1841
1842                         /*
1843                          * Don't let the RING_HEAD advance past the breadcrumb
1844                          * as we unwind (and until we resubmit) so that we do
1845                          * not accidentally tell it to go backwards.
1846                          */
1847                         ring_set_paused(engine, 1);
1848
1849                         /*
1850                          * Note that we have not stopped the GPU at this point,
1851                          * so we are unwinding the incomplete requests as they
1852                          * remain inflight and so by the time we do complete
1853                          * the preemption, some of the unwound requests may
1854                          * complete!
1855                          */
1856                         __unwind_incomplete_requests(engine);
1857
1858                         last = NULL;
1859                 } else if (need_timeslice(engine, last) &&
1860                            timer_expired(&engine->execlists.timer)) {
1861                         ENGINE_TRACE(engine,
1862                                      "expired last=%llx:%lld, prio=%d, hint=%d\n",
1863                                      last->fence.context,
1864                                      last->fence.seqno,
1865                                      last->sched.attr.priority,
1866                                      execlists->queue_priority_hint);
1867
1868                         ring_set_paused(engine, 1);
1869                         defer_active(engine);
1870
1871                         /*
1872                          * Unlike for preemption, if we rewind and continue
1873                          * executing the same context as previously active,
1874                          * the order of execution will remain the same and
1875                          * the tail will only advance. We do not need to
1876                          * force a full context restore, as a lite-restore
1877                          * is sufficient to resample the monotonic TAIL.
1878                          *
1879                          * If we switch to any other context, similarly we
1880                          * will not rewind TAIL of current context, and
1881                          * normal save/restore will preserve state and allow
1882                          * us to later continue executing the same request.
1883                          */
1884                         last = NULL;
1885                 } else {
1886                         /*
1887                          * Otherwise if we already have a request pending
1888                          * for execution after the current one, we can
1889                          * just wait until the next CS event before
1890                          * queuing more. In either case we will force a
1891                          * lite-restore preemption event, but if we wait
1892                          * we hopefully coalesce several updates into a single
1893                          * submission.
1894                          */
1895                         if (!list_is_last(&last->sched.link,
1896                                           &engine->active.requests)) {
1897                                 /*
1898                                  * Even if ELSP[1] is occupied and not worthy
1899                                  * of timeslices, our queue might be.
1900                                  */
1901                                 start_timeslice(engine);
1902                                 return;
1903                         }
1904                 }
1905         }
1906
1907         while (rb) { /* XXX virtual is always taking precedence */
1908                 struct virtual_engine *ve =
1909                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1910                 struct i915_request *rq;
1911
1912                 spin_lock(&ve->base.active.lock);
1913
1914                 rq = ve->request;
1915                 if (unlikely(!rq)) { /* lost the race to a sibling */
1916                         spin_unlock(&ve->base.active.lock);
1917                         rb_erase_cached(rb, &execlists->virtual);
1918                         RB_CLEAR_NODE(rb);
1919                         rb = rb_first_cached(&execlists->virtual);
1920                         continue;
1921                 }
1922
1923                 GEM_BUG_ON(rq != ve->request);
1924                 GEM_BUG_ON(rq->engine != &ve->base);
1925                 GEM_BUG_ON(rq->context != &ve->context);
1926
1927                 if (rq_prio(rq) >= queue_prio(execlists)) {
1928                         if (!virtual_matches(ve, rq, engine)) {
1929                                 spin_unlock(&ve->base.active.lock);
1930                                 rb = rb_next(rb);
1931                                 continue;
1932                         }
1933
1934                         if (last && !can_merge_rq(last, rq)) {
1935                                 spin_unlock(&ve->base.active.lock);
1936                                 start_timeslice(engine);
1937                                 return; /* leave this for another sibling */
1938                         }
1939
1940                         ENGINE_TRACE(engine,
1941                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
1942                                      rq->fence.context,
1943                                      rq->fence.seqno,
1944                                      i915_request_completed(rq) ? "!" :
1945                                      i915_request_started(rq) ? "*" :
1946                                      "",
1947                                      yesno(engine != ve->siblings[0]));
1948
1949                         ve->request = NULL;
1950                         ve->base.execlists.queue_priority_hint = INT_MIN;
1951                         rb_erase_cached(rb, &execlists->virtual);
1952                         RB_CLEAR_NODE(rb);
1953
1954                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1955                         rq->engine = engine;
1956
1957                         if (engine != ve->siblings[0]) {
1958                                 u32 *regs = ve->context.lrc_reg_state;
1959                                 unsigned int n;
1960
1961                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1962
1963                                 if (!intel_engine_has_relative_mmio(engine))
1964                                         virtual_update_register_offsets(regs,
1965                                                                         engine);
1966
1967                                 if (!list_empty(&ve->context.signals))
1968                                         virtual_xfer_breadcrumbs(ve, engine);
1969
1970                                 /*
1971                                  * Move the bound engine to the top of the list
1972                                  * for future execution. We then kick this
1973                                  * tasklet first before checking others, so that
1974                                  * we preferentially reuse this set of bound
1975                                  * registers.
1976                                  */
1977                                 for (n = 1; n < ve->num_siblings; n++) {
1978                                         if (ve->siblings[n] == engine) {
1979                                                 swap(ve->siblings[n],
1980                                                      ve->siblings[0]);
1981                                                 break;
1982                                         }
1983                                 }
1984
1985                                 GEM_BUG_ON(ve->siblings[0] != engine);
1986                         }
1987
1988                         if (__i915_request_submit(rq)) {
1989                                 submit = true;
1990                                 last = rq;
1991                         }
1992                         i915_request_put(rq);
1993
1994                         /*
1995                          * Hmm, we have a bunch of virtual engine requests,
1996                          * but the first one was already completed (thanks
1997                          * preempt-to-busy!). Keep looking at the veng queue
1998                          * until we have no more relevant requests (i.e.
1999                          * the normal submit queue has higher priority).
2000                          */
2001                         if (!submit) {
2002                                 spin_unlock(&ve->base.active.lock);
2003                                 rb = rb_first_cached(&execlists->virtual);
2004                                 continue;
2005                         }
2006                 }
2007
2008                 spin_unlock(&ve->base.active.lock);
2009                 break;
2010         }
2011
2012         while ((rb = rb_first_cached(&execlists->queue))) {
2013                 struct i915_priolist *p = to_priolist(rb);
2014                 struct i915_request *rq, *rn;
2015                 int i;
2016
2017                 priolist_for_each_request_consume(rq, rn, p, i) {
2018                         bool merge = true;
2019
2020                         /*
2021                          * Can we combine this request with the current port?
2022                          * It has to be the same context/ringbuffer and not
2023                          * have any exceptions (e.g. GVT saying never to
2024                          * combine contexts).
2025                          *
2026                          * If we can combine the requests, we can execute both
2027                          * by updating the RING_TAIL to point to the end of the
2028                          * second request, and so we never need to tell the
2029                          * hardware about the first.
2030                          */
2031                         if (last && !can_merge_rq(last, rq)) {
2032                                 /*
2033                                  * If we are on the second port and cannot
2034                                  * combine this request with the last, then we
2035                                  * are done.
2036                                  */
2037                                 if (port == last_port)
2038                                         goto done;
2039
2040                                 /*
2041                                  * We must not populate both ELSP[] with the
2042                                  * same LRCA, i.e. we must submit 2 different
2043                                  * contexts if we submit 2 ELSP.
2044                                  */
2045                                 if (last->context == rq->context)
2046                                         goto done;
2047
2048                                 if (i915_request_has_sentinel(last))
2049                                         goto done;
2050
2051                                 /*
2052                                  * If GVT overrides us we only ever submit
2053                                  * port[0], leaving port[1] empty. Note that we
2054                                  * also have to be careful that we don't queue
2055                                  * the same context (even though a different
2056                                  * request) to the second port.
2057                                  */
2058                                 if (ctx_single_port_submission(last->context) ||
2059                                     ctx_single_port_submission(rq->context))
2060                                         goto done;
2061
2062                                 merge = false;
2063                         }
2064
2065                         if (__i915_request_submit(rq)) {
2066                                 if (!merge) {
2067                                         *port = execlists_schedule_in(last, port - execlists->pending);
2068                                         port++;
2069                                         last = NULL;
2070                                 }
2071
2072                                 GEM_BUG_ON(last &&
2073                                            !can_merge_ctx(last->context,
2074                                                           rq->context));
2075
2076                                 submit = true;
2077                                 last = rq;
2078                         }
2079                 }
2080
2081                 rb_erase_cached(&p->node, &execlists->queue);
2082                 i915_priolist_free(p);
2083         }
2084
2085 done:
2086         /*
2087          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2088          *
2089          * We choose the priority hint such that if we add a request of greater
2090          * priority than this, we kick the submission tasklet to decide on
2091          * the right order of submitting the requests to hardware. We must
2092          * also be prepared to reorder requests as they are in-flight on the
2093          * HW. We therefore derive the priority hint as the first "hole" in
2094          * the HW submission ports and if there are no available slots,
2095          * the priority of the lowest executing request, i.e. last.
2096          *
2097          * When we do receive a higher priority request ready to run from the
2098          * user, see queue_request(), the priority hint is bumped to that
2099          * request triggering preemption on the next dequeue (or subsequent
2100          * interrupt for secondary ports).
2101          */
2102         execlists->queue_priority_hint = queue_prio(execlists);
2103
2104         if (submit) {
2105                 *port = execlists_schedule_in(last, port - execlists->pending);
2106                 execlists->switch_priority_hint =
2107                         switch_prio(engine, *execlists->pending);
2108
2109                 /*
2110                  * Skip if we ended up with exactly the same set of requests,
2111                  * e.g. trying to timeslice a pair of ordered contexts
2112                  */
2113                 if (!memcmp(execlists->active, execlists->pending,
2114                             (port - execlists->pending + 1) * sizeof(*port))) {
2115                         do
2116                                 execlists_schedule_out(fetch_and_zero(port));
2117                         while (port-- != execlists->pending);
2118
2119                         goto skip_submit;
2120                 }
2121                 clear_ports(port + 1, last_port - port);
2122
2123                 execlists_submit_ports(engine);
2124                 set_preempt_timeout(engine);
2125         } else {
2126 skip_submit:
2127                 ring_set_paused(engine, 0);
2128         }
2129 }
2130
2131 static void
2132 cancel_port_requests(struct intel_engine_execlists * const execlists)
2133 {
2134         struct i915_request * const *port;
2135
2136         for (port = execlists->pending; *port; port++)
2137                 execlists_schedule_out(*port);
2138         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2139
2140         /* Mark the end of active before we overwrite *active */
2141         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2142                 execlists_schedule_out(*port);
2143         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2144
2145         WRITE_ONCE(execlists->active, execlists->inflight);
2146 }
2147
2148 static inline void
2149 invalidate_csb_entries(const u32 *first, const u32 *last)
2150 {
2151         clflush((void *)first);
2152         clflush((void *)last);
2153 }
2154
2155 static inline bool
2156 reset_in_progress(const struct intel_engine_execlists *execlists)
2157 {
2158         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
2159 }
2160
2161 /*
2162  * Starting with Gen12, the status has a new format:
2163  *
2164  *     bit  0:     switched to new queue
2165  *     bit  1:     reserved
2166  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2167  *                 switch detail is set to "wait on semaphore"
2168  *     bits 3-5:   engine class
2169  *     bits 6-11:  engine instance
2170  *     bits 12-14: reserved
2171  *     bits 15-25: sw context id of the lrc the GT switched to
2172  *     bits 26-31: sw counter of the lrc the GT switched to
2173  *     bits 32-35: context switch detail
2174  *                  - 0: ctx complete
2175  *                  - 1: wait on sync flip
2176  *                  - 2: wait on vblank
2177  *                  - 3: wait on scanline
2178  *                  - 4: wait on semaphore
2179  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2180  *                       WAIT_FOR_EVENT)
2181  *     bit  36:    reserved
2182  *     bits 37-43: wait detail (for switch detail 1 to 4)
2183  *     bits 44-46: reserved
2184  *     bits 47-57: sw context id of the lrc the GT switched away from
2185  *     bits 58-63: sw counter of the lrc the GT switched away from
2186  */
2187 static inline bool
2188 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2189 {
2190         u32 lower_dw = csb[0];
2191         u32 upper_dw = csb[1];
2192         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2193         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2194         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2195
2196         /*
2197          * The context switch detail is not guaranteed to be 5 when a preemption
2198          * occurs, so we can't just check for that. The check below works for
2199          * all the cases we care about, including preemptions of WAIT
2200          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2201          * would require some extra handling, but we don't support that.
2202          */
2203         if (!ctx_away_valid || new_queue) {
2204                 GEM_BUG_ON(!ctx_to_valid);
2205                 return true;
2206         }
2207
2208         /*
2209          * switch detail = 5 is covered by the case above and we do not expect a
2210          * context switch on an unsuccessful wait instruction since we always
2211          * use polling mode.
2212          */
2213         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2214         return false;
2215 }
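/*
 * Reading the parse above together with process_csb(): an event with no
 * valid outgoing context, or with the "switched to new queue" bit set,
 * returns true and causes the pending[] set to be promoted into the active
 * ports; an ordinary element-completed event (valid outgoing context,
 * new-queue bit clear) returns false and we merely retire the head of the
 * currently active ports.
 */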
2216
2217 static inline bool
2218 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2219 {
2220         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2221 }
2222
2223 static void process_csb(struct intel_engine_cs *engine)
2224 {
2225         struct intel_engine_execlists * const execlists = &engine->execlists;
2226         const u32 * const buf = execlists->csb_status;
2227         const u8 num_entries = execlists->csb_size;
2228         u8 head, tail;
2229
2230         /*
2231          * As we modify our execlists state tracking we require exclusive
2232          * access. Either we are inside the tasklet, or the tasklet is disabled
2233          * and we assume that is only inside the reset paths and so serialised.
2234          */
2235         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2236                    !reset_in_progress(execlists));
2237         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2238
2239         /*
2240          * Note that csb_write, csb_status may be either in HWSP or mmio.
2241          * When reading from the csb_write mmio register, we have to be
2242          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2243          * the low 4bits. As it happens we know the next 4bits are always
2244          * zero and so we can simply mask off the low u8 of the register
2245          * and treat it identically to reading from the HWSP (without having
2246          * to use explicit shifting and masking, and probably bifurcating
2247          * the code to handle the legacy mmio read).
2248          */
2249         head = execlists->csb_head;
2250         tail = READ_ONCE(*execlists->csb_write);
2251         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2252         if (unlikely(head == tail))
2253                 return;
2254
2255         /*
2256          * Hopefully paired with a wmb() in HW!
2257          *
2258          * We must complete the read of the write pointer before any reads
2259          * from the CSB, so that we do not see stale values. Without an rmb
2260          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2261          * we perform the READ_ONCE(*csb_write).
2262          */
2263         rmb();
2264
2265         do {
2266                 bool promote;
2267
2268                 if (++head == num_entries)
2269                         head = 0;
2270
2271                 /*
2272                  * We are flying near dragons again.
2273                  *
2274                  * We hold a reference to the request in execlist_port[]
2275                  * but no more than that. We are operating in softirq
2276                  * context and so cannot hold any mutex or sleep. That
2277                  * prevents us stopping the requests we are processing
2278                  * in port[] from being retired simultaneously (the
2279                  * breadcrumb will be complete before we see the
2280                  * context-switch). As we only hold the reference to the
2281                  * request, any pointer chasing underneath the request
2282                  * is subject to a potential use-after-free. Thus we
2283                  * store all of the bookkeeping within port[] as
2284                  * required, and avoid using unguarded pointers beneath
2285                  * request itself. The same applies to the atomic
2286                  * status notifier.
2287                  */
2288
2289                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2290                              head, buf[2 * head + 0], buf[2 * head + 1]);
2291
2292                 if (INTEL_GEN(engine->i915) >= 12)
2293                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2294                 else
2295                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2296                 if (promote) {
2297                         struct i915_request * const *old = execlists->active;
2298
2299                         /* Point active to the new ELSP; prevent overwriting */
2300                         WRITE_ONCE(execlists->active, execlists->pending);
2301
2302                         if (!inject_preempt_hang(execlists))
2303                                 ring_set_paused(engine, 0);
2304
2305                         /* cancel old inflight, prepare for switch */
2306                         trace_ports(execlists, "preempted", old);
2307                         while (*old)
2308                                 execlists_schedule_out(*old++);
2309
2310                         /* switch pending to inflight */
2311                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2312                         WRITE_ONCE(execlists->active,
2313                                    memcpy(execlists->inflight,
2314                                           execlists->pending,
2315                                           execlists_num_ports(execlists) *
2316                                           sizeof(*execlists->pending)));
2317
2318                         WRITE_ONCE(execlists->pending[0], NULL);
2319                 } else {
2320                         GEM_BUG_ON(!*execlists->active);
2321
2322                         /* port0 completed, advanced to port1 */
2323                         trace_ports(execlists, "completed", execlists->active);
2324
2325                         /*
2326                          * We rely on the hardware being strongly
2327                          * ordered, that the breadcrumb write is
2328                          * coherent (visible from the CPU) before the
2329                          * user interrupt and CSB is processed.
2330                          */
2331                         GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
2332                                    !reset_in_progress(execlists));
2333                         execlists_schedule_out(*execlists->active++);
2334
2335                         GEM_BUG_ON(execlists->active - execlists->inflight >
2336                                    execlists_num_ports(execlists));
2337                 }
2338         } while (head != tail);
2339
2340         execlists->csb_head = head;
2341         set_timeslice(engine);
2342
2343         /*
2344          * Gen11 has proven to fail wrt global observation point between
2345          * entry and tail update, failing on the ordering and thus
2346          * we see an old entry in the context status buffer.
2347          *
2348          * Forcibly evict out entries for the next gpu csb update,
2349          * to increase the odds that we get fresh entries with
2350          * non-working hardware. The cost for doing so comes out mostly in
2351          * the wash as hardware, working or not, will need to do the
2352          * invalidation before.
2353          */
2354         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2355 }
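/*
 * A quick sketch of the ring arithmetic above: head and tail index a
 * circular buffer of num_entries CSB dword-pairs, so with (say)
 * num_entries == 12, head == 11 and tail == 1, the loop pre-increments and
 * wraps head to process entries 0 and 1 before stopping at head == tail.
 */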
2356
2357 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2358 {
2359         lockdep_assert_held(&engine->active.lock);
2360         if (!engine->execlists.pending[0]) {
2361                 rcu_read_lock(); /* protect peeking at execlists->active */
2362                 execlists_dequeue(engine);
2363                 rcu_read_unlock();
2364         }
2365 }
2366
2367 static void __execlists_hold(struct i915_request *rq)
2368 {
2369         LIST_HEAD(list);
2370
2371         do {
2372                 struct i915_dependency *p;
2373
2374                 if (i915_request_is_active(rq))
2375                         __i915_request_unsubmit(rq);
2376
2377                 RQ_TRACE(rq, "on hold\n");
2378                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2379                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2380                 i915_request_set_hold(rq);
2381
2382                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2383                         struct i915_request *w =
2384                                 container_of(p->waiter, typeof(*w), sched);
2385
2386                         /* Leave semaphores spinning on the other engines */
2387                         if (w->engine != rq->engine)
2388                                 continue;
2389
2390                         if (!i915_request_is_ready(w))
2391                                 continue;
2392
2393                         if (i915_request_completed(w))
2394                                 continue;
2395
2396                         if (i915_request_on_hold(w))
2397                                 continue;
2398
2399                         list_move_tail(&w->sched.link, &list);
2400                 }
2401
2402                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2403         } while (rq);
2404 }
2405
2406 static bool execlists_hold(struct intel_engine_cs *engine,
2407                            struct i915_request *rq)
2408 {
2409         spin_lock_irq(&engine->active.lock);
2410
2411         if (i915_request_completed(rq)) { /* too late! */
2412                 rq = NULL;
2413                 goto unlock;
2414         }
2415
2416         if (rq->engine != engine) { /* preempted virtual engine */
2417                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2418
2419                 /*
2420                  * intel_context_inflight() is only protected by virtue
2421                  * of process_csb() being called only by the tasklet (or
2422                  * directly from inside reset while the tasklet is suspended).
2423                  * Assert that neither of those are allowed to run while we
2424                  * poke at the request queues.
2425                  */
2426                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2427
2428                 /*
2429                  * An unsubmitted request along a virtual engine will
2430                  * remain on the active (this) engine until we are able
2431                  * to process the context switch away (and so mark the
2432                  * context as no longer in flight). That cannot have happened
2433                  * yet, otherwise we would not be hanging!
2434                  */
2435                 spin_lock(&ve->base.active.lock);
2436                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2437                 GEM_BUG_ON(ve->request != rq);
2438                 ve->request = NULL;
2439                 spin_unlock(&ve->base.active.lock);
2440                 i915_request_put(rq);
2441
2442                 rq->engine = engine;
2443         }
2444
2445         /*
2446          * Transfer this request onto the hold queue to prevent it
2447          * being resubmitted to HW (and potentially completed) before we have
2448          * released it. Since we may have already submitted following
2449          * requests, we need to remove those as well.
2450          */
2451         GEM_BUG_ON(i915_request_on_hold(rq));
2452         GEM_BUG_ON(rq->engine != engine);
2453         __execlists_hold(rq);
2454
2455 unlock:
2456         spin_unlock_irq(&engine->active.lock);
2457         return rq;
2458 }
2459
2460 static bool hold_request(const struct i915_request *rq)
2461 {
2462         struct i915_dependency *p;
2463
2464         /*
2465          * If one of our ancestors is on hold, we must also be on hold,
2466          * otherwise we will bypass it and execute before it.
2467          */
2468         list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
2469                 const struct i915_request *s =
2470                         container_of(p->signaler, typeof(*s), sched);
2471
2472                 if (s->engine != rq->engine)
2473                         continue;
2474
2475                 if (i915_request_on_hold(s))
2476                         return true;
2477         }
2478
2479         return false;
2480 }
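/*
 * Putting the hold machinery together (hypothetical chain): if request A is
 * captured and parked on engine->active.hold, its ready waiter B on the same
 * engine is swept onto the hold list by __execlists_hold(); a later
 * submission C on the same engine that depends on B then sees hold_request()
 * return true and is parked on the hold list directly by
 * execlists_submit_request() instead of entering the priority queue.
 */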
2481
2482 static void __execlists_unhold(struct i915_request *rq)
2483 {
2484         LIST_HEAD(list);
2485
2486         do {
2487                 struct i915_dependency *p;
2488
2489                 GEM_BUG_ON(!i915_request_on_hold(rq));
2490                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2491
2492                 i915_request_clear_hold(rq);
2493                 list_move_tail(&rq->sched.link,
2494                                i915_sched_lookup_priolist(rq->engine,
2495                                                           rq_prio(rq)));
2496                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2497                 RQ_TRACE(rq, "hold release\n");
2498
2499                 /* Also release any children on this engine that are ready */
2500                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2501                         struct i915_request *w =
2502                                 container_of(p->waiter, typeof(*w), sched);
2503
2504                         if (w->engine != rq->engine)
2505                                 continue;
2506
2507                         if (!i915_request_on_hold(w))
2508                                 continue;
2509
2510                         /* Check that no other parents are also on hold */
2511                         if (hold_request(w))
2512                                 continue;
2513
2514                         list_move_tail(&w->sched.link, &list);
2515                 }
2516
2517                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2518         } while (rq);
2519 }
2520
2521 static void execlists_unhold(struct intel_engine_cs *engine,
2522                              struct i915_request *rq)
2523 {
2524         spin_lock_irq(&engine->active.lock);
2525
2526         /*
2527          * Move this request back to the priority queue, and all of its
2528          * children and grandchildren that were suspended along with it.
2529          */
2530         __execlists_unhold(rq);
2531
2532         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2533                 engine->execlists.queue_priority_hint = rq_prio(rq);
2534                 tasklet_hi_schedule(&engine->execlists.tasklet);
2535         }
2536
2537         spin_unlock_irq(&engine->active.lock);
2538 }
2539
2540 struct execlists_capture {
2541         struct work_struct work;
2542         struct i915_request *rq;
2543         struct i915_gpu_coredump *error;
2544 };
2545
2546 static void execlists_capture_work(struct work_struct *work)
2547 {
2548         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2549         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2550         struct intel_engine_cs *engine = cap->rq->engine;
2551         struct intel_gt_coredump *gt = cap->error->gt;
2552         struct intel_engine_capture_vma *vma;
2553
2554         /* Compress all the objects attached to the request, slow! */
2555         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2556         if (vma) {
2557                 struct i915_vma_compress *compress =
2558                         i915_vma_capture_prepare(gt);
2559
2560                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2561                 i915_vma_capture_finish(gt, compress);
2562         }
2563
2564         gt->simulated = gt->engine->simulated;
2565         cap->error->simulated = gt->simulated;
2566
2567         /* Publish the error state, and announce it to the world */
2568         i915_error_state_store(cap->error);
2569         i915_gpu_coredump_put(cap->error);
2570
2571         /* Return this request and all that depend upon it for signaling */
2572         execlists_unhold(engine, cap->rq);
2573         i915_request_put(cap->rq);
2574
2575         kfree(cap);
2576 }
2577
2578 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2579 {
2580         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2581         struct execlists_capture *cap;
2582
2583         cap = kmalloc(sizeof(*cap), gfp);
2584         if (!cap)
2585                 return NULL;
2586
2587         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2588         if (!cap->error)
2589                 goto err_cap;
2590
2591         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2592         if (!cap->error->gt)
2593                 goto err_gpu;
2594
2595         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2596         if (!cap->error->gt->engine)
2597                 goto err_gt;
2598
2599         return cap;
2600
2601 err_gt:
2602         kfree(cap->error->gt);
2603 err_gpu:
2604         kfree(cap->error);
2605 err_cap:
2606         kfree(cap);
2607         return NULL;
2608 }
2609
2610 static bool execlists_capture(struct intel_engine_cs *engine)
2611 {
2612         struct execlists_capture *cap;
2613
2614         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2615                 return true;
2616
2617         /*
2618          * We need to _quickly_ capture the engine state before we reset.
2619          * We are inside an atomic section (softirq) here and we are delaying
2620          * the forced preemption event.
2621          */
2622         cap = capture_regs(engine);
2623         if (!cap)
2624                 return true;
2625
2626         cap->rq = execlists_active(&engine->execlists);
2627         GEM_BUG_ON(!cap->rq);
2628
2629         rcu_read_lock();
2630         cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2631         cap->rq = i915_request_get_rcu(cap->rq);
2632         rcu_read_unlock();
2633         if (!cap->rq)
2634                 goto err_free;
2635
2636         /*
2637          * Remove the request from the execlists queue, and take ownership
2638          * of the request. We pass it to our worker who will _slowly_ compress
2639          * all the pages the _user_ requested for debugging their batch, after
2640          * which we return it to the queue for signaling.
2641          *
2642          * By removing them from the execlists queue, we also remove the
2643          * requests from being processed by __unwind_incomplete_requests()
2644          * during the intel_engine_reset(), and so they will *not* be replayed
2645          * afterwards.
2646          *
2647          * Note that because we have not yet reset the engine at this point,
2648          * it is possible for the request that we have identified as being
2649          * guilty, did in fact complete and we will then hit an arbitration
2650          * point allowing the outstanding preemption to succeed. The likelihood
2651          * of that is very low (as capturing of the engine registers should be
2652          * fast enough to run inside an irq-off atomic section!), so we will
2653          * simply hold that request accountable for being non-preemptible
2654          * long enough to force the reset.
2655          */
2656         if (!execlists_hold(engine, cap->rq))
2657                 goto err_rq;
2658
2659         INIT_WORK(&cap->work, execlists_capture_work);
2660         schedule_work(&cap->work);
2661         return true;
2662
2663 err_rq:
2664         i915_request_put(cap->rq);
2665 err_free:
2666         i915_gpu_coredump_put(cap->error);
2667         kfree(cap);
2668         return false;
2669 }
2670
2671 static noinline void preempt_reset(struct intel_engine_cs *engine)
2672 {
2673         const unsigned int bit = I915_RESET_ENGINE + engine->id;
2674         unsigned long *lock = &engine->gt->reset.flags;
2675
2676         if (i915_modparams.reset < 3)
2677                 return;
2678
2679         if (test_and_set_bit(bit, lock))
2680                 return;
2681
2682         /* Mark this tasklet as disabled to avoid waiting for it to complete */
2683         tasklet_disable_nosync(&engine->execlists.tasklet);
2684
2685         ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
2686                      READ_ONCE(engine->props.preempt_timeout_ms),
2687                      jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
2688
2689         ring_set_paused(engine, 1); /* Freeze the current request in place */
2690         if (execlists_capture(engine))
2691                 intel_engine_reset(engine, "preemption time out");
2692         else
2693                 ring_set_paused(engine, 0);
2694
2695         tasklet_enable(&engine->execlists.tasklet);
2696         clear_and_wake_up_bit(bit, lock);
2697 }
2698
2699 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2700 {
2701         const struct timer_list *t = &engine->execlists.preempt;
2702
2703         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2704                 return false;
2705
2706         if (!timer_expired(t))
2707                 return false;
2708
2709         return READ_ONCE(engine->execlists.pending[0]);
2710 }
2711
2712 /*
2713  * Check the unread Context Status Buffers and manage the submission of new
2714  * contexts to the ELSP accordingly.
2715  */
2716 static void execlists_submission_tasklet(unsigned long data)
2717 {
2718         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2719         bool timeout = preempt_timeout(engine);
2720
2721         process_csb(engine);
2722         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2723                 unsigned long flags;
2724
2725                 spin_lock_irqsave(&engine->active.lock, flags);
2726                 __execlists_submission_tasklet(engine);
2727                 spin_unlock_irqrestore(&engine->active.lock, flags);
2728
2729                 /* Recheck after serialising with direct-submission */
2730                 if (timeout && preempt_timeout(engine))
2731                         preempt_reset(engine);
2732         }
2733 }
2734
2735 static void __execlists_kick(struct intel_engine_execlists *execlists)
2736 {
2737         /* Kick the tasklet for some interrupt coalescing and reset handling */
2738         tasklet_hi_schedule(&execlists->tasklet);
2739 }
2740
2741 #define execlists_kick(t, member) \
2742         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
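/*
 * The container_of() indirection above simply recovers the execlists state
 * from whichever embedded timer fired, e.g. execlists_kick(timer, preempt)
 * expands (roughly) to
 * __execlists_kick(container_of(timer, struct intel_engine_execlists, preempt)).
 */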
2743
2744 static void execlists_timeslice(struct timer_list *timer)
2745 {
2746         execlists_kick(timer, timer);
2747 }
2748
2749 static void execlists_preempt(struct timer_list *timer)
2750 {
2751         execlists_kick(timer, preempt);
2752 }
2753
2754 static void queue_request(struct intel_engine_cs *engine,
2755                           struct i915_request *rq)
2756 {
2757         GEM_BUG_ON(!list_empty(&rq->sched.link));
2758         list_add_tail(&rq->sched.link,
2759                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
2760         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2761 }
2762
2763 static void __submit_queue_imm(struct intel_engine_cs *engine)
2764 {
2765         struct intel_engine_execlists * const execlists = &engine->execlists;
2766
2767         if (reset_in_progress(execlists))
2768                 return; /* defer until we restart the engine following reset */
2769
2770         if (execlists->tasklet.func == execlists_submission_tasklet)
2771                 __execlists_submission_tasklet(engine);
2772         else
2773                 tasklet_hi_schedule(&execlists->tasklet);
2774 }
2775
2776 static void submit_queue(struct intel_engine_cs *engine,
2777                          const struct i915_request *rq)
2778 {
2779         struct intel_engine_execlists *execlists = &engine->execlists;
2780
2781         if (rq_prio(rq) <= execlists->queue_priority_hint)
2782                 return;
2783
2784         execlists->queue_priority_hint = rq_prio(rq);
2785         __submit_queue_imm(engine);
2786 }
2787
2788 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2789                              const struct i915_request *rq)
2790 {
2791         GEM_BUG_ON(i915_request_on_hold(rq));
2792         return !list_empty(&engine->active.hold) && hold_request(rq);
2793 }
2794
2795 static void execlists_submit_request(struct i915_request *request)
2796 {
2797         struct intel_engine_cs *engine = request->engine;
2798         unsigned long flags;
2799
2800         /* Will be called from irq-context when using foreign fences. */
2801         spin_lock_irqsave(&engine->active.lock, flags);
2802
2803         if (unlikely(ancestor_on_hold(engine, request))) {
2804                 list_add_tail(&request->sched.link, &engine->active.hold);
2805                 i915_request_set_hold(request);
2806         } else {
2807                 queue_request(engine, request);
2808
2809                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2810                 GEM_BUG_ON(list_empty(&request->sched.link));
2811
2812                 submit_queue(engine, request);
2813         }
2814
2815         spin_unlock_irqrestore(&engine->active.lock, flags);
2816 }
2817
2818 static void __execlists_context_fini(struct intel_context *ce)
2819 {
2820         intel_ring_put(ce->ring);
2821         i915_vma_put(ce->state);
2822 }
2823
2824 static void execlists_context_destroy(struct kref *kref)
2825 {
2826         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2827
2828         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2829         GEM_BUG_ON(intel_context_is_pinned(ce));
2830
2831         if (ce->state)
2832                 __execlists_context_fini(ce);
2833
2834         intel_context_fini(ce);
2835         intel_context_free(ce);
2836 }
2837
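     /*
      * With CONFIG_DRM_I915_DEBUG_GEM, the page following the context image
      * (i.e. beyond engine->context_size) is poisoned with CONTEXT_REDZONE;
      * check_redzone() verifies it on unpin so any overrun of the context
      * image is reported.
      */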
2838 static void
2839 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2840 {
2841         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2842                 return;
2843
2844         vaddr += engine->context_size;
2845
2846         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2847 }
2848
2849 static void
2850 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2851 {
2852         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2853                 return;
2854
2855         vaddr += engine->context_size;
2856
2857         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2858                 dev_err_once(engine->i915->drm.dev,
2859                              "%s context redzone overwritten!\n",
2860                              engine->name);
2861 }
2862
2863 static void execlists_context_unpin(struct intel_context *ce)
2864 {
2865         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2866                       ce->engine);
2867
2868         i915_gem_object_unpin_map(ce->state->obj);
2869 }
2870
2871 static void
2872 __execlists_update_reg_state(const struct intel_context *ce,
2873                              const struct intel_engine_cs *engine,
2874                              u32 head)
2875 {
2876         struct intel_ring *ring = ce->ring;
2877         u32 *regs = ce->lrc_reg_state;
2878
2879         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
2880         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2881
2882         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
2883         regs[CTX_RING_HEAD] = head;
2884         regs[CTX_RING_TAIL] = ring->tail;
2885
2886         /* RPCS */
2887         if (engine->class == RENDER_CLASS) {
2888                 regs[CTX_R_PWR_CLK_STATE] =
2889                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2890
2891                 i915_oa_init_reg_state(ce, engine);
2892         }
2893 }
2894
2895 static int
2896 __execlists_context_pin(struct intel_context *ce,
2897                         struct intel_engine_cs *engine)
2898 {
2899         void *vaddr;
2900
2901         GEM_BUG_ON(!ce->state);
2902         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2903
2904         vaddr = i915_gem_object_pin_map(ce->state->obj,
2905                                         i915_coherent_map_type(engine->i915) |
2906                                         I915_MAP_OVERRIDE);
2907         if (IS_ERR(vaddr))
2908                 return PTR_ERR(vaddr);
2909
2910         ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
2911         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2912         __execlists_update_reg_state(ce, engine, ce->ring->tail);
2913
2914         return 0;
2915 }
2916
2917 static int execlists_context_pin(struct intel_context *ce)
2918 {
2919         return __execlists_context_pin(ce, ce->engine);
2920 }
2921
2922 static int execlists_context_alloc(struct intel_context *ce)
2923 {
2924         return __execlists_context_alloc(ce, ce->engine);
2925 }
2926
2927 static void execlists_context_reset(struct intel_context *ce)
2928 {
2929         CE_TRACE(ce, "reset\n");
2930         GEM_BUG_ON(!intel_context_is_pinned(ce));
2931
2932         /*
2933          * Because we emit WA_TAIL_DWORDS there may be a disparity
2934          * between our bookkeeping in ce->ring->head and ce->ring->tail and
2935          * that stored in the context image. As we only write new commands from
2936          * ce->ring->tail onwards, everything before that is junk. If the GPU
2937          * starts reading its RING_HEAD from the context image, it may try to
2938          * execute that junk and die.
2939          *
2940          * The contexts that are still pinned on resume belong to the
2941          * kernel, and are local to each engine. All other contexts will
2942          * have their head/tail sanitized upon pinning before use, so they
2943          * will never see garbage.
2944          *
2945          * So to avoid that we reset the context images upon resume. For
2946          * simplicity, we just zero everything out.
2947          */
2948         intel_ring_reset(ce->ring, ce->ring->emit);
2949
2950         /* Scrub away the garbage */
2951         execlists_init_reg_state(ce->lrc_reg_state,
2952                                  ce, ce->engine, ce->ring, true);
2953         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
2954
2955         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
2956 }
2957
2958 static const struct intel_context_ops execlists_context_ops = {
2959         .alloc = execlists_context_alloc,
2960
2961         .pin = execlists_context_pin,
2962         .unpin = execlists_context_unpin,
2963
2964         .enter = intel_context_enter_engine,
2965         .exit = intel_context_exit_engine,
2966
2967         .reset = execlists_context_reset,
2968         .destroy = execlists_context_destroy,
2969 };
2970
2971 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
2972 {
2973         u32 *cs;
2974
2975         GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
2976
2977         cs = intel_ring_begin(rq, 6);
2978         if (IS_ERR(cs))
2979                 return PTR_ERR(cs);
2980
2981         /*
2982          * Check if we have been preempted before we even get started.
2983          *
2984          * After this point i915_request_started() reports true, even if
2985          * we get preempted and so are no longer running.
2986          */
2987         *cs++ = MI_ARB_CHECK;
2988         *cs++ = MI_NOOP;
2989
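             /*
              * Advance the timeline HWSP to seqno - 1: this is the store that
              * makes i915_request_started() report true for this request.
              */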
2990         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2991         *cs++ = i915_request_timeline(rq)->hwsp_offset;
2992         *cs++ = 0;
2993         *cs++ = rq->fence.seqno - 1;
2994
2995         intel_ring_advance(rq, cs);
2996
2997         /* Record the updated position of the request's payload */
2998         rq->infix = intel_ring_offset(rq, cs);
2999
3000         return 0;
3001 }
3002
3003 static int execlists_request_alloc(struct i915_request *request)
3004 {
3005         int ret;
3006
3007         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3008
3009         /*
3010          * Flush enough space to reduce the likelihood of waiting after
3011          * we start building the request - in which case we will just
3012          * have to repeat work.
3013          */
3014         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3015
3016         /*
3017          * Note that after this point, we have committed to using
3018          * this request as it is being used to both track the
3019          * state of engine initialisation and liveness of the
3020          * golden renderstate above. Think twice before you try
3021          * to cancel/unwind this request now.
3022          */
3023
3024         /* Unconditionally invalidate GPU caches and TLBs. */
3025         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3026         if (ret)
3027                 return ret;
3028
3029         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3030         return 0;
3031 }
3032
3033 /*
3034  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
3035  * PIPE_CONTROL instruction. This is required for the flush to happen correctly,
3036  * but there is a slight complication as this is applied in a WA batch where the
3037  * values are only initialized once, so we cannot read the register value at the
3038  * beginning and reuse it later; hence we save its value to memory, upload a
3039  * constant value with bit21 set and then restore it with the saved value.
3040  * To simplify the WA, a constant value is formed by using the default value
3041  * of this register. This shouldn't be a problem because we only modify
3042  * it for a short period and this batch is non-preemptible. We could of course
3043  * use additional instructions that read the actual value of the register
3044  * at that time and set our bit of interest, but that makes the WA more complicated.
3045  *
3046  * This WA is also required for Gen9 so extracting as a function avoids
3047  * code duplication.
3048  */
3049 static u32 *
3050 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3051 {
3052         /* NB no one else is allowed to scribble over scratch + 256! */
3053         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3054         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3055         *batch++ = intel_gt_scratch_offset(engine->gt,
3056                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3057         *batch++ = 0;
3058
3059         *batch++ = MI_LOAD_REGISTER_IMM(1);
3060         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3061         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3062
3063         batch = gen8_emit_pipe_control(batch,
3064                                        PIPE_CONTROL_CS_STALL |
3065                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3066                                        0);
3067
3068         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3069         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3070         *batch++ = intel_gt_scratch_offset(engine->gt,
3071                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3072         *batch++ = 0;
3073
3074         return batch;
3075 }
3076
3077 /*
3078  * Typically we only have one indirect_ctx and one per_ctx batch buffer, which
3079  * are initialized at the beginning and shared across all contexts, but this
3080  * field helps us to have multiple batches at different offsets and select them
3081  * based on some criteria. At the moment each batch always starts at the
3082  * beginning of the page and we don't have multiple wa_ctx batch buffers.
3083  *
3084  * The number of WAs applied is not known at the beginning; we use this field
3085  * to return the number of DWORDS written.
3086  *
3087  * Note that this batch does not contain MI_BATCH_BUFFER_END, so NOOPs are
3088  * added as padding to make it cacheline aligned.
3089  * MI_BATCH_BUFFER_END will be added to the per-ctx batch, and the two together
3090  * make a complete batch buffer.
3091  */
3092 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3093 {
3094         /* WaDisableCtxRestoreArbitration:bdw,chv */
3095         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3096
3097         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3098         if (IS_BROADWELL(engine->i915))
3099                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3100
3101         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3102         /* Actual scratch location is at 128 bytes offset */
3103         batch = gen8_emit_pipe_control(batch,
3104                                        PIPE_CONTROL_FLUSH_L3 |
3105                                        PIPE_CONTROL_STORE_DATA_INDEX |
3106                                        PIPE_CONTROL_CS_STALL |
3107                                        PIPE_CONTROL_QW_WRITE,
3108                                        LRC_PPHWSP_SCRATCH_ADDR);
3109
3110         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3111
3112         /* Pad to end of cacheline */
3113         while ((unsigned long)batch % CACHELINE_BYTES)
3114                 *batch++ = MI_NOOP;
3115
3116         /*
3117          * MI_BATCH_BUFFER_END is not required in the Indirect ctx BB because
3118          * execution depends on the length specified in terms of cache lines
3119          * in the register CTX_RCS_INDIRECT_CTX.
3120          */
3121
3122         return batch;
3123 }
3124
3125 struct lri {
3126         i915_reg_t reg;
3127         u32 value;
3128 };
3129
3130 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3131 {
3132         GEM_BUG_ON(!count || count > 63);
3133
3134         *batch++ = MI_LOAD_REGISTER_IMM(count);
3135         do {
3136                 *batch++ = i915_mmio_reg_offset(lri->reg);
3137                 *batch++ = lri->value;
3138         } while (lri++, --count);
3139         *batch++ = MI_NOOP;
3140
3141         return batch;
3142 }
3143
3144 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3145 {
3146         static const struct lri lri[] = {
3147                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3148                 {
3149                         COMMON_SLICE_CHICKEN2,
3150                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3151                                        0),
3152                 },
3153
3154                 /* BSpec: 11391 */
3155                 {
3156                         FF_SLICE_CHICKEN,
3157                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3158                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3159                 },
3160
3161                 /* BSpec: 11299 */
3162                 {
3163                         _3D_CHICKEN3,
3164                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3165                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3166                 }
3167         };
3168
3169         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3170
3171         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3172         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3173
3174         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3175         batch = gen8_emit_pipe_control(batch,
3176                                        PIPE_CONTROL_FLUSH_L3 |
3177                                        PIPE_CONTROL_STORE_DATA_INDEX |
3178                                        PIPE_CONTROL_CS_STALL |
3179                                        PIPE_CONTROL_QW_WRITE,
3180                                        LRC_PPHWSP_SCRATCH_ADDR);
3181
3182         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3183
3184         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3185         if (HAS_POOLED_EU(engine->i915)) {
3186                 /*
3187                  * EU pool configuration is set up along with the golden context
3188                  * during context initialization. This value depends on the
3189                  * device type (2x6 or 3x6) and needs to be updated based
3190                  * on which subslice is disabled, especially for 2x6
3191                  * devices; however, it is safe to load the default
3192                  * configuration of a 3x6 device instead of masking off the
3193                  * corresponding bits because HW ignores bits of a disabled
3194                  * subslice and drops down to the appropriate config. Please
3195                  * see render_state_setup() in i915_gem_render_state.c for
3196                  * the possible configurations; to avoid duplication they are
3197                  * not shown here again.
3198                  */
3199                 *batch++ = GEN9_MEDIA_POOL_STATE;
3200                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3201                 *batch++ = 0x00777000;
3202                 *batch++ = 0;
3203                 *batch++ = 0;
3204                 *batch++ = 0;
3205         }
3206
3207         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3208
3209         /* Pad to end of cacheline */
3210         while ((unsigned long)batch % CACHELINE_BYTES)
3211                 *batch++ = MI_NOOP;
3212
3213         return batch;
3214 }
3215
3216 static u32 *
3217 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3218 {
3219         int i;
3220
3221         /*
3222          * WaPipeControlBefore3DStateSamplePattern: cnl
3223          *
3224          * Ensure the engine is idle prior to programming a
3225          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3226          */
3227         batch = gen8_emit_pipe_control(batch,
3228                                        PIPE_CONTROL_CS_STALL,
3229                                        0);
3230         /*
3231          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3232          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3233          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3234          * confusing. Since gen8_emit_pipe_control() already advances the
3235          * batch by 6 dwords, we advance the other 10 here, completing a
3236          * cacheline. It's not clear if the workaround requires this padding
3237          * before other commands, or if it's just the regular padding we would
3238          * already have for the workaround bb, so leave it here for now.
3239          */
3240         for (i = 0; i < 10; i++)
3241                 *batch++ = MI_NOOP;
3242
3243         /* Pad to end of cacheline */
3244         while ((unsigned long)batch % CACHELINE_BYTES)
3245                 *batch++ = MI_NOOP;
3246
3247         return batch;
3248 }
3249
3250 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3251
3252 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3253 {
3254         struct drm_i915_gem_object *obj;
3255         struct i915_vma *vma;
3256         int err;
3257
3258         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3259         if (IS_ERR(obj))
3260                 return PTR_ERR(obj);
3261
3262         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3263         if (IS_ERR(vma)) {
3264                 err = PTR_ERR(vma);
3265                 goto err;
3266         }
3267
3268         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3269         if (err)
3270                 goto err;
3271
3272         engine->wa_ctx.vma = vma;
3273         return 0;
3274
3275 err:
3276         i915_gem_object_put(obj);
3277         return err;
3278 }
3279
3280 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3281 {
3282         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3283 }
3284
3285 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3286
3287 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3288 {
3289         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3290         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3291                                             &wa_ctx->per_ctx };
3292         wa_bb_func_t wa_bb_fn[2];
3293         struct page *page;
3294         void *batch, *batch_ptr;
3295         unsigned int i;
3296         int ret;
3297
3298         if (engine->class != RENDER_CLASS)
3299                 return 0;
3300
3301         switch (INTEL_GEN(engine->i915)) {
3302         case 12:
3303         case 11:
3304                 return 0;
3305         case 10:
3306                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3307                 wa_bb_fn[1] = NULL;
3308                 break;
3309         case 9:
3310                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3311                 wa_bb_fn[1] = NULL;
3312                 break;
3313         case 8:
3314                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3315                 wa_bb_fn[1] = NULL;
3316                 break;
3317         default:
3318                 MISSING_CASE(INTEL_GEN(engine->i915));
3319                 return 0;
3320         }
3321
3322         ret = lrc_setup_wa_ctx(engine);
3323         if (ret) {
3324                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3325                 return ret;
3326         }
3327
3328         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3329         batch = batch_ptr = kmap_atomic(page);
3330
3331         /*
3332          * Emit the two workaround batch buffers, recording the offset from the
3333          * start of the workaround batch buffer object for each and their
3334          * respective sizes.
3335          */
3336         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3337                 wa_bb[i]->offset = batch_ptr - batch;
3338                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3339                                                   CACHELINE_BYTES))) {
3340                         ret = -EINVAL;
3341                         break;
3342                 }
3343                 if (wa_bb_fn[i])
3344                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3345                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3346         }
3347
3348         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3349
3350         kunmap_atomic(batch);
3351         if (ret)
3352                 lrc_destroy_wa_ctx(engine);
3353
3354         return ret;
3355 }
3356
3357 static void enable_execlists(struct intel_engine_cs *engine)
3358 {
3359         u32 mode;
3360
3361         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3362
3363         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3364
3365         if (INTEL_GEN(engine->i915) >= 11)
3366                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3367         else
3368                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3369         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3370
3371         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3372
3373         ENGINE_WRITE_FW(engine,
3374                         RING_HWS_PGA,
3375                         i915_ggtt_offset(engine->status_page.vma));
3376         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3377
3378         engine->context_tag = 0;
3379 }
3380
3381 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3382 {
3383         bool unexpected = false;
3384
3385         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3386                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3387                 unexpected = true;
3388         }
3389
3390         return unexpected;
3391 }
3392
3393 static int execlists_resume(struct intel_engine_cs *engine)
3394 {
3395         intel_engine_apply_workarounds(engine);
3396         intel_engine_apply_whitelist(engine);
3397
3398         intel_mocs_init_engine(engine);
3399
3400         intel_engine_reset_breadcrumbs(engine);
3401
3402         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3403                 struct drm_printer p = drm_debug_printer(__func__);
3404
3405                 intel_engine_dump(engine, &p, NULL);
3406         }
3407
3408         enable_execlists(engine);
3409
3410         return 0;
3411 }
3412
3413 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3414 {
3415         struct intel_engine_execlists * const execlists = &engine->execlists;
3416         unsigned long flags;
3417
3418         ENGINE_TRACE(engine, "depth<-%d\n",
3419                      atomic_read(&execlists->tasklet.count));
3420
3421         /*
3422          * Prevent request submission to the hardware until we have
3423          * completed the reset in i915_gem_reset_finish(). If a request
3424          * is completed by one engine, it may then queue a request
3425          * to a second via its execlists->tasklet *just* as we are
3426          * calling engine->resume() and also writing the ELSP.
3427          * Turning off the execlists->tasklet until the reset is over
3428          * prevents the race.
3429          */
3430         __tasklet_disable_sync_once(&execlists->tasklet);
3431         GEM_BUG_ON(!reset_in_progress(execlists));
3432
3433         /* And flush any current direct submission. */
3434         spin_lock_irqsave(&engine->active.lock, flags);
3435         spin_unlock_irqrestore(&engine->active.lock, flags);
3436
3437         /*
3438          * We stop the engines, otherwise we might get a failed reset and a
3439          * dead gpu (on elk). Also, gpus as modern as kbl can suffer
3440          * from a system hang if a batchbuffer is progressing when
3441          * the reset is issued, regardless of the READY_TO_RESET ack.
3442          * Thus assume it is best to stop the engines on all gens
3443          * where we have a gpu reset.
3444          *
3445          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3446          *
3447          * FIXME: Wa for more modern gens needs to be validated
3448          */
3449         intel_engine_stop_cs(engine);
3450 }
3451
3452 static void reset_csb_pointers(struct intel_engine_cs *engine)
3453 {
3454         struct intel_engine_execlists * const execlists = &engine->execlists;
3455         const unsigned int reset_value = execlists->csb_size - 1;
3456
3457         ring_set_paused(engine, 0);
3458
3459         /*
3460          * After a reset, the HW starts writing into CSB entry [0]. We
3461          * therefore have to set our HEAD pointer back one entry so that
3462          * the *first* entry we check is entry 0. To complicate this further,
3463          * as we don't wait for the first interrupt after reset, we have to
3464          * fake the HW write to point back to the last entry so that our
3465          * inline comparison of our cached head position against the last HW
3466          * write works even before the first interrupt.
3467          */
3468         execlists->csb_head = reset_value;
3469         WRITE_ONCE(*execlists->csb_write, reset_value);
3470         wmb(); /* Make sure this is visible to HW (paranoia?) */
3471
3472         /*
3473          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3474          * Bludgeon them with a mmio update to be sure.
3475          */
3476         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3477                      reset_value << 8 | reset_value);
3478         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3479
3480         invalidate_csb_entries(&execlists->csb_status[0],
3481                                &execlists->csb_status[reset_value]);
3482 }
3483
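     /*
      * Clear STOP_RING in the RING_MI_MODE value saved in the context image,
      * using a masked write (mask in the upper 16 bits, value in the lower),
      * so the ring is not left frozen when the image is reloaded after reset.
      */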
3484 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3485 {
3486         int x;
3487
3488         x = lrc_ring_mi_mode(engine);
3489         if (x != -1) {
3490                 regs[x + 1] &= ~STOP_RING;
3491                 regs[x + 1] |= STOP_RING << 16;
3492         }
3493 }
3494
3495 static void __execlists_reset_reg_state(const struct intel_context *ce,
3496                                         const struct intel_engine_cs *engine)
3497 {
3498         u32 *regs = ce->lrc_reg_state;
3499
3500         __reset_stop_ring(regs, engine);
3501 }
3502
3503 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3504 {
3505         struct intel_engine_execlists * const execlists = &engine->execlists;
3506         struct intel_context *ce;
3507         struct i915_request *rq;
3508         u32 head;
3509
3510         mb(); /* paranoia: read the CSB pointers from after the reset */
3511         clflush(execlists->csb_write);
3512         mb();
3513
3514         process_csb(engine); /* drain preemption events */
3515
3516         /* Following the reset, we need to reload the CSB read/write pointers */
3517         reset_csb_pointers(engine);
3518
3519         /*
3520          * Save the currently executing context; even if we completed
3521          * its request, it was still running at the time of the
3522          * reset and will have been clobbered.
3523          */
3524         rq = execlists_active(execlists);
3525         if (!rq)
3526                 goto unwind;
3527
3528         /* We still have requests in-flight; the engine should be active */
3529         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3530
3531         ce = rq->context;
3532         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3533
3534         if (i915_request_completed(rq)) {
3535                 /* Idle context; tidy up the ring so we can restart afresh */
3536                 head = intel_ring_wrap(ce->ring, rq->tail);
3537                 goto out_replay;
3538         }
3539
3540         /* Context has requests still in-flight; it should not be idle! */
3541         GEM_BUG_ON(i915_active_is_idle(&ce->active));
3542         rq = active_request(ce->timeline, rq);
3543         head = intel_ring_wrap(ce->ring, rq->head);
3544         GEM_BUG_ON(head == ce->ring->tail);
3545
3546         /*
3547          * If this request hasn't started yet, e.g. it is waiting on a
3548          * semaphore, we need to avoid skipping the request or else we
3549          * break the signaling chain. However, if the context is corrupt
3550          * the request will not restart and we will be stuck with a wedged
3551          * device. It is quite often the case that if we issue a reset
3552          * while the GPU is loading the context image, the context
3553          * image becomes corrupt.
3554          *
3555          * Otherwise, if we have not started yet, the request should replay
3556          * perfectly and we do not need to flag the result as being erroneous.
3557          */
3558         if (!i915_request_started(rq))
3559                 goto out_replay;
3560
3561         /*
3562          * If the request was innocent, we leave the request in the ELSP
3563          * and will try to replay it on restarting. The context image may
3564          * have been corrupted by the reset, in which case we may have
3565          * to service a new GPU hang, but more likely we can continue on
3566          * without impact.
3567          *
3568          * If the request was guilty, we presume the context is corrupt
3569          * and have to at least restore the RING register in the context
3570          * image back to the expected values to skip over the guilty request.
3571          */
3572         __i915_request_reset(rq, stalled);
3573         if (!stalled)
3574                 goto out_replay;
3575
3576         /*
3577          * We want a simple context + ring to execute the breadcrumb update.
3578          * We cannot rely on the context being intact across the GPU hang,
3579          * so clear it and rebuild just what we need for the breadcrumb.
3580          * All pending requests for this context will be zapped, and any
3581          * future request will be after userspace has had the opportunity
3582          * to recreate its own state.
3583          */
3584         GEM_BUG_ON(!intel_context_is_pinned(ce));
3585         restore_default_state(ce, engine);
3586
3587 out_replay:
3588         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3589                      head, ce->ring->tail);
3590         __execlists_reset_reg_state(ce, engine);
3591         __execlists_update_reg_state(ce, engine, head);
3592         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3593
3594 unwind:
3595         /* Push back any incomplete requests for replay after the reset. */
3596         cancel_port_requests(execlists);
3597         __unwind_incomplete_requests(engine);
3598 }
3599
3600 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3601 {
3602         unsigned long flags;
3603
3604         ENGINE_TRACE(engine, "\n");
3605
3606         spin_lock_irqsave(&engine->active.lock, flags);
3607
3608         __execlists_reset(engine, stalled);
3609
3610         spin_unlock_irqrestore(&engine->active.lock, flags);
3611 }
3612
3613 static void nop_submission_tasklet(unsigned long data)
3614 {
3615         /* The driver is wedged; don't process any more events. */
3616 }
3617
3618 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3619 {
3620         struct intel_engine_execlists * const execlists = &engine->execlists;
3621         struct i915_request *rq, *rn;
3622         struct rb_node *rb;
3623         unsigned long flags;
3624
3625         ENGINE_TRACE(engine, "\n");
3626
3627         /*
3628          * Before we call engine->cancel_requests(), we should have exclusive
3629          * access to the submission state. This is arranged for us by the
3630          * caller disabling the interrupt generation, the tasklet and other
3631          * threads that may then access the same state, giving us a free hand
3632          * to reset state. However, we still need to let lockdep be aware that
3633          * we know this state may be accessed in hardirq context, so we
3634          * disable the irq around this manipulation and we want to keep
3635          * the spinlock focused on its duties and not accidentally conflate
3636          * coverage to the submission's irq state. (Similarly, although we
3637          * shouldn't need to disable irq around the manipulation of the
3638          * submission's irq state, we also wish to remind ourselves that
3639          * it is irq state.)
3640          */
3641         spin_lock_irqsave(&engine->active.lock, flags);
3642
3643         __execlists_reset(engine, true);
3644
3645         /* Mark all executing requests as skipped. */
3646         list_for_each_entry(rq, &engine->active.requests, sched.link)
3647                 mark_eio(rq);
3648
3649         /* Flush the queued requests to the timeline list (for retiring). */
3650         while ((rb = rb_first_cached(&execlists->queue))) {
3651                 struct i915_priolist *p = to_priolist(rb);
3652                 int i;
3653
3654                 priolist_for_each_request_consume(rq, rn, p, i) {
3655                         mark_eio(rq);
3656                         __i915_request_submit(rq);
3657                 }
3658
3659                 rb_erase_cached(&p->node, &execlists->queue);
3660                 i915_priolist_free(p);
3661         }
3662
3663         /* On-hold requests will be flushed to timeline upon their release */
3664         list_for_each_entry(rq, &engine->active.hold, sched.link)
3665                 mark_eio(rq);
3666
3667         /* Cancel all attached virtual engines */
3668         while ((rb = rb_first_cached(&execlists->virtual))) {
3669                 struct virtual_engine *ve =
3670                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3671
3672                 rb_erase_cached(rb, &execlists->virtual);
3673                 RB_CLEAR_NODE(rb);
3674
3675                 spin_lock(&ve->base.active.lock);
3676                 rq = fetch_and_zero(&ve->request);
3677                 if (rq) {
3678                         mark_eio(rq);
3679
3680                         rq->engine = engine;
3681                         __i915_request_submit(rq);
3682                         i915_request_put(rq);
3683
3684                         ve->base.execlists.queue_priority_hint = INT_MIN;
3685                 }
3686                 spin_unlock(&ve->base.active.lock);
3687         }
3688
3689         /* Remaining _unready_ requests will be nop'ed when submitted */
3690
3691         execlists->queue_priority_hint = INT_MIN;
3692         execlists->queue = RB_ROOT_CACHED;
3693
3694         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3695         execlists->tasklet.func = nop_submission_tasklet;
3696
3697         spin_unlock_irqrestore(&engine->active.lock, flags);
3698 }
3699
3700 static void execlists_reset_finish(struct intel_engine_cs *engine)
3701 {
3702         struct intel_engine_execlists * const execlists = &engine->execlists;
3703
3704         /*
3705          * After a GPU reset, we may have requests to replay. Do so now while
3706          * we still have the forcewake to be sure that the GPU is not allowed
3707          * to sleep before we restart and reload a context.
3708          */
3709         GEM_BUG_ON(!reset_in_progress(execlists));
3710         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3711                 execlists->tasklet.func(execlists->tasklet.data);
3712
3713         if (__tasklet_enable(&execlists->tasklet))
3714                 /* And kick in case we missed a new request submission. */
3715                 tasklet_hi_schedule(&execlists->tasklet);
3716         ENGINE_TRACE(engine, "depth->%d\n",
3717                      atomic_read(&execlists->tasklet.count));
3718 }
3719
3720 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3721                                     u64 offset, u32 len,
3722                                     const unsigned int flags)
3723 {
3724         u32 *cs;
3725
3726         cs = intel_ring_begin(rq, 4);
3727         if (IS_ERR(cs))
3728                 return PTR_ERR(cs);
3729
3730         /*
3731          * WaDisableCtxRestoreArbitration:bdw,chv
3732          *
3733          * We don't need to perform MI_ARB_ENABLE as often as we do (in
3734          * particular on all the gens that do not need the w/a at all!); if we
3735          * took care to make sure that on every switch into this context
3736          * (both ordinary and for preemption) arbitration was enabled,
3737          * we would be fine.  However, for gen8 there is another w/a that
3738          * requires us to not preempt inside GPGPU execution, so we keep
3739          * arbitration disabled for gen8 batches. Arbitration will be
3740          * re-enabled before we close the request
3741          * (engine->emit_fini_breadcrumb).
3742          */
3743         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3744
3745         /* FIXME(BDW+): Address space and security selectors. */
3746         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3747                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3748         *cs++ = lower_32_bits(offset);
3749         *cs++ = upper_32_bits(offset);
3750
3751         intel_ring_advance(rq, cs);
3752
3753         return 0;
3754 }
3755
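     /*
      * Unlike gen8_emit_bb_start_noarb(), arbitration is enabled just before
      * the batchbuffer start (so the batch itself can be preempted) and
      * disabled again straight after dispatching it; the fini breadcrumb
      * re-enables arbitration when the request is closed.
      */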
3756 static int gen8_emit_bb_start(struct i915_request *rq,
3757                               u64 offset, u32 len,
3758                               const unsigned int flags)
3759 {
3760         u32 *cs;
3761
3762         cs = intel_ring_begin(rq, 6);
3763         if (IS_ERR(cs))
3764                 return PTR_ERR(cs);
3765
3766         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3767
3768         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3769                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3770         *cs++ = lower_32_bits(offset);
3771         *cs++ = upper_32_bits(offset);
3772
3773         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3774         *cs++ = MI_NOOP;
3775
3776         intel_ring_advance(rq, cs);
3777
3778         return 0;
3779 }
3780
3781 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3782 {
3783         ENGINE_WRITE(engine, RING_IMR,
3784                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3785         ENGINE_POSTING_READ(engine, RING_IMR);
3786 }
3787
3788 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3789 {
3790         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3791 }
3792
3793 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3794 {
3795         u32 cmd, *cs;
3796
3797         cs = intel_ring_begin(request, 4);
3798         if (IS_ERR(cs))
3799                 return PTR_ERR(cs);
3800
3801         cmd = MI_FLUSH_DW + 1;
3802
3803         /* We always require a command barrier so that subsequent
3804          * commands, such as breadcrumb interrupts, are strictly ordered
3805          * wrt the contents of the write cache being flushed to memory
3806          * (and thus being coherent from the CPU).
3807          */
3808         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3809
3810         if (mode & EMIT_INVALIDATE) {
3811                 cmd |= MI_INVALIDATE_TLB;
3812                 if (request->engine->class == VIDEO_DECODE_CLASS)
3813                         cmd |= MI_INVALIDATE_BSD;
3814         }
3815
3816         *cs++ = cmd;
3817         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3818         *cs++ = 0; /* upper addr */
3819         *cs++ = 0; /* value */
3820         intel_ring_advance(request, cs);
3821
3822         return 0;
3823 }
3824
3825 static int gen8_emit_flush_render(struct i915_request *request,
3826                                   u32 mode)
3827 {
3828         bool vf_flush_wa = false, dc_flush_wa = false;
3829         u32 *cs, flags = 0;
3830         int len;
3831
3832         flags |= PIPE_CONTROL_CS_STALL;
3833
3834         if (mode & EMIT_FLUSH) {
3835                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3836                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3837                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3838                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3839         }
3840
3841         if (mode & EMIT_INVALIDATE) {
3842                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3843                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3844                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3845                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3846                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3847                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3848                 flags |= PIPE_CONTROL_QW_WRITE;
3849                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3850
3851                 /*
3852                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3853                  * pipe control.
3854                  */
3855                 if (IS_GEN(request->i915, 9))
3856                         vf_flush_wa = true;
3857
3858                 /* WaForGAMHang:kbl */
3859                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3860                         dc_flush_wa = true;
3861         }
3862
3863         len = 6;
3864
3865         if (vf_flush_wa)
3866                 len += 6;
3867
3868         if (dc_flush_wa)
3869                 len += 12;
3870
3871         cs = intel_ring_begin(request, len);
3872         if (IS_ERR(cs))
3873                 return PTR_ERR(cs);
3874
3875         if (vf_flush_wa)
3876                 cs = gen8_emit_pipe_control(cs, 0, 0);
3877
3878         if (dc_flush_wa)
3879                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3880                                             0);
3881
3882         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3883
3884         if (dc_flush_wa)
3885                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3886
3887         intel_ring_advance(request, cs);
3888
3889         return 0;
3890 }
3891
3892 static int gen11_emit_flush_render(struct i915_request *request,
3893                                    u32 mode)
3894 {
3895         if (mode & EMIT_FLUSH) {
3896                 u32 *cs;
3897                 u32 flags = 0;
3898
3899                 flags |= PIPE_CONTROL_CS_STALL;
3900
3901                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3902                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3903                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3904                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3905                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3906                 flags |= PIPE_CONTROL_QW_WRITE;
3907                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3908
3909                 cs = intel_ring_begin(request, 6);
3910                 if (IS_ERR(cs))
3911                         return PTR_ERR(cs);
3912
3913                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3914                 intel_ring_advance(request, cs);
3915         }
3916
3917         if (mode & EMIT_INVALIDATE) {
3918                 u32 *cs;
3919                 u32 flags = 0;
3920
3921                 flags |= PIPE_CONTROL_CS_STALL;
3922
3923                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3924                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3925                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3926                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3927                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3928                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3929                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3930                 flags |= PIPE_CONTROL_QW_WRITE;
3931                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3932
3933                 cs = intel_ring_begin(request, 6);
3934                 if (IS_ERR(cs))
3935                         return PTR_ERR(cs);
3936
3937                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3938                 intel_ring_advance(request, cs);
3939         }
3940
3941         return 0;
3942 }
3943
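     /*
      * On gen12, MI_ARB_CHECK gains a pre-parser control; this builds the
      * dword that toggles it: pass true to stop pre-fetching (e.g. around a
      * TLB invalidation, see gen12_emit_flush_render()), false to let it
      * resume.
      */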
3944 static u32 preparser_disable(bool state)
3945 {
3946         return MI_ARB_CHECK | 1 << 8 | state;
3947 }
3948
3949 static int gen12_emit_flush_render(struct i915_request *request,
3950                                    u32 mode)
3951 {
3952         if (mode & EMIT_FLUSH) {
3953                 u32 flags = 0;
3954                 u32 *cs;
3955
3956                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3957                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3958                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3959                 /* Wa_1409600907:tgl */
3960                 flags |= PIPE_CONTROL_DEPTH_STALL;
3961                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3962                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3963                 flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
3964
3965                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3966                 flags |= PIPE_CONTROL_QW_WRITE;
3967
3968                 flags |= PIPE_CONTROL_CS_STALL;
3969
3970                 cs = intel_ring_begin(request, 6);
3971                 if (IS_ERR(cs))
3972                         return PTR_ERR(cs);
3973
3974                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3975                 intel_ring_advance(request, cs);
3976         }
3977
3978         if (mode & EMIT_INVALIDATE) {
3979                 u32 flags = 0;
3980                 u32 *cs;
3981
3982                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3983                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3984                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3985                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3986                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3987                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3988                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3989                 flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
3990
3991                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3992                 flags |= PIPE_CONTROL_QW_WRITE;
3993
3994                 flags |= PIPE_CONTROL_CS_STALL;
3995
3996                 cs = intel_ring_begin(request, 8);
3997                 if (IS_ERR(cs))
3998                         return PTR_ERR(cs);
3999
4000                 /*
4001                  * Prevent the pre-parser from skipping past the TLB
4002                  * invalidate and loading a stale page for the batch
4003                  * buffer / request payload.
4004                  */
4005                 *cs++ = preparser_disable(true);
4006
4007                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4008
4009                 *cs++ = preparser_disable(false);
4010                 intel_ring_advance(request, cs);
4011
4012                 /*
4013                  * Wa_1604544889:tgl
4014                  */
4015                 if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4016                         flags = 0;
4017                         flags |= PIPE_CONTROL_CS_STALL;
4018                         flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4019
4020                         flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4021                         flags |= PIPE_CONTROL_QW_WRITE;
4022
4023                         cs = intel_ring_begin(request, 6);
4024                         if (IS_ERR(cs))
4025                                 return PTR_ERR(cs);
4026
4027                         cs = gen8_emit_pipe_control(cs, flags,
4028                                                     LRC_PPHWSP_SCRATCH_ADDR);
4029                         intel_ring_advance(request, cs);
4030                 }
4031         }
4032
4033         return 0;
4034 }
4035
4036 /*
4037  * Reserve space for 2 NOOPs at the end of each request to be
4038  * used as a workaround for not being allowed to do lite
4039  * restore with HEAD==TAIL (WaIdleLiteRestore).
4040  */
4041 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4042 {
4043         /* Ensure there's always at least one preemption point per-request. */
4044         *cs++ = MI_ARB_CHECK;
4045         *cs++ = MI_NOOP;
4046         request->wa_tail = intel_ring_offset(request, cs);
4047
4048         return cs;
4049 }
4050
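     /*
      * After the breadcrumb write, spin on the preempt semaphore in the HWSP
      * until it reads zero (MI_SEMAPHORE_SAD_EQ_SDD with data 0); while
      * ring_set_paused() holds it non-zero, completed requests are frozen at
      * this point so the ELSP can be rearranged (preempt-to-busy).
      */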
4051 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4052 {
4053         *cs++ = MI_SEMAPHORE_WAIT |
4054                 MI_SEMAPHORE_GLOBAL_GTT |
4055                 MI_SEMAPHORE_POLL |
4056                 MI_SEMAPHORE_SAD_EQ_SDD;
4057         *cs++ = 0;
4058         *cs++ = intel_hws_preempt_address(request->engine);
4059         *cs++ = 0;
4060
4061         return cs;
4062 }
4063
4064 static __always_inline u32*
4065 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4066                                  u32 *cs)
4067 {
4068         *cs++ = MI_USER_INTERRUPT;
4069
4070         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4071         if (intel_engine_has_semaphores(request->engine))
4072                 cs = emit_preempt_busywait(request, cs);
4073
4074         request->tail = intel_ring_offset(request, cs);
4075         assert_ring_tail_valid(request->ring, request->tail);
4076
4077         return gen8_emit_wa_tail(request, cs);
4078 }
4079
4080 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4081 {
4082         cs = gen8_emit_ggtt_write(cs,
4083                                   request->fence.seqno,
4084                                   i915_request_active_timeline(request)->hwsp_offset,
4085                                   0);
4086
4087         return gen8_emit_fini_breadcrumb_footer(request, cs);
4088 }
4089
4090 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4091 {
4092         cs = gen8_emit_pipe_control(cs,
4093                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4094                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4095                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4096                                     0);
4097
4098         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4099         cs = gen8_emit_ggtt_write_rcs(cs,
4100                                       request->fence.seqno,
4101                                       i915_request_active_timeline(request)->hwsp_offset,
4102                                       PIPE_CONTROL_FLUSH_ENABLE |
4103                                       PIPE_CONTROL_CS_STALL);
4104
4105         return gen8_emit_fini_breadcrumb_footer(request, cs);
4106 }
4107
4108 static u32 *
4109 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4110 {
4111         cs = gen8_emit_ggtt_write_rcs(cs,
4112                                       request->fence.seqno,
4113                                       i915_request_active_timeline(request)->hwsp_offset,
4114                                       PIPE_CONTROL_CS_STALL |
4115                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4116                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4117                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4118                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4119                                       PIPE_CONTROL_FLUSH_ENABLE);
4120
4121         return gen8_emit_fini_breadcrumb_footer(request, cs);
4122 }
4123
4124 /*
4125  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4126  * flush and will continue pre-fetching the instructions after it before the
4127  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4128  * BB_START/END instructions, so, even though we might pre-fetch the preamble
4129  * of the next request before the memory has been flushed, we're guaranteed that
4130  * we won't access the batch itself too early.
4131  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4132  * so, if the current request is modifying an instruction in the next request on
4133  * the same intel_context, we might pre-fetch and then execute the pre-update
4134  * instruction. To avoid this, the users of self-modifying code should either
4135  * disable the parser around the code emitting the memory writes, via a new flag
4136  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4137  * the in-kernel use-cases we've opted to use a separate context, see
4138  * reloc_gpu() as an example.
4139  * All the above applies only to the instructions themselves. Non-inline data
4140  * used by the instructions is not pre-fetched.
4141  */
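     /*
      * Editor's sketch only, not used by the driver: per the note above, a
      * gen12+ request patching instructions of a later request on the same
      * context could bracket its stores with the MI_ARB_CHECK pre-parser
      * toggle so the stale instructions are not pre-fetched. The helper name
      * below is hypothetical.
      */
     static inline u32 *sketch_emit_self_modifying_writes(u32 *cs)
     {
             *cs++ = preparser_disable(true);  /* stop pre-fetching past here */
             /* ... MI_STORE_DWORD_IMM writes patching the following batch ... */
             *cs++ = preparser_disable(false); /* let the pre-parser resume */

             return cs;
     }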
4142
4143 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4144 {
4145         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4146                 MI_SEMAPHORE_GLOBAL_GTT |
4147                 MI_SEMAPHORE_POLL |
4148                 MI_SEMAPHORE_SAD_EQ_SDD;
4149         *cs++ = 0;
4150         *cs++ = intel_hws_preempt_address(request->engine);
4151         *cs++ = 0;
4152         *cs++ = 0;
4153         *cs++ = MI_NOOP;
4154
4155         return cs;
4156 }
4157
4158 static __always_inline u32*
4159 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4160 {
4161         *cs++ = MI_USER_INTERRUPT;
4162
4163         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4164         if (intel_engine_has_semaphores(request->engine))
4165                 cs = gen12_emit_preempt_busywait(request, cs);
4166
4167         request->tail = intel_ring_offset(request, cs);
4168         assert_ring_tail_valid(request->ring, request->tail);
4169
4170         return gen8_emit_wa_tail(request, cs);
4171 }
4172
4173 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4174 {
4175         cs = gen8_emit_ggtt_write(cs,
4176                                   request->fence.seqno,
4177                                   i915_request_active_timeline(request)->hwsp_offset,
4178                                   0);
4179
4180         return gen12_emit_fini_breadcrumb_footer(request, cs);
4181 }
4182
4183 static u32 *
4184 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4185 {
4186         cs = gen8_emit_ggtt_write_rcs(cs,
4187                                       request->fence.seqno,
4188                                       i915_request_active_timeline(request)->hwsp_offset,
4189                                       PIPE_CONTROL_CS_STALL |
4190                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4191                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4192                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4193                                       /* Wa_1409600907:tgl */
4194                                       PIPE_CONTROL_DEPTH_STALL |
4195                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4196                                       PIPE_CONTROL_FLUSH_ENABLE |
4197                                       PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4198
4199         return gen12_emit_fini_breadcrumb_footer(request, cs);
4200 }
4201
4202 static void execlists_park(struct intel_engine_cs *engine)
4203 {
4204         cancel_timer(&engine->execlists.timer);
4205         cancel_timer(&engine->execlists.preempt);
4206 }
4207
4208 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4209 {
4210         engine->submit_request = execlists_submit_request;
4211         engine->schedule = i915_schedule;
4212         engine->execlists.tasklet.func = execlists_submission_tasklet;
4213
4214         engine->reset.prepare = execlists_reset_prepare;
4215         engine->reset.rewind = execlists_reset_rewind;
4216         engine->reset.cancel = execlists_reset_cancel;
4217         engine->reset.finish = execlists_reset_finish;
4218
4219         engine->park = execlists_park;
4220         engine->unpark = NULL;
4221
4222         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4223         if (!intel_vgpu_active(engine->i915)) {
4224                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4225                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4226                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4227         }
4228
4229         if (INTEL_GEN(engine->i915) >= 12)
4230                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4231
4232         if (intel_engine_has_preemption(engine))
4233                 engine->emit_bb_start = gen8_emit_bb_start;
4234         else
4235                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4236 }
4237
4238 static void execlists_shutdown(struct intel_engine_cs *engine)
4239 {
4240         /* Synchronise with residual timers and any softirq they raise */
4241         del_timer_sync(&engine->execlists.timer);
4242         del_timer_sync(&engine->execlists.preempt);
4243         tasklet_kill(&engine->execlists.tasklet);
4244 }
4245
4246 static void execlists_release(struct intel_engine_cs *engine)
4247 {
4248         execlists_shutdown(engine);
4249
4250         intel_engine_cleanup_common(engine);
4251         lrc_destroy_wa_ctx(engine);
4252 }
4253
4254 static void
4255 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4256 {
4257         /* Default vfuncs which can be overridden by each engine. */
4258
4259         engine->resume = execlists_resume;
4260
4261         engine->cops = &execlists_context_ops;
4262         engine->request_alloc = execlists_request_alloc;
4263
4264         engine->emit_flush = gen8_emit_flush;
4265         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4266         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4267         if (INTEL_GEN(engine->i915) >= 12)
4268                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4269
4270         engine->set_default_submission = intel_execlists_set_default_submission;
4271
4272         if (INTEL_GEN(engine->i915) < 11) {
4273                 engine->irq_enable = gen8_logical_ring_enable_irq;
4274                 engine->irq_disable = gen8_logical_ring_disable_irq;
4275         } else {
4276                 /*
4277                  * TODO: On Gen11 interrupt masks need to be clear
4278                  * to allow C6 entry. Keep interrupts enabled
4279                  * and take the hit of generating extra interrupts
4280                  * until a more refined solution exists.
4281                  */
4282         }
4283 }
4284
4285 static inline void
4286 logical_ring_default_irqs(struct intel_engine_cs *engine)
4287 {
4288         unsigned int shift = 0;
4289
4290         if (INTEL_GEN(engine->i915) < 11) {
4291                 const u8 irq_shifts[] = {
4292                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
4293                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
4294                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4295                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4296                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
4297                 };
4298
4299                 shift = irq_shifts[engine->id];
4300         }
4301
4302         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4303         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4304 }
4305
4306 static void rcs_submission_override(struct intel_engine_cs *engine)
4307 {
4308         switch (INTEL_GEN(engine->i915)) {
4309         case 12:
4310                 engine->emit_flush = gen12_emit_flush_render;
4311                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4312                 break;
4313         case 11:
4314                 engine->emit_flush = gen11_emit_flush_render;
4315                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4316                 break;
4317         default:
4318                 engine->emit_flush = gen8_emit_flush_render;
4319                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4320                 break;
4321         }
4322 }
4323
4324 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4325 {
4326         struct intel_engine_execlists * const execlists = &engine->execlists;
4327         struct drm_i915_private *i915 = engine->i915;
4328         struct intel_uncore *uncore = engine->uncore;
4329         u32 base = engine->mmio_base;
4330
4331         tasklet_init(&engine->execlists.tasklet,
4332                      execlists_submission_tasklet, (unsigned long)engine);
4333         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4334         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4335
4336         logical_ring_default_vfuncs(engine);
4337         logical_ring_default_irqs(engine);
4338
4339         if (engine->class == RENDER_CLASS)
4340                 rcs_submission_override(engine);
4341
4342         if (intel_init_workaround_bb(engine))
4343                 /*
4344                  * We continue even if we fail to initialize the WA batch
4345                  * because we only expect rare glitches, nothing
4346                  * critical enough to prevent us from using the GPU.
4347                  */
4348                 DRM_ERROR("WA batch buffer initialization failed\n");
4349
4350         if (HAS_LOGICAL_RING_ELSQ(i915)) {
4351                 execlists->submit_reg = uncore->regs +
4352                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4353                 execlists->ctrl_reg = uncore->regs +
4354                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4355         } else {
4356                 execlists->submit_reg = uncore->regs +
4357                         i915_mmio_reg_offset(RING_ELSP(base));
4358         }
4359
4360         execlists->csb_status =
4361                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4362
4363         execlists->csb_write =
4364                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
4365
4366         if (INTEL_GEN(i915) < 11)
4367                 execlists->csb_size = GEN8_CSB_ENTRIES;
4368         else
4369                 execlists->csb_size = GEN11_CSB_ENTRIES;
4370
4371         reset_csb_pointers(engine);
4372
4373         /* Finally, take ownership and responsibility for cleanup! */
4374         engine->release = execlists_release;
4375
4376         return 0;
4377 }
4378
4379 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4380 {
4381         u32 indirect_ctx_offset;
4382
4383         switch (INTEL_GEN(engine->i915)) {
4384         default:
4385                 MISSING_CASE(INTEL_GEN(engine->i915));
4386                 /* fall through */
4387         case 12:
4388                 indirect_ctx_offset =
4389                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4390                 break;
4391         case 11:
4392                 indirect_ctx_offset =
4393                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4394                 break;
4395         case 10:
4396                 indirect_ctx_offset =
4397                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4398                 break;
4399         case 9:
4400                 indirect_ctx_offset =
4401                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4402                 break;
4403         case 8:
4404                 indirect_ctx_offset =
4405                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4406                 break;
4407         }
4408
4409         return indirect_ctx_offset;
4410 }
4411
4413 static void init_common_reg_state(u32 * const regs,
4414                                   const struct intel_engine_cs *engine,
4415                                   const struct intel_ring *ring,
4416                                   bool inhibit)
4417 {
4418         u32 ctl;
4419
4420         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4421         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4422         if (inhibit)
4423                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4424         if (INTEL_GEN(engine->i915) < 11)
4425                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4426                                            CTX_CTRL_RS_CTX_ENABLE);
4427         regs[CTX_CONTEXT_CONTROL] = ctl;
4428
4429         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4430 }
4431
4432 static void init_wa_bb_reg_state(u32 * const regs,
4433                                  const struct intel_engine_cs *engine,
4434                                  u32 pos_bb_per_ctx)
4435 {
4436         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4437
4438         if (wa_ctx->per_ctx.size) {
4439                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4440
4441                 regs[pos_bb_per_ctx] =
4442                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4443         }
4444
4445         if (wa_ctx->indirect_ctx.size) {
4446                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4447
4448                 regs[pos_bb_per_ctx + 2] =
4449                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
4450                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4451
4452                 regs[pos_bb_per_ctx + 4] =
4453                         intel_lr_indirect_ctx_offset(engine) << 6;
4454         }
4455 }
4456
4457 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4458 {
4459         if (i915_vm_is_4lvl(&ppgtt->vm)) {
4460                 /* 64b PPGTT (48bit canonical)
4461                  * PDP0_DESCRIPTOR contains the base address to PML4 and
4462                  * other PDP Descriptors are ignored.
4463                  */
4464                 ASSIGN_CTX_PML4(ppgtt, regs);
4465         } else {
4466                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
4467                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
4468                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
4469                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
4470         }
4471 }
4472
4473 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4474 {
4475         if (i915_is_ggtt(vm))
4476                 return i915_vm_to_ggtt(vm)->alias;
4477         else
4478                 return i915_vm_to_ppgtt(vm);
4479 }
4480
4481 static void execlists_init_reg_state(u32 *regs,
4482                                      const struct intel_context *ce,
4483                                      const struct intel_engine_cs *engine,
4484                                      const struct intel_ring *ring,
4485                                      bool inhibit)
4486 {
4487         /*
4488          * A context is actually a big batch buffer with several
4489          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4490          * values we are setting here are only for the first context restore:
4491          * on a subsequent save, the GPU will recreate this batchbuffer with new
4492          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4493          * we are not initializing here).
4494          *
4495          * Must keep consistent with virtual_update_register_offsets().
4496          */
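	/*
	 * Illustrative shape of the image being built here (editor's sketch;
	 * the exact registers and counts are per-gen and come from
	 * reg_offsets()):
	 *
	 *   MI_LOAD_REGISTER_IMM(N)
	 *     RING_CONTEXT_CONTROL(base), <value>
	 *     RING_HEAD(base),            <value>
	 *     RING_TAIL(base),            <value>
	 *     ...
	 *   MI_LOAD_REGISTER_IMM(M)
	 *     ... further (reg, value) pairs ...
	 *
	 * set_offsets() emits the command headers and register offsets; the
	 * init_*_reg_state() helpers below then fill in the handful of
	 * values we care about for the first restore.
	 */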
4497         set_offsets(regs, reg_offsets(engine), engine, inhibit);
4498
4499         init_common_reg_state(regs, engine, ring, inhibit);
4500         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4501
4502         init_wa_bb_reg_state(regs, engine,
4503                              INTEL_GEN(engine->i915) >= 12 ?
4504                              GEN12_CTX_BB_PER_CTX_PTR :
4505                              CTX_BB_PER_CTX_PTR);
4506
4507         __reset_stop_ring(regs, engine);
4508 }
4509
4510 static int
4511 populate_lr_context(struct intel_context *ce,
4512                     struct drm_i915_gem_object *ctx_obj,
4513                     struct intel_engine_cs *engine,
4514                     struct intel_ring *ring)
4515 {
4516         bool inhibit = true;
4517         void *vaddr;
4518         int ret;
4519
4520         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4521         if (IS_ERR(vaddr)) {
4522                 ret = PTR_ERR(vaddr);
4523                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4524                 return ret;
4525         }
4526
4527         set_redzone(vaddr, engine);
4528
4529         if (engine->default_state) {
4530                 void *defaults;
4531
4532                 defaults = i915_gem_object_pin_map(engine->default_state,
4533                                                    I915_MAP_WB);
4534                 if (IS_ERR(defaults)) {
4535                         ret = PTR_ERR(defaults);
4536                         goto err_unpin_ctx;
4537                 }
4538
4539                 memcpy(vaddr, defaults, engine->context_size);
4540                 i915_gem_object_unpin_map(engine->default_state);
4541                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
4542                 inhibit = false;
4543         }
4544
4545         /* The second page of the context object contains some fields which must
4546          * be set up prior to the first execution. */
4547         execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4548                                  ce, engine, ring, inhibit);
4549
4550         ret = 0;
4551 err_unpin_ctx:
4552         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4553         i915_gem_object_unpin_map(ctx_obj);
4554         return ret;
4555 }
4556
4557 static int __execlists_context_alloc(struct intel_context *ce,
4558                                      struct intel_engine_cs *engine)
4559 {
4560         struct drm_i915_gem_object *ctx_obj;
4561         struct intel_ring *ring;
4562         struct i915_vma *vma;
4563         u32 context_size;
4564         int ret;
4565
4566         GEM_BUG_ON(ce->state);
4567         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4568
4569         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4570                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4571
4572         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4573         if (IS_ERR(ctx_obj))
4574                 return PTR_ERR(ctx_obj);
4575
4576         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4577         if (IS_ERR(vma)) {
4578                 ret = PTR_ERR(vma);
4579                 goto error_deref_obj;
4580         }
4581
4582         if (!ce->timeline) {
4583                 struct intel_timeline *tl;
4584
4585                 tl = intel_timeline_create(engine->gt, NULL);
4586                 if (IS_ERR(tl)) {
4587                         ret = PTR_ERR(tl);
4588                         goto error_deref_obj;
4589                 }
4590
4591                 ce->timeline = tl;
4592         }
4593
4594         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4595         if (IS_ERR(ring)) {
4596                 ret = PTR_ERR(ring);
4597                 goto error_deref_obj;
4598         }
4599
4600         ret = populate_lr_context(ce, ctx_obj, engine, ring);
4601         if (ret) {
4602                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4603                 goto error_ring_free;
4604         }
4605
4606         ce->ring = ring;
4607         ce->state = vma;
4608
4609         return 0;
4610
4611 error_ring_free:
4612         intel_ring_put(ring);
4613 error_deref_obj:
4614         i915_gem_object_put(ctx_obj);
4615         return ret;
4616 }
4617
4618 static struct list_head *virtual_queue(struct virtual_engine *ve)
4619 {
4620         return &ve->base.execlists.default_priolist.requests[0];
4621 }
4622
4623 static void virtual_context_destroy(struct kref *kref)
4624 {
4625         struct virtual_engine *ve =
4626                 container_of(kref, typeof(*ve), context.ref);
4627         unsigned int n;
4628
4629         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4630         GEM_BUG_ON(ve->request);
4631         GEM_BUG_ON(ve->context.inflight);
4632
4633         for (n = 0; n < ve->num_siblings; n++) {
4634                 struct intel_engine_cs *sibling = ve->siblings[n];
4635                 struct rb_node *node = &ve->nodes[sibling->id].rb;
4636                 unsigned long flags;
4637
4638                 if (RB_EMPTY_NODE(node))
4639                         continue;
4640
4641                 spin_lock_irqsave(&sibling->active.lock, flags);
4642
4643                 /* Detachment is lazily performed in the execlists tasklet */
4644                 if (!RB_EMPTY_NODE(node))
4645                         rb_erase_cached(node, &sibling->execlists.virtual);
4646
4647                 spin_unlock_irqrestore(&sibling->active.lock, flags);
4648         }
4649         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4650
4651         if (ve->context.state)
4652                 __execlists_context_fini(&ve->context);
4653         intel_context_fini(&ve->context);
4654
4655         kfree(ve->bonds);
4656         kfree(ve);
4657 }
4658
4659 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4660 {
4661         int swp;
4662
4663         /*
4664          * Pick a random sibling when starting, to help spread the load around.
4665          *
4666          * New contexts are typically created with exactly the same order
4667          * of siblings, and often started in batches. Due to the way we iterate
4668          * the array of siblings when submitting requests, sibling[0] is
4669          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4670          * randomised across the system, we also help spread the load by the
4671          * first engine we inspect being different each time.
4672          *
4673          * NB This does not force us to execute on this engine, it will just
4674          * typically be the first we inspect for submission.
4675          */
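	/*
	 * Worked example (editor's illustration): with three siblings
	 * {vcs0, vcs1, vcs2} and swp == 2, the swap below yields
	 * {vcs2, vcs1, vcs0}, so vcs2 becomes the first engine inspected
	 * for this context. swp == 0 leaves the array untouched and we
	 * return early.
	 */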
4676         swp = prandom_u32_max(ve->num_siblings);
4677         if (!swp)
4678                 return;
4679
4680         swap(ve->siblings[swp], ve->siblings[0]);
4681         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4682                 virtual_update_register_offsets(ve->context.lrc_reg_state,
4683                                                 ve->siblings[0]);
4684 }
4685
4686 static int virtual_context_alloc(struct intel_context *ce)
4687 {
4688         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4689
4690         return __execlists_context_alloc(ce, ve->siblings[0]);
4691 }
4692
4693 static int virtual_context_pin(struct intel_context *ce)
4694 {
4695         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4696         int err;
4697
4698         /* Note: we must use a real engine class for setting up reg state */
4699         err = __execlists_context_pin(ce, ve->siblings[0]);
4700         if (err)
4701                 return err;
4702
4703         virtual_engine_initial_hint(ve);
4704         return 0;
4705 }
4706
4707 static void virtual_context_enter(struct intel_context *ce)
4708 {
4709         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4710         unsigned int n;
4711
4712         for (n = 0; n < ve->num_siblings; n++)
4713                 intel_engine_pm_get(ve->siblings[n]);
4714
4715         intel_timeline_enter(ce->timeline);
4716 }
4717
4718 static void virtual_context_exit(struct intel_context *ce)
4719 {
4720         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4721         unsigned int n;
4722
4723         intel_timeline_exit(ce->timeline);
4724
4725         for (n = 0; n < ve->num_siblings; n++)
4726                 intel_engine_pm_put(ve->siblings[n]);
4727 }
4728
4729 static const struct intel_context_ops virtual_context_ops = {
4730         .alloc = virtual_context_alloc,
4731
4732         .pin = virtual_context_pin,
4733         .unpin = execlists_context_unpin,
4734
4735         .enter = virtual_context_enter,
4736         .exit = virtual_context_exit,
4737
4738         .destroy = virtual_context_destroy,
4739 };
4740
4741 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4742 {
4743         struct i915_request *rq;
4744         intel_engine_mask_t mask;
4745
4746         rq = READ_ONCE(ve->request);
4747         if (!rq)
4748                 return 0;
4749
4750         /* The rq is ready for submission; rq->execution_mask is now stable. */
4751         mask = rq->execution_mask;
4752         if (unlikely(!mask)) {
4753                 /* Invalid selection, submit to a random engine in error */
4754                 i915_request_skip(rq, -ENODEV);
4755                 mask = ve->siblings[0]->mask;
4756         }
4757
4758         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4759                      rq->fence.context, rq->fence.seqno,
4760                      mask, ve->base.execlists.queue_priority_hint);
4761
4762         return mask;
4763 }
4764
4765 static void virtual_submission_tasklet(unsigned long data)
4766 {
4767         struct virtual_engine * const ve = (struct virtual_engine *)data;
4768         const int prio = ve->base.execlists.queue_priority_hint;
4769         intel_engine_mask_t mask;
4770         unsigned int n;
4771
4772         rcu_read_lock();
4773         mask = virtual_submission_mask(ve);
4774         rcu_read_unlock();
4775         if (unlikely(!mask))
4776                 return;
4777
4778         local_irq_disable();
4779         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4780                 struct intel_engine_cs *sibling = ve->siblings[n];
4781                 struct ve_node * const node = &ve->nodes[sibling->id];
4782                 struct rb_node **parent, *rb;
4783                 bool first;
4784
4785                 if (unlikely(!(mask & sibling->mask))) {
4786                         if (!RB_EMPTY_NODE(&node->rb)) {
4787                                 spin_lock(&sibling->active.lock);
4788                                 rb_erase_cached(&node->rb,
4789                                                 &sibling->execlists.virtual);
4790                                 RB_CLEAR_NODE(&node->rb);
4791                                 spin_unlock(&sibling->active.lock);
4792                         }
4793                         continue;
4794                 }
4795
4796                 spin_lock(&sibling->active.lock);
4797
4798                 if (!RB_EMPTY_NODE(&node->rb)) {
4799                         /*
4800                          * Cheat and avoid rebalancing the tree if we can
4801                          * reuse this node in situ.
4802                          */
4803                         first = rb_first_cached(&sibling->execlists.virtual) ==
4804                                 &node->rb;
4805                         if (prio == node->prio || (prio > node->prio && first))
4806                                 goto submit_engine;
4807
4808                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4809                 }
4810
4811                 rb = NULL;
4812                 first = true;
4813                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4814                 while (*parent) {
4815                         struct ve_node *other;
4816
4817                         rb = *parent;
4818                         other = rb_entry(rb, typeof(*other), rb);
4819                         if (prio > other->prio) {
4820                                 parent = &rb->rb_left;
4821                         } else {
4822                                 parent = &rb->rb_right;
4823                                 first = false;
4824                         }
4825                 }
4826
4827                 rb_link_node(&node->rb, rb, parent);
4828                 rb_insert_color_cached(&node->rb,
4829                                        &sibling->execlists.virtual,
4830                                        first);
4831
4832 submit_engine:
4833                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4834                 node->prio = prio;
4835                 if (first && prio > sibling->execlists.queue_priority_hint) {
4836                         sibling->execlists.queue_priority_hint = prio;
4837                         tasklet_hi_schedule(&sibling->execlists.tasklet);
4838                 }
4839
4840                 spin_unlock(&sibling->active.lock);
4841         }
4842         local_irq_enable();
4843 }
4844
4845 static void virtual_submit_request(struct i915_request *rq)
4846 {
4847         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4848         struct i915_request *old;
4849         unsigned long flags;
4850
4851         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
4852                      rq->fence.context,
4853                      rq->fence.seqno);
4854
4855         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4856
4857         spin_lock_irqsave(&ve->base.active.lock, flags);
4858
4859         old = ve->request;
4860         if (old) { /* background completion event from preempt-to-busy */
4861                 GEM_BUG_ON(!i915_request_completed(old));
4862                 __i915_request_submit(old);
4863                 i915_request_put(old);
4864         }
4865
4866         if (i915_request_completed(rq)) {
4867                 __i915_request_submit(rq);
4868
4869                 ve->base.execlists.queue_priority_hint = INT_MIN;
4870                 ve->request = NULL;
4871         } else {
4872                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
4873                 ve->request = i915_request_get(rq);
4874
4875                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4876                 list_move_tail(&rq->sched.link, virtual_queue(ve));
4877
4878                 tasklet_schedule(&ve->base.execlists.tasklet);
4879         }
4880
4881         spin_unlock_irqrestore(&ve->base.active.lock, flags);
4882 }
4883
4884 static struct ve_bond *
4885 virtual_find_bond(struct virtual_engine *ve,
4886                   const struct intel_engine_cs *master)
4887 {
4888         int i;
4889
4890         for (i = 0; i < ve->num_bonds; i++) {
4891                 if (ve->bonds[i].master == master)
4892                         return &ve->bonds[i];
4893         }
4894
4895         return NULL;
4896 }
4897
4898 static void
4899 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
4900 {
4901         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4902         intel_engine_mask_t allowed, exec;
4903         struct ve_bond *bond;
4904
4905         allowed = ~to_request(signal)->engine->mask;
4906
4907         bond = virtual_find_bond(ve, to_request(signal)->engine);
4908         if (bond)
4909                 allowed &= bond->sibling_mask;
4910
4911         /* Restrict the bonded request to run on only the available engines */
4912         exec = READ_ONCE(rq->execution_mask);
4913         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
4914                 ;
4915
4916         /* Prevent the master from being re-run on the bonded engines */
4917         to_request(signal)->execution_mask &= ~allowed;
4918 }
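/*
 * Worked example for the mask arithmetic in virtual_bond_execute() above
 * (editor's illustration; the engine masks are arbitrary): say the master
 * signal ran on an engine with mask 0x04 and the bond for that master
 * lists siblings 0x08 | 0x10. Then allowed = ~0x04 & 0x18 = 0x18, the
 * bonded request's execution_mask is narrowed to its intersection with
 * 0x18, and the master's execution_mask has the 0x18 bits cleared so it
 * will not be re-run on those bonded engines.
 */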
4919
4920 struct intel_context *
4921 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
4922                                unsigned int count)
4923 {
4924         struct virtual_engine *ve;
4925         unsigned int n;
4926         int err;
4927
4928         if (count == 0)
4929                 return ERR_PTR(-EINVAL);
4930
4931         if (count == 1)
4932                 return intel_context_create(siblings[0]);
4933
4934         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
4935         if (!ve)
4936                 return ERR_PTR(-ENOMEM);
4937
4938         ve->base.i915 = siblings[0]->i915;
4939         ve->base.gt = siblings[0]->gt;
4940         ve->base.uncore = siblings[0]->uncore;
4941         ve->base.id = -1;
4942
4943         ve->base.class = OTHER_CLASS;
4944         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4945         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4946         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4947
4948         /*
4949          * The decision on whether to submit a request using semaphores
4950          * depends on the saturated state of the engine. We only compute
4951          * this during HW submission of the request, and we need this
4952          * state to be globally applied to all requests being submitted
4953          * to this engine. Virtual engines encompass more than one physical
4954          * engine and so we cannot accurately tell in advance if one of those
4955          * engines is already saturated and so cannot afford to use a semaphore
4956          * and be pessimized in priority for doing so -- if we are the only
4957          * context using semaphores after all other clients have stopped, we
4958          * will be starved on the saturated system. Such a global switch for
4959          * semaphores is less than ideal, but alas is the current compromise.
4960          */
4961         ve->base.saturated = ALL_ENGINES;
4962
4963         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4964
4965         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
4966         intel_engine_init_breadcrumbs(&ve->base);
4967         intel_engine_init_execlists(&ve->base);
4968
4969         ve->base.cops = &virtual_context_ops;
4970         ve->base.request_alloc = execlists_request_alloc;
4971
4972         ve->base.schedule = i915_schedule;
4973         ve->base.submit_request = virtual_submit_request;
4974         ve->base.bond_execute = virtual_bond_execute;
4975
4976         INIT_LIST_HEAD(virtual_queue(ve));
4977         ve->base.execlists.queue_priority_hint = INT_MIN;
4978         tasklet_init(&ve->base.execlists.tasklet,
4979                      virtual_submission_tasklet,
4980                      (unsigned long)ve);
4981
4982         intel_context_init(&ve->context, &ve->base);
4983
4984         for (n = 0; n < count; n++) {
4985                 struct intel_engine_cs *sibling = siblings[n];
4986
4987                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4988                 if (sibling->mask & ve->base.mask) {
4989                         DRM_DEBUG("duplicate %s entry in load balancer\n",
4990                                   sibling->name);
4991                         err = -EINVAL;
4992                         goto err_put;
4993                 }
4994
4995                 /*
4996                  * The virtual engine implementation is tightly coupled to
4997          * the execlists backend -- we push out requests directly
4998                  * into a tree inside each physical engine. We could support
4999                  * layering if we handle cloning of the requests and
5000                  * submitting a copy into each backend.
5001                  */
5002                 if (sibling->execlists.tasklet.func !=
5003                     execlists_submission_tasklet) {
5004                         err = -ENODEV;
5005                         goto err_put;
5006                 }
5007
5008                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5009                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5010
5011                 ve->siblings[ve->num_siblings++] = sibling;
5012                 ve->base.mask |= sibling->mask;
5013
5014                 /*
5015                  * All physical engines must be compatible for their emission
5016                  * functions (as we build the instructions during request
5017                  * construction and do not alter them before submission
5018                  * on the physical engine). We use the engine class as a guide
5019                  * here, although that could be refined.
5020                  */
5021                 if (ve->base.class != OTHER_CLASS) {
5022                         if (ve->base.class != sibling->class) {
5023                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5024                                           sibling->class, ve->base.class);
5025                                 err = -EINVAL;
5026                                 goto err_put;
5027                         }
5028                         continue;
5029                 }
5030
5031                 ve->base.class = sibling->class;
5032                 ve->base.uabi_class = sibling->uabi_class;
5033                 snprintf(ve->base.name, sizeof(ve->base.name),
5034                          "v%dx%d", ve->base.class, count);
5035                 ve->base.context_size = sibling->context_size;
5036
5037                 ve->base.emit_bb_start = sibling->emit_bb_start;
5038                 ve->base.emit_flush = sibling->emit_flush;
5039                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5040                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5041                 ve->base.emit_fini_breadcrumb_dw =
5042                         sibling->emit_fini_breadcrumb_dw;
5043
5044                 ve->base.flags = sibling->flags;
5045         }
5046
5047         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5048
5049         return &ve->context;
5050
5051 err_put:
5052         intel_context_put(&ve->context);
5053         return ERR_PTR(err);
5054 }
5055
5056 struct intel_context *
5057 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5058 {
5059         struct virtual_engine *se = to_virtual_engine(src);
5060         struct intel_context *dst;
5061
5062         dst = intel_execlists_create_virtual(se->siblings,
5063                                              se->num_siblings);
5064         if (IS_ERR(dst))
5065                 return dst;
5066
5067         if (se->num_bonds) {
5068                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5069
5070                 de->bonds = kmemdup(se->bonds,
5071                                     sizeof(*se->bonds) * se->num_bonds,
5072                                     GFP_KERNEL);
5073                 if (!de->bonds) {
5074                         intel_context_put(dst);
5075                         return ERR_PTR(-ENOMEM);
5076                 }
5077
5078                 de->num_bonds = se->num_bonds;
5079         }
5080
5081         return dst;
5082 }
5083
5084 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5085                                      const struct intel_engine_cs *master,
5086                                      const struct intel_engine_cs *sibling)
5087 {
5088         struct virtual_engine *ve = to_virtual_engine(engine);
5089         struct ve_bond *bond;
5090         int n;
5091
5092         /* Sanity check the sibling is part of the virtual engine */
5093         for (n = 0; n < ve->num_siblings; n++)
5094                 if (sibling == ve->siblings[n])
5095                         break;
5096         if (n == ve->num_siblings)
5097                 return -EINVAL;
5098
5099         bond = virtual_find_bond(ve, master);
5100         if (bond) {
5101                 bond->sibling_mask |= sibling->mask;
5102                 return 0;
5103         }
5104
5105         bond = krealloc(ve->bonds,
5106                         sizeof(*bond) * (ve->num_bonds + 1),
5107                         GFP_KERNEL);
5108         if (!bond)
5109                 return -ENOMEM;
5110
5111         bond[ve->num_bonds].master = master;
5112         bond[ve->num_bonds].sibling_mask = sibling->mask;
5113
5114         ve->bonds = bond;
5115         ve->num_bonds++;
5116
5117         return 0;
5118 }
5119
5120 struct intel_engine_cs *
5121 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5122                                  unsigned int sibling)
5123 {
5124         struct virtual_engine *ve = to_virtual_engine(engine);
5125
5126         if (sibling >= ve->num_siblings)
5127                 return NULL;
5128
5129         return ve->siblings[sibling];
5130 }
5131
5132 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5133                                    struct drm_printer *m,
5134                                    void (*show_request)(struct drm_printer *m,
5135                                                         struct i915_request *rq,
5136                                                         const char *prefix),
5137                                    unsigned int max)
5138 {
5139         const struct intel_engine_execlists *execlists = &engine->execlists;
5140         struct i915_request *rq, *last;
5141         unsigned long flags;
5142         unsigned int count;
5143         struct rb_node *rb;
5144
5145         spin_lock_irqsave(&engine->active.lock, flags);
5146
5147         last = NULL;
5148         count = 0;
5149         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5150                 if (count++ < max - 1)
5151                         show_request(m, rq, "\t\tE ");
5152                 else
5153                         last = rq;
5154         }
5155         if (last) {
5156                 if (count > max) {
5157                         drm_printf(m,
5158                                    "\t\t...skipping %d executing requests...\n",
5159                                    count - max);
5160                 }
5161                 show_request(m, last, "\t\tE ");
5162         }
5163
5164         last = NULL;
5165         count = 0;
5166         if (execlists->queue_priority_hint != INT_MIN)
5167                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5168                            execlists->queue_priority_hint);
5169         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5170                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5171                 int i;
5172
5173                 priolist_for_each_request(rq, p, i) {
5174                         if (count++ < max - 1)
5175                                 show_request(m, rq, "\t\tQ ");
5176                         else
5177                                 last = rq;
5178                 }
5179         }
5180         if (last) {
5181                 if (count > max) {
5182                         drm_printf(m,
5183                                    "\t\t...skipping %d queued requests...\n",
5184                                    count - max);
5185                 }
5186                 show_request(m, last, "\t\tQ ");
5187         }
5188
5189         last = NULL;
5190         count = 0;
5191         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5192                 struct virtual_engine *ve =
5193                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5194                 struct i915_request *rq = READ_ONCE(ve->request);
5195
5196                 if (rq) {
5197                         if (count++ < max - 1)
5198                                 show_request(m, rq, "\t\tV ");
5199                         else
5200                                 last = rq;
5201                 }
5202         }
5203         if (last) {
5204                 if (count > max) {
5205                         drm_printf(m,
5206                                    "\t\t...skipping %d virtual requests...\n",
5207                                    count - max);
5208                 }
5209                 show_request(m, last, "\t\tV ");
5210         }
5211
5212         spin_unlock_irqrestore(&engine->active.lock, flags);
5213 }
5214
5215 void intel_lr_context_reset(struct intel_engine_cs *engine,
5216                             struct intel_context *ce,
5217                             u32 head,
5218                             bool scrub)
5219 {
5220         GEM_BUG_ON(!intel_context_is_pinned(ce));
5221
5222         /*
5223          * We want a simple context + ring to execute the breadcrumb update.
5224          * We cannot rely on the context being intact across the GPU hang,
5225          * so clear it and rebuild just what we need for the breadcrumb.
5226          * All pending requests for this context will be zapped, and any
5227          * future request will be after userspace has had the opportunity
5228          * to recreate its own state.
5229          */
5230         if (scrub)
5231                 restore_default_state(ce, engine);
5232
5233         /* Rerun the request; its payload has been neutered (if guilty). */
5234         __execlists_update_reg_state(ce, engine, head);
5235 }
5236
5237 bool
5238 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5239 {
5240         return engine->set_default_submission ==
5241                intel_execlists_set_default_submission;
5242 }
5243
5244 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5245 #include "selftest_lrc.c"
5246 #endif