drivers/gpu/drm/i915/gt/intel_lrc.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Ben Widawsky <ben@bwidawsk.net>
  25  *    Michel Thierry <michel.thierry@intel.com>
  26  *    Thomas Daniel <thomas.daniel@intel.com>
  27  *    Oscar Mateo <oscar.mateo@intel.com>
  28  *
  29  */
  30
  31 /**
  32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
  33  *
  34  * Motivation:
  35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
  36  * These expanded contexts enable a number of new abilities, especially
  37  * "Execlists" (also implemented in this file).
  38  *
  39  * One of the main differences with the legacy HW contexts is that logical
  40  * ring contexts incorporate many more things to the context's state, like
  41  * PDPs or ringbuffer control registers:
  42  *
  43  * The reason why PDPs are included in the context is straightforward: as
  44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
  45  * contained there mean you don't need to do a ppgtt->switch_mm yourself,
  46  * instead, the GPU will do it for you on the context switch.
  47  *
  48  * But, what about the ringbuffer control registers (head, tail, etc..)?
  49  * shouldn't we just need a set of those per engine command streamer? This is
  50  * where the name "Logical Rings" starts to make sense: by virtualizing the
  51  * rings, the engine cs shifts to a new "ring buffer" with every context
  52  * switch. When you want to submit a workload to the GPU you: A) choose your
  53  * context, B) find its appropriate virtualized ring, C) write commands to it
  54  * and then, finally, D) tell the GPU to switch to that context.
  55  *
  56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
  57  * to a contexts is via a context execution list, ergo "Execlists".
  58  *
  59  * LRC implementation:
  60  * Regarding the creation of contexts, we have:
  61  *
  62  * - One global default context.
  63  * - One local default context for each opened fd.
  64  * - One local extra context for each context create ioctl call.
  65  *
  66  * Now that ringbuffers belong per-context (and not per-engine, like before)
  67  * and that contexts are uniquely tied to a given engine (and not reusable,
  68  * like before) we need:
  69  *
  70  * - One ringbuffer per-engine inside each context.
  71  * - One backing object per-engine inside each context.
  72  *
  73  * The global default context starts its life with these new objects fully
  74  * allocated and populated. The local default context for each opened fd is
  75  * more complex, because we don't know at creation time which engine is going
  76  * to use them. To handle this, we have implemented a deferred creation of LR
  77  * contexts:
  78  *
  79  * The local context starts its life as a hollow or blank holder, that only
  80  * gets populated for a given engine once we receive an execbuffer. If later
  81  * on we receive another execbuffer ioctl for the same context but a different
  82  * engine, we allocate/populate a new ringbuffer and context backing object and
  83  * so on.
  84  *
  85  * Finally, regarding local contexts created using the ioctl call: as they are
  86  * only allowed with the render ring, we can allocate & populate them right
  87  * away (no need to defer anything, at least for now).
  88  *
  89  * Execlists implementation:
  90  * Execlists are the new method by which, on gen8+ hardware, workloads are
  91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
  92  * This method works as follows:
  93  *
  94  * When a request is committed, its commands (the BB start and any leading or
  95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
  96  * for the appropriate context. The tail pointer in the hardware context is not
  97  * updated at this time, but instead, kept by the driver in the ringbuffer
  98  * structure. A structure representing this request is added to a request queue
  99  * for the appropriate engine: this structure contains a copy of the context's
 100  * tail after the request was written to the ring buffer and a pointer to the
 101  * context itself.
 102  *
 103  * If the engine's request queue was empty before the request was added, the
 104  * queue is processed immediately. Otherwise the queue will be processed during
 105  * a context switch interrupt. In any case, elements on the queue will get sent
 106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 107  * globally unique 20-bits submission ID.
 108  *
 109  * When execution of a request completes, the GPU updates the context status
 110  * buffer with a context complete event and generates a context switch interrupt.
 111  * During the interrupt handling, the driver examines the events in the buffer:
 112  * for each context complete event, if the announced ID matches that on the head
 113  * of the request queue, then that request is retired and removed from the queue.
 114  *
 115  * After processing, if any requests were retired and the queue is not empty
 116  * then a new execution list can be submitted. The two requests at the front of
 117  * the queue are next to be submitted but since a context may not occur twice in
 118  * an execution list, if subsequent requests have the same ID as the first then
 119  * the two requests must be combined. This is done simply by discarding requests
 120  * at the head of the queue until either only one requests is left (in which case
 121  * we use a NULL second context) or the first two requests have unique IDs.
 122  *
 123  * By always executing the first two requests in the queue the driver ensures
 124  * that the GPU is kept as busy as possible. In the case where a single context
 125  * completes but a second context is still executing, the request for this second
 126  * context will be at the head of the queue when we remove the first one. This
 127  * request will then be resubmitted along with a new request for a different context,
 128  * which will cause the hardware to continue executing the second request and queue
 129  * the new request (the GPU detects the condition of a context getting preempted
 130  * with the same context and optimizes the context switch flow by not doing
 131  * preemption, but just sampling the new tail pointer).
 132  *
 133  */
 134 #include <linux/interrupt.h>
 135
 136 #include "i915_drv.h"
 137 #include "i915_perf.h"
 138 #include "i915_trace.h"
 139 #include "i915_vgpu.h"
 140 #include "intel_context.h"
 141 #include "intel_engine_pm.h"
 142 #include "intel_gt.h"
 143 #include "intel_gt_pm.h"
 144 #include "intel_gt_requests.h"
 145 #include "intel_lrc_reg.h"
 146 #include "intel_mocs.h"
 147 #include "intel_reset.h"
 148 #include "intel_ring.h"
 149 #include "intel_workarounds.h"
 150
 151 #define RING_EXECLIST_QFULL             (1 << 0x2)
 152 #define RING_EXECLIST1_VALID            (1 << 0x3)
 153 #define RING_EXECLIST0_VALID            (1 << 0x4)
 154 #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
 155 #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
 156 #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
 157
 158 #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
 159 #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
 160 #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
 161 #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
 162 #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
 163 #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
 164
 165 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 166          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 167
 168 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 169
 170 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
 171 #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
 172 #define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
 173 #define GEN12_IDLE_CTX_ID               0x7FF
 174 #define GEN12_CSB_CTX_VALID(csb_dw) \
 175         (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
 176
 177 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 178 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 179 #define WA_TAIL_DWORDS 2
 180 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 181
 182 struct virtual_engine {
 183         struct intel_engine_cs base;
 184         struct intel_context context;
 185
 186         /*
 187          * We allow only a single request through the virtual engine at a time
 188          * (each request in the timeline waits for the completion fence of
 189          * the previous before being submitted). By restricting ourselves to
 190          * only submitting a single request, each request is placed on to a
 191          * physical to maximise load spreading (by virtue of the late greedy
 192          * scheduling -- each real engine takes the next available request
 193          * upon idling).
 194          */
 195         struct i915_request *request;
 196
 197         /*
 198          * We keep a rbtree of available virtual engines inside each physical
 199          * engine, sorted by priority. Here we preallocate the nodes we need
 200          * for the virtual engine, indexed by physical_engine->id.
 201          */
 202         struct ve_node {
 203                 struct rb_node rb;
 204                 int prio;
 205         } nodes[I915_NUM_ENGINES];
 206
 207         /*
 208          * Keep track of bonded pairs -- restrictions upon on our selection
 209          * of physical engines any particular request may be submitted to.
 210          * If we receive a submit-fence from a master engine, we will only
 211          * use one of sibling_mask physical engines.
 212          */
 213         struct ve_bond {
 214                 const struct intel_engine_cs *master;
 215                 intel_engine_mask_t sibling_mask;
 216         } *bonds;
 217         unsigned int num_bonds;
 218
 219         /* And finally, which physical engines this virtual engine maps onto. */
 220         unsigned int num_siblings;
 221         struct intel_engine_cs *siblings[0];
 222 };
 223
 224 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
 225 {
 226         GEM_BUG_ON(!intel_engine_is_virtual(engine));
 227         return container_of(engine, struct virtual_engine, base);
 228 }
 229
 230 static int __execlists_context_alloc(struct intel_context *ce,
 231                                      struct intel_engine_cs *engine);
 232
 233 static void execlists_init_reg_state(u32 *reg_state,
 234                                      const struct intel_context *ce,
 235                                      const struct intel_engine_cs *engine,
 236                                      const struct intel_ring *ring,
 237                                      bool close);
 238 static void
 239 __execlists_update_reg_state(const struct intel_context *ce,
 240                              const struct intel_engine_cs *engine,
 241                              u32 head);
 242
 243 static void mark_eio(struct i915_request *rq)
 244 {
 245         if (i915_request_completed(rq))
 246                 return;
 247
 248         GEM_BUG_ON(i915_request_signaled(rq));
 249
 250         dma_fence_set_error(&rq->fence, -EIO);
 251         i915_request_mark_complete(rq);
 252 }
 253
 254 static struct i915_request *
 255 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
 256 {
 257         struct i915_request *active = rq;
 258
 259         rcu_read_lock();
 260         list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
 261                 if (i915_request_completed(rq))
 262                         break;
 263
 264                 active = rq;
 265         }
 266         rcu_read_unlock();
 267
 268         return active;
 269 }
 270
 271 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 272 {
 273         return (i915_ggtt_offset(engine->status_page.vma) +
 274                 I915_GEM_HWS_PREEMPT_ADDR);
 275 }
 276
 277 static inline void
 278 ring_set_paused(const struct intel_engine_cs *engine, int state)
 279 {
 280         /*
 281          * We inspect HWS_PREEMPT with a semaphore inside
 282          * engine->emit_fini_breadcrumb. If the dword is true,
 283          * the ring is paused as the semaphore will busywait
 284          * until the dword is false.
 285          */
 286         engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
 287         if (state)
 288                 wmb();
 289 }
 290
 291 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 292 {
 293         return rb_entry(rb, struct i915_priolist, node);
 294 }
 295
 296 static inline int rq_prio(const struct i915_request *rq)
 297 {
 298         return rq->sched.attr.priority;
 299 }
 300
 301 static int effective_prio(const struct i915_request *rq)
 302 {
 303         int prio = rq_prio(rq);
 304
 305         /*
 306          * If this request is special and must not be interrupted at any
 307          * cost, so be it. Note we are only checking the most recent request
 308          * in the context and so may be masking an earlier vip request. It
 309          * is hoped that under the conditions where nopreempt is used, this
 310          * will not matter (i.e. all requests to that context will be
 311          * nopreempt for as long as desired).
 312          */
 313         if (i915_request_has_nopreempt(rq))
 314                 prio = I915_PRIORITY_UNPREEMPTABLE;
 315
 316         /*
 317          * On unwinding the active request, we give it a priority bump
 318          * if it has completed waiting on any semaphore. If we know that
 319          * the request has already started, we can prevent an unwanted
 320          * preempt-to-idle cycle by taking that into account now.
 321          */
 322         if (__i915_request_has_started(rq))
 323                 prio |= I915_PRIORITY_NOSEMAPHORE;
 324
 325         /* Restrict mere WAIT boosts from triggering preemption */
 326         BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
 327         return prio | __NO_PREEMPTION;
 328 }
 329
 330 static int queue_prio(const struct intel_engine_execlists *execlists)
 331 {
 332         struct i915_priolist *p;
 333         struct rb_node *rb;
 334
 335         rb = rb_first_cached(&execlists->queue);
 336         if (!rb)
 337                 return INT_MIN;
 338
 339         /*
 340          * As the priolist[] are inverted, with the highest priority in [0],
 341          * we have to flip the index value to become priority.
 342          */
 343         p = to_priolist(rb);
 344         return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
 345 }
 346
 347 static inline bool need_preempt(const struct intel_engine_cs *engine,
 348                                 const struct i915_request *rq,
 349                                 struct rb_node *rb)
 350 {
 351         int last_prio;
 352
 353         if (!intel_engine_has_semaphores(engine))
 354                 return false;
 355
 356         /*
 357          * Check if the current priority hint merits a preemption attempt.
 358          *
 359          * We record the highest value priority we saw during rescheduling
 360          * prior to this dequeue, therefore we know that if it is strictly
 361          * less than the current tail of ESLP[0], we do not need to force
 362          * a preempt-to-idle cycle.
 363          *
 364          * However, the priority hint is a mere hint that we may need to
 365          * preempt. If that hint is stale or we may be trying to preempt
 366          * ourselves, ignore the request.
 367          *
 368          * More naturally we would write
 369          *      prio >= max(0, last);
 370          * except that we wish to prevent triggering preemption at the same
 371          * priority level: the task that is running should remain running
 372          * to preserve FIFO ordering of dependencies.
 373          */
 374         last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
 375         if (engine->execlists.queue_priority_hint <= last_prio)
 376                 return false;
 377
 378         /*
 379          * Check against the first request in ELSP[1], it will, thanks to the
 380          * power of PI, be the highest priority of that context.
 381          */
 382         if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
 383             rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 384                 return true;
 385
 386         if (rb) {
 387                 struct virtual_engine *ve =
 388                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 389                 bool preempt = false;
 390
 391                 if (engine == ve->siblings[0]) { /* only preempt one sibling */
 392                         struct i915_request *next;
 393
 394                         rcu_read_lock();
 395                         next = READ_ONCE(ve->request);
 396                         if (next)
 397                                 preempt = rq_prio(next) > last_prio;
 398                         rcu_read_unlock();
 399                 }
 400
 401                 if (preempt)
 402                         return preempt;
 403         }
 404
 405         /*
 406          * If the inflight context did not trigger the preemption, then maybe
 407          * it was the set of queued requests? Pick the highest priority in
 408          * the queue (the first active priolist) and see if it deserves to be
 409          * running instead of ELSP[0].
 410          *
 411          * The highest priority request in the queue can not be either
 412          * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
 413          * context, it's priority would not exceed ELSP[0] aka last_prio.
 414          */
 415         return queue_prio(&engine->execlists) > last_prio;
 416 }
 417
 418 __maybe_unused static inline bool
 419 assert_priority_queue(const struct i915_request *prev,
 420                       const struct i915_request *next)
 421 {
 422         /*
 423          * Without preemption, the prev may refer to the still active element
 424          * which we refuse to let go.
 425          *
 426          * Even with preemption, there are times when we think it is better not
 427          * to preempt and leave an ostensibly lower priority request in flight.
 428          */
 429         if (i915_request_is_active(prev))
 430                 return true;
 431
 432         return rq_prio(prev) >= rq_prio(next);
 433 }
 434
 435 /*
 436  * The context descriptor encodes various attributes of a context,
 437  * including its GTT address and some flags. Because it's fairly
 438  * expensive to calculate, we'll just do it once and cache the result,
 439  * which remains valid until the context is unpinned.
 440  *
 441  * This is what a descriptor looks like, from LSB to MSB::
 442  *
 443  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 444  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 445  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 446  *      bits 53-54:    mbz, reserved for use by hardware
 447  *      bits 55-63:    group ID, currently unused and set to 0
 448  *
 449  * Starting from Gen11, the upper dword of the descriptor has a new format:
 450  *
 451  *      bits 32-36:    reserved
 452  *      bits 37-47:    SW context ID
 453  *      bits 48:53:    engine instance
 454  *      bit 54:        mbz, reserved for use by hardware
 455  *      bits 55-60:    SW counter
 456  *      bits 61-63:    engine class
 457  *
 458  * engine info, SW context ID and SW counter need to form a unique number
 459  * (Context ID) per lrc.
 460  */
 461 static u64
 462 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 463 {
 464         u64 desc;
 465
 466         desc = INTEL_LEGACY_32B_CONTEXT;
 467         if (i915_vm_is_4lvl(ce->vm))
 468                 desc = INTEL_LEGACY_64B_CONTEXT;
 469         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
 470
 471         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
 472         if (IS_GEN(engine->i915, 8))
 473                 desc |= GEN8_CTX_L3LLC_COHERENT;
 474
 475         desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
 476         /*
 477          * The following 32bits are copied into the OA reports (dword 2).
 478          * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
 479          * anything below.
 480          */
 481         if (INTEL_GEN(engine->i915) >= 11) {
 482                 desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 483                                                                 /* bits 48-53 */
 484
 485                 desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 486                                                                 /* bits 61-63 */
 487         }
 488
 489         return desc;
 490 }
 491
 492 static inline unsigned int dword_in_page(void *addr)
 493 {
 494         return offset_in_page(addr) / sizeof(u32);
 495 }
 496
 497 static void set_offsets(u32 *regs,
 498                         const u8 *data,
 499                         const struct intel_engine_cs *engine,
 500                         bool clear)
 501 #define NOP(x) (BIT(7) | (x))
 502 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
 503 #define POSTED BIT(0)
 504 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
 505 #define REG16(x) \
 506         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
 507         (((x) >> 2) & 0x7f)
 508 #define END(x) 0, (x)
 509 {
 510         const u32 base = engine->mmio_base;
 511
 512         while (*data) {
 513                 u8 count, flags;
 514
 515                 if (*data & BIT(7)) { /* skip */
 516                         count = *data++ & ~BIT(7);
 517                         if (clear)
 518                                 memset32(regs, MI_NOOP, count);
 519                         regs += count;
 520                         continue;
 521                 }
 522
 523                 count = *data & 0x3f;
 524                 flags = *data >> 6;
 525                 data++;
 526
 527                 *regs = MI_LOAD_REGISTER_IMM(count);
 528                 if (flags & POSTED)
 529                         *regs |= MI_LRI_FORCE_POSTED;
 530                 if (INTEL_GEN(engine->i915) >= 11)
 531                         *regs |= MI_LRI_CS_MMIO;
 532                 regs++;
 533
 534                 GEM_BUG_ON(!count);
 535                 do {
 536                         u32 offset = 0;
 537                         u8 v;
 538
 539                         do {
 540                                 v = *data++;
 541                                 offset <<= 7;
 542                                 offset |= v & ~BIT(7);
 543                         } while (v & BIT(7));
 544
 545                         regs[0] = base + (offset << 2);
 546                         if (clear)
 547                                 regs[1] = 0;
 548                         regs += 2;
 549                 } while (--count);
 550         }
 551
 552         if (clear) {
 553                 u8 count = *++data;
 554
 555                 /* Clear past the tail for HW access */
 556                 GEM_BUG_ON(dword_in_page(regs) > count);
 557                 memset32(regs, MI_NOOP, count - dword_in_page(regs));
 558
 559                 /* Close the batch; used mainly by live_lrc_layout() */
 560                 *regs = MI_BATCH_BUFFER_END;
 561                 if (INTEL_GEN(engine->i915) >= 10)
 562                         *regs |= BIT(0);
 563         }
 564 }
 565
 566 static const u8 gen8_xcs_offsets[] = {
 567         NOP(1),
 568         LRI(11, 0),
 569         REG16(0x244),
 570         REG(0x034),
 571         REG(0x030),
 572         REG(0x038),
 573         REG(0x03c),
 574         REG(0x168),
 575         REG(0x140),
 576         REG(0x110),
 577         REG(0x11c),
 578         REG(0x114),
 579         REG(0x118),
 580
 581         NOP(9),
 582         LRI(9, 0),
 583         REG16(0x3a8),
 584         REG16(0x28c),
 585         REG16(0x288),
 586         REG16(0x284),
 587         REG16(0x280),
 588         REG16(0x27c),
 589         REG16(0x278),
 590         REG16(0x274),
 591         REG16(0x270),
 592
 593         NOP(13),
 594         LRI(2, 0),
 595         REG16(0x200),
 596         REG(0x028),
 597
 598         END(80)
 599 };
 600
 601 static const u8 gen9_xcs_offsets[] = {
 602         NOP(1),
 603         LRI(14, POSTED),
 604         REG16(0x244),
 605         REG(0x034),
 606         REG(0x030),
 607         REG(0x038),
 608         REG(0x03c),
 609         REG(0x168),
 610         REG(0x140),
 611         REG(0x110),
 612         REG(0x11c),
 613         REG(0x114),
 614         REG(0x118),
 615         REG(0x1c0),
 616         REG(0x1c4),
 617         REG(0x1c8),
 618
 619         NOP(3),
 620         LRI(9, POSTED),
 621         REG16(0x3a8),
 622         REG16(0x28c),
 623         REG16(0x288),
 624         REG16(0x284),
 625         REG16(0x280),
 626         REG16(0x27c),
 627         REG16(0x278),
 628         REG16(0x274),
 629         REG16(0x270),
 630
 631         NOP(13),
 632         LRI(1, POSTED),
 633         REG16(0x200),
 634
 635         NOP(13),
 636         LRI(44, POSTED),
 637         REG(0x028),
 638         REG(0x09c),
 639         REG(0x0c0),
 640         REG(0x178),
 641         REG(0x17c),
 642         REG16(0x358),
 643         REG(0x170),
 644         REG(0x150),
 645         REG(0x154),
 646         REG(0x158),
 647         REG16(0x41c),
 648         REG16(0x600),
 649         REG16(0x604),
 650         REG16(0x608),
 651         REG16(0x60c),
 652         REG16(0x610),
 653         REG16(0x614),
 654         REG16(0x618),
 655         REG16(0x61c),
 656         REG16(0x620),
 657         REG16(0x624),
 658         REG16(0x628),
 659         REG16(0x62c),
 660         REG16(0x630),
 661         REG16(0x634),
 662         REG16(0x638),
 663         REG16(0x63c),
 664         REG16(0x640),
 665         REG16(0x644),
 666         REG16(0x648),
 667         REG16(0x64c),
 668         REG16(0x650),
 669         REG16(0x654),
 670         REG16(0x658),
 671         REG16(0x65c),
 672         REG16(0x660),
 673         REG16(0x664),
 674         REG16(0x668),
 675         REG16(0x66c),
 676         REG16(0x670),
 677         REG16(0x674),
 678         REG16(0x678),
 679         REG16(0x67c),
 680         REG(0x068),
 681
 682         END(176)
 683 };
 684
 685 static const u8 gen12_xcs_offsets[] = {
 686         NOP(1),
 687         LRI(13, POSTED),
 688         REG16(0x244),
 689         REG(0x034),
 690         REG(0x030),
 691         REG(0x038),
 692         REG(0x03c),
 693         REG(0x168),
 694         REG(0x140),
 695         REG(0x110),
 696         REG(0x1c0),
 697         REG(0x1c4),
 698         REG(0x1c8),
 699         REG(0x180),
 700         REG16(0x2b4),
 701
 702         NOP(5),
 703         LRI(9, POSTED),
 704         REG16(0x3a8),
 705         REG16(0x28c),
 706         REG16(0x288),
 707         REG16(0x284),
 708         REG16(0x280),
 709         REG16(0x27c),
 710         REG16(0x278),
 711         REG16(0x274),
 712         REG16(0x270),
 713
 714         END(80)
 715 };
 716
 717 static const u8 gen8_rcs_offsets[] = {
 718         NOP(1),
 719         LRI(14, POSTED),
 720         REG16(0x244),
 721         REG(0x034),
 722         REG(0x030),
 723         REG(0x038),
 724         REG(0x03c),
 725         REG(0x168),
 726         REG(0x140),
 727         REG(0x110),
 728         REG(0x11c),
 729         REG(0x114),
 730         REG(0x118),
 731         REG(0x1c0),
 732         REG(0x1c4),
 733         REG(0x1c8),
 734
 735         NOP(3),
 736         LRI(9, POSTED),
 737         REG16(0x3a8),
 738         REG16(0x28c),
 739         REG16(0x288),
 740         REG16(0x284),
 741         REG16(0x280),
 742         REG16(0x27c),
 743         REG16(0x278),
 744         REG16(0x274),
 745         REG16(0x270),
 746
 747         NOP(13),
 748         LRI(1, 0),
 749         REG(0x0c8),
 750
 751         END(80)
 752 };
 753
 754 static const u8 gen9_rcs_offsets[] = {
 755         NOP(1),
 756         LRI(14, POSTED),
 757         REG16(0x244),
 758         REG(0x34),
 759         REG(0x30),
 760         REG(0x38),
 761         REG(0x3c),
 762         REG(0x168),
 763         REG(0x140),
 764         REG(0x110),
 765         REG(0x11c),
 766         REG(0x114),
 767         REG(0x118),
 768         REG(0x1c0),
 769         REG(0x1c4),
 770         REG(0x1c8),
 771
 772         NOP(3),
 773         LRI(9, POSTED),
 774         REG16(0x3a8),
 775         REG16(0x28c),
 776         REG16(0x288),
 777         REG16(0x284),
 778         REG16(0x280),
 779         REG16(0x27c),
 780         REG16(0x278),
 781         REG16(0x274),
 782         REG16(0x270),
 783
 784         NOP(13),
 785         LRI(1, 0),
 786         REG(0xc8),
 787
 788         NOP(13),
 789         LRI(44, POSTED),
 790         REG(0x28),
 791         REG(0x9c),
 792         REG(0xc0),
 793         REG(0x178),
 794         REG(0x17c),
 795         REG16(0x358),
 796         REG(0x170),
 797         REG(0x150),
 798         REG(0x154),
 799         REG(0x158),
 800         REG16(0x41c),
 801         REG16(0x600),
 802         REG16(0x604),
 803         REG16(0x608),
 804         REG16(0x60c),
 805         REG16(0x610),
 806         REG16(0x614),
 807         REG16(0x618),
 808         REG16(0x61c),
 809         REG16(0x620),
 810         REG16(0x624),
 811         REG16(0x628),
 812         REG16(0x62c),
 813         REG16(0x630),
 814         REG16(0x634),
 815         REG16(0x638),
 816         REG16(0x63c),
 817         REG16(0x640),
 818         REG16(0x644),
 819         REG16(0x648),
 820         REG16(0x64c),
 821         REG16(0x650),
 822         REG16(0x654),
 823         REG16(0x658),
 824         REG16(0x65c),
 825         REG16(0x660),
 826         REG16(0x664),
 827         REG16(0x668),
 828         REG16(0x66c),
 829         REG16(0x670),
 830         REG16(0x674),
 831         REG16(0x678),
 832         REG16(0x67c),
 833         REG(0x68),
 834
 835         END(176)
 836 };
 837
 838 static const u8 gen11_rcs_offsets[] = {
 839         NOP(1),
 840         LRI(15, POSTED),
 841         REG16(0x244),
 842         REG(0x034),
 843         REG(0x030),
 844         REG(0x038),
 845         REG(0x03c),
 846         REG(0x168),
 847         REG(0x140),
 848         REG(0x110),
 849         REG(0x11c),
 850         REG(0x114),
 851         REG(0x118),
 852         REG(0x1c0),
 853         REG(0x1c4),
 854         REG(0x1c8),
 855         REG(0x180),
 856
 857         NOP(1),
 858         LRI(9, POSTED),
 859         REG16(0x3a8),
 860         REG16(0x28c),
 861         REG16(0x288),
 862         REG16(0x284),
 863         REG16(0x280),
 864         REG16(0x27c),
 865         REG16(0x278),
 866         REG16(0x274),
 867         REG16(0x270),
 868
 869         LRI(1, POSTED),
 870         REG(0x1b0),
 871
 872         NOP(10),
 873         LRI(1, 0),
 874         REG(0x0c8),
 875
 876         END(80)
 877 };
 878
 879 static const u8 gen12_rcs_offsets[] = {
 880         NOP(1),
 881         LRI(13, POSTED),
 882         REG16(0x244),
 883         REG(0x034),
 884         REG(0x030),
 885         REG(0x038),
 886         REG(0x03c),
 887         REG(0x168),
 888         REG(0x140),
 889         REG(0x110),
 890         REG(0x1c0),
 891         REG(0x1c4),
 892         REG(0x1c8),
 893         REG(0x180),
 894         REG16(0x2b4),
 895
 896         NOP(5),
 897         LRI(9, POSTED),
 898         REG16(0x3a8),
 899         REG16(0x28c),
 900         REG16(0x288),
 901         REG16(0x284),
 902         REG16(0x280),
 903         REG16(0x27c),
 904         REG16(0x278),
 905         REG16(0x274),
 906         REG16(0x270),
 907
 908         LRI(3, POSTED),
 909         REG(0x1b0),
 910         REG16(0x5a8),
 911         REG16(0x5ac),
 912
 913         NOP(6),
 914         LRI(1, 0),
 915         REG(0x0c8),
 916
 917         END(80)
 918 };
 919
 920 #undef END
 921 #undef REG16
 922 #undef REG
 923 #undef LRI
 924 #undef NOP
 925
 926 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 927 {
 928         /*
 929          * The gen12+ lists only have the registers we program in the basic
 930          * default state. We rely on the context image using relative
 931          * addressing to automatic fixup the register state between the
 932          * physical engines for virtual engine.
 933          */
 934         GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
 935                    !intel_engine_has_relative_mmio(engine));
 936
 937         if (engine->class == RENDER_CLASS) {
 938                 if (INTEL_GEN(engine->i915) >= 12)
 939                         return gen12_rcs_offsets;
 940                 else if (INTEL_GEN(engine->i915) >= 11)
 941                         return gen11_rcs_offsets;
 942                 else if (INTEL_GEN(engine->i915) >= 9)
 943                         return gen9_rcs_offsets;
 944                 else
 945                         return gen8_rcs_offsets;
 946         } else {
 947                 if (INTEL_GEN(engine->i915) >= 12)
 948                         return gen12_xcs_offsets;
 949                 else if (INTEL_GEN(engine->i915) >= 9)
 950                         return gen9_xcs_offsets;
 951                 else
 952                         return gen8_xcs_offsets;
 953         }
 954 }
 955
 956 static struct i915_request *
 957 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 958 {
 959         struct i915_request *rq, *rn, *active = NULL;
 960         struct list_head *uninitialized_var(pl);
 961         int prio = I915_PRIORITY_INVALID;
 962
 963         lockdep_assert_held(&engine->active.lock);
 964
 965         list_for_each_entry_safe_reverse(rq, rn,
 966                                          &engine->active.requests,
 967                                          sched.link) {
 968                 if (i915_request_completed(rq))
 969                         continue; /* XXX */
 970
 971                 __i915_request_unsubmit(rq);
 972
 973                 /*
 974                  * Push the request back into the queue for later resubmission.
 975                  * If this request is not native to this physical engine (i.e.
 976                  * it came from a virtual source), push it back onto the virtual
 977                  * engine so that it can be moved across onto another physical
 978                  * engine as load dictates.
 979                  */
 980                 if (likely(rq->execution_mask == engine->mask)) {
 981                         GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 982                         if (rq_prio(rq) != prio) {
 983                                 prio = rq_prio(rq);
 984                                 pl = i915_sched_lookup_priolist(engine, prio);
 985                         }
 986                         GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 987
 988                         list_move(&rq->sched.link, pl);
 989                         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 990
 991                         active = rq;
 992                 } else {
 993                         struct intel_engine_cs *owner = rq->context->engine;
 994
 995                         /*
 996                          * Decouple the virtual breadcrumb before moving it
 997                          * back to the virtual engine -- we don't want the
 998                          * request to complete in the background and try
 999                          * and cancel the breadcrumb on the virtual engine
1000                          * (instead of the old engine where it is linked)!
1001                          */
1002                         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1003                                      &rq->fence.flags)) {
1004                                 spin_lock_nested(&rq->lock,
1005                                                  SINGLE_DEPTH_NESTING);
1006                                 i915_request_cancel_breadcrumb(rq);
1007                                 spin_unlock(&rq->lock);
1008                         }
1009                         rq->engine = owner;
1010                         owner->submit_request(rq);
1011                         active = NULL;
1012                 }
1013         }
1014
1015         return active;
1016 }
1017
1018 struct i915_request *
1019 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1020 {
1021         struct intel_engine_cs *engine =
1022                 container_of(execlists, typeof(*engine), execlists);
1023
1024         return __unwind_incomplete_requests(engine);
1025 }
1026
1027 static inline void
1028 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1029 {
1030         /*
1031          * Only used when GVT-g is enabled now. When GVT-g is disabled,
1032          * The compiler should eliminate this function as dead-code.
1033          */
1034         if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1035                 return;
1036
1037         atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1038                                    status, rq);
1039 }
1040
1041 static void intel_engine_context_in(struct intel_engine_cs *engine)
1042 {
1043         unsigned long flags;
1044
1045         if (READ_ONCE(engine->stats.enabled) == 0)
1046                 return;
1047
1048         write_seqlock_irqsave(&engine->stats.lock, flags);
1049
1050         if (engine->stats.enabled > 0) {
1051                 if (engine->stats.active++ == 0)
1052                         engine->stats.start = ktime_get();
1053                 GEM_BUG_ON(engine->stats.active == 0);
1054         }
1055
1056         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1057 }
1058
1059 static void intel_engine_context_out(struct intel_engine_cs *engine)
1060 {
1061         unsigned long flags;
1062
1063         if (READ_ONCE(engine->stats.enabled) == 0)
1064                 return;
1065
1066         write_seqlock_irqsave(&engine->stats.lock, flags);
1067
1068         if (engine->stats.enabled > 0) {
1069                 ktime_t last;
1070
1071                 if (engine->stats.active && --engine->stats.active == 0) {
1072                         /*
1073                          * Decrement the active context count and in case GPU
1074                          * is now idle add up to the running total.
1075                          */
1076                         last = ktime_sub(ktime_get(), engine->stats.start);
1077
1078                         engine->stats.total = ktime_add(engine->stats.total,
1079                                                         last);
1080                 } else if (engine->stats.active == 0) {
1081                         /*
1082                          * After turning on engine stats, context out might be
1083                          * the first event in which case we account from the
1084                          * time stats gathering was turned on.
1085                          */
1086                         last = ktime_sub(ktime_get(), engine->stats.enabled_at);
1087
1088                         engine->stats.total = ktime_add(engine->stats.total,
1089                                                         last);
1090                 }
1091         }
1092
1093         write_sequnlock_irqrestore(&engine->stats.lock, flags);
1094 }
1095
1096 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
1097 {
1098         if (INTEL_GEN(engine->i915) >= 12)
1099                 return 0x60;
1100         else if (INTEL_GEN(engine->i915) >= 9)
1101                 return 0x54;
1102         else if (engine->class == RENDER_CLASS)
1103                 return 0x58;
1104         else
1105                 return -1;
1106 }
1107
1108 static void
1109 execlists_check_context(const struct intel_context *ce,
1110                         const struct intel_engine_cs *engine)
1111 {
1112         const struct intel_ring *ring = ce->ring;
1113         u32 *regs = ce->lrc_reg_state;
1114         bool valid = true;
1115         int x;
1116
1117         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1118                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1119                        engine->name,
1120                        regs[CTX_RING_START],
1121                        i915_ggtt_offset(ring->vma));
1122                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1123                 valid = false;
1124         }
1125
1126         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1127             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1128                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1129                        engine->name,
1130                        regs[CTX_RING_CTL],
1131                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1132                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1133                 valid = false;
1134         }
1135
1136         x = lrc_ring_mi_mode(engine);
1137         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1138                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1139                        engine->name, regs[x + 1]);
1140                 regs[x + 1] &= ~STOP_RING;
1141                 regs[x + 1] |= STOP_RING << 16;
1142                 valid = false;
1143         }
1144
1145         WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1146 }
1147
1148 static void restore_default_state(struct intel_context *ce,
1149                                   struct intel_engine_cs *engine)
1150 {
1151         u32 *regs = ce->lrc_reg_state;
1152
1153         if (engine->pinned_default_state)
1154                 memcpy(regs, /* skip restoring the vanilla PPHWSP */
1155                        engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
1156                        engine->context_size - PAGE_SIZE);
1157
1158         execlists_init_reg_state(regs, ce, engine, ce->ring, false);
1159 }
1160
1161 static void reset_active(struct i915_request *rq,
1162                          struct intel_engine_cs *engine)
1163 {
1164         struct intel_context * const ce = rq->context;
1165         u32 head;
1166
1167         /*
1168          * The executing context has been cancelled. We want to prevent
1169          * further execution along this context and propagate the error on
1170          * to anything depending on its results.
1171          *
1172          * In __i915_request_submit(), we apply the -EIO and remove the
1173          * requests' payloads for any banned requests. But first, we must
1174          * rewind the context back to the start of the incomplete request so
1175          * that we do not jump back into the middle of the batch.
1176          *
1177          * We preserve the breadcrumbs and semaphores of the incomplete
1178          * requests so that inter-timeline dependencies (i.e other timelines)
1179          * remain correctly ordered. And we defer to __i915_request_submit()
1180          * so that all asynchronous waits are correctly handled.
1181          */
1182         ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1183                      rq->fence.context, rq->fence.seqno);
1184
1185         /* On resubmission of the active request, payload will be scrubbed */
1186         if (i915_request_completed(rq))
1187                 head = rq->tail;
1188         else
1189                 head = active_request(ce->timeline, rq)->head;
1190         head = intel_ring_wrap(ce->ring, head);
1191
1192         /* Scrub the context image to prevent replaying the previous batch */
1193         restore_default_state(ce, engine);
1194         __execlists_update_reg_state(ce, engine, head);
1195
1196         /* We've switched away, so this should be a no-op, but intent matters */
1197         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
1198 }
1199
1200 static inline struct intel_engine_cs *
1201 __execlists_schedule_in(struct i915_request *rq)
1202 {
1203         struct intel_engine_cs * const engine = rq->engine;
1204         struct intel_context * const ce = rq->context;
1205
1206         intel_context_get(ce);
1207
1208         if (unlikely(intel_context_is_banned(ce)))
1209                 reset_active(rq, engine);
1210
1211         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1212                 execlists_check_context(ce, engine);
1213
1214         if (ce->tag) {
1215                 /* Use a fixed tag for OA and friends */
1216                 ce->lrc_desc |= (u64)ce->tag << 32;
1217         } else {
1218                 /* We don't need a strict matching tag, just different values */
1219                 ce->lrc_desc &= ~GENMASK_ULL(47, 37);
1220                 ce->lrc_desc |=
1221                         (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
1222                         GEN11_SW_CTX_ID_SHIFT;
1223                 BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
1224         }
1225
1226         __intel_gt_pm_get(engine->gt);
1227         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1228         intel_engine_context_in(engine);
1229
1230         return engine;
1231 }
1232
1233 static inline struct i915_request *
1234 execlists_schedule_in(struct i915_request *rq, int idx)
1235 {
1236         struct intel_context * const ce = rq->context;
1237         struct intel_engine_cs *old;
1238
1239         GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1240         trace_i915_request_in(rq, idx);
1241
1242         old = READ_ONCE(ce->inflight);
1243         do {
1244                 if (!old) {
1245                         WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1246                         break;
1247                 }
1248         } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1249
1250         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1251         return i915_request_get(rq);
1252 }
1253
1254 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1255 {
1256         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1257         struct i915_request *next = READ_ONCE(ve->request);
1258
1259         if (next && next->execution_mask & ~rq->execution_mask)
1260                 tasklet_schedule(&ve->base.execlists.tasklet);
1261 }
1262
1263 static inline void
1264 __execlists_schedule_out(struct i915_request *rq,
1265                          struct intel_engine_cs * const engine)
1266 {
1267         struct intel_context * const ce = rq->context;
1268
1269         /*
1270          * NB process_csb() is not under the engine->active.lock and hence
1271          * schedule_out can race with schedule_in meaning that we should
1272          * refrain from doing non-trivial work here.
1273          */
1274
1275         /*
1276          * If we have just completed this context, the engine may now be
1277          * idle and we want to re-enter powersaving.
1278          */
1279         if (list_is_last(&rq->link, &ce->timeline->requests) &&
1280             i915_request_completed(rq))
1281                 intel_engine_add_retire(engine, ce->timeline);
1282
1283         intel_engine_context_out(engine);
1284         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1285         intel_gt_pm_put_async(engine->gt);
1286
1287         /*
1288          * If this is part of a virtual engine, its next request may
1289          * have been blocked waiting for access to the active context.
1290          * We have to kick all the siblings again in case we need to
1291          * switch (e.g. the next request is not runnable on this
1292          * engine). Hopefully, we will already have submitted the next
1293          * request before the tasklet runs and do not need to rebuild
1294          * each virtual tree and kick everyone again.
1295          */
1296         if (ce->engine != engine)
1297                 kick_siblings(rq, ce);
1298
1299         intel_context_put(ce);
1300 }
1301
1302 static inline void
1303 execlists_schedule_out(struct i915_request *rq)
1304 {
1305         struct intel_context * const ce = rq->context;
1306         struct intel_engine_cs *cur, *old;
1307
1308         trace_i915_request_out(rq);
1309
1310         old = READ_ONCE(ce->inflight);
1311         do
1312                 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1313         while (!try_cmpxchg(&ce->inflight, &old, cur));
1314         if (!cur)
1315                 __execlists_schedule_out(rq, old);
1316
1317         i915_request_put(rq);
1318 }
1319
1320 static u64 execlists_update_context(struct i915_request *rq)
1321 {
1322         struct intel_context *ce = rq->context;
1323         u64 desc = ce->lrc_desc;
1324         u32 tail, prev;
1325
1326         /*
1327          * WaIdleLiteRestore:bdw,skl
1328          *
1329          * We should never submit the context with the same RING_TAIL twice
1330          * just in case we submit an empty ring, which confuses the HW.
1331          *
1332          * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1333          * the normal request to be able to always advance the RING_TAIL on
1334          * subsequent resubmissions (for lite restore). Should that fail us,
1335          * and we try and submit the same tail again, force the context
1336          * reload.
1337          *
1338          * If we need to return to a preempted context, we need to skip the
1339          * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1340          * HW has a tendency to ignore us rewinding the TAIL to the end of
1341          * an earlier request.
1342          */
1343         tail = intel_ring_set_tail(rq->ring, rq->tail);
1344         prev = ce->lrc_reg_state[CTX_RING_TAIL];
1345         if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1346                 desc |= CTX_DESC_FORCE_RESTORE;
1347         ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1348         rq->tail = rq->wa_tail;
1349
1350         /*
1351          * Make sure the context image is complete before we submit it to HW.
1352          *
1353          * Ostensibly, writes (including the WCB) should be flushed prior to
1354          * an uncached write such as our mmio register access, the empirical
1355          * evidence (esp. on Braswell) suggests that the WC write into memory
1356          * may not be visible to the HW prior to the completion of the UC
1357          * register write and that we may begin execution from the context
1358          * before its image is complete leading to invalid PD chasing.
1359          */
1360         wmb();
1361
1362         ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
1363         return desc;
1364 }
1365
1366 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1367 {
1368         if (execlists->ctrl_reg) {
1369                 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1370                 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1371         } else {
1372                 writel(upper_32_bits(desc), execlists->submit_reg);
1373                 writel(lower_32_bits(desc), execlists->submit_reg);
1374         }
1375 }
1376
1377 static __maybe_unused void
1378 trace_ports(const struct intel_engine_execlists *execlists,
1379             const char *msg,
1380             struct i915_request * const *ports)
1381 {
1382         const struct intel_engine_cs *engine =
1383                 container_of(execlists, typeof(*engine), execlists);
1384
1385         if (!ports[0])
1386                 return;
1387
1388         ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
1389                      ports[0]->fence.context,
1390                      ports[0]->fence.seqno,
1391                      i915_request_completed(ports[0]) ? "!" :
1392                      i915_request_started(ports[0]) ? "*" :
1393                      "",
1394                      ports[1] ? ports[1]->fence.context : 0,
1395                      ports[1] ? ports[1]->fence.seqno : 0);
1396 }
1397
1398 static __maybe_unused bool
1399 assert_pending_valid(const struct intel_engine_execlists *execlists,
1400                      const char *msg)
1401 {
1402         struct i915_request * const *port, *rq;
1403         struct intel_context *ce = NULL;
1404
1405         trace_ports(execlists, msg, execlists->pending);
1406
1407         if (!execlists->pending[0]) {
1408                 GEM_TRACE_ERR("Nothing pending for promotion!\n");
1409                 return false;
1410         }
1411
1412         if (execlists->pending[execlists_num_ports(execlists)]) {
1413                 GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
1414                               execlists_num_ports(execlists));
1415                 return false;
1416         }
1417
1418         for (port = execlists->pending; (rq = *port); port++) {
1419                 unsigned long flags;
1420                 bool ok = true;
1421
1422                 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1423                 GEM_BUG_ON(!i915_request_is_active(rq));
1424
1425                 if (ce == rq->context) {
1426                         GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
1427                                       ce->timeline->fence_context,
1428                                       port - execlists->pending);
1429                         return false;
1430                 }
1431                 ce = rq->context;
1432
1433                 /* Hold tightly onto the lock to prevent concurrent retires! */
1434                 if (!spin_trylock_irqsave(&rq->lock, flags))
1435                         continue;
1436
1437                 if (i915_request_completed(rq))
1438                         goto unlock;
1439
1440                 if (i915_active_is_idle(&ce->active) &&
1441                     !intel_context_is_barrier(ce)) {
1442                         GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
1443                                       ce->timeline->fence_context,
1444                                       port - execlists->pending);
1445                         ok = false;
1446                         goto unlock;
1447                 }
1448
1449                 if (!i915_vma_is_pinned(ce->state)) {
1450                         GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
1451                                       ce->timeline->fence_context,
1452                                       port - execlists->pending);
1453                         ok = false;
1454                         goto unlock;
1455                 }
1456
1457                 if (!i915_vma_is_pinned(ce->ring->vma)) {
1458                         GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
1459                                       ce->timeline->fence_context,
1460                                       port - execlists->pending);
1461                         ok = false;
1462                         goto unlock;
1463                 }
1464
1465 unlock:
1466                 spin_unlock_irqrestore(&rq->lock, flags);
1467                 if (!ok)
1468                         return false;
1469         }
1470
1471         return ce;
1472 }
1473
1474 static void execlists_submit_ports(struct intel_engine_cs *engine)
1475 {
1476         struct intel_engine_execlists *execlists = &engine->execlists;
1477         unsigned int n;
1478
1479         GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1480
1481         /*
1482          * We can skip acquiring intel_runtime_pm_get() here as it was taken
1483          * on our behalf by the request (see i915_gem_mark_busy()) and it will
1484          * not be relinquished until the device is idle (see
1485          * i915_gem_idle_work_handler()). As a precaution, we make sure
1486          * that all ELSP are drained i.e. we have processed the CSB,
1487          * before allowing ourselves to idle and calling intel_runtime_pm_put().
1488          */
1489         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1490
1491         /*
1492          * ELSQ note: the submit queue is not cleared after being submitted
1493          * to the HW so we need to make sure we always clean it up. This is
1494          * currently ensured by the fact that we always write the same number
1495          * of elsq entries, keep this in mind before changing the loop below.
1496          */
1497         for (n = execlists_num_ports(execlists); n--; ) {
1498                 struct i915_request *rq = execlists->pending[n];
1499
1500                 write_desc(execlists,
1501                            rq ? execlists_update_context(rq) : 0,
1502                            n);
1503         }
1504
1505         /* we need to manually load the submit queue */
1506         if (execlists->ctrl_reg)
1507                 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1508 }
1509
1510 static bool ctx_single_port_submission(const struct intel_context *ce)
1511 {
1512         return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1513                 intel_context_force_single_submission(ce));
1514 }
1515
1516 static bool can_merge_ctx(const struct intel_context *prev,
1517                           const struct intel_context *next)
1518 {
1519         if (prev != next)
1520                 return false;
1521
1522         if (ctx_single_port_submission(prev))
1523                 return false;
1524
1525         return true;
1526 }
1527
1528 static bool can_merge_rq(const struct i915_request *prev,
1529                          const struct i915_request *next)
1530 {
1531         GEM_BUG_ON(prev == next);
1532         GEM_BUG_ON(!assert_priority_queue(prev, next));
1533
1534         /*
1535          * We do not submit known completed requests. Therefore if the next
1536          * request is already completed, we can pretend to merge it in
1537          * with the previous context (and we will skip updating the ELSP
1538          * and tracking). Thus hopefully keeping the ELSP full with active
1539          * contexts, despite the best efforts of preempt-to-busy to confuse
1540          * us.
1541          */
1542         if (i915_request_completed(next))
1543                 return true;
1544
1545         if (unlikely((prev->fence.flags ^ next->fence.flags) &
1546                      (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1547                       BIT(I915_FENCE_FLAG_SENTINEL))))
1548                 return false;
1549
1550         if (!can_merge_ctx(prev->context, next->context))
1551                 return false;
1552
1553         return true;
1554 }
1555
1556 static void virtual_update_register_offsets(u32 *regs,
1557                                             struct intel_engine_cs *engine)
1558 {
1559         set_offsets(regs, reg_offsets(engine), engine, false);
1560 }
1561
1562 static bool virtual_matches(const struct virtual_engine *ve,
1563                             const struct i915_request *rq,
1564                             const struct intel_engine_cs *engine)
1565 {
1566         const struct intel_engine_cs *inflight;
1567
1568         if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1569                 return false;
1570
1571         /*
1572          * We track when the HW has completed saving the context image
1573          * (i.e. when we have seen the final CS event switching out of
1574          * the context) and must not overwrite the context image before
1575          * then. This restricts us to only using the active engine
1576          * while the previous virtualized request is inflight (so
1577          * we reuse the register offsets). This is a very small
1578          * hystersis on the greedy seelction algorithm.
1579          */
1580         inflight = intel_context_inflight(&ve->context);
1581         if (inflight && inflight != engine)
1582                 return false;
1583
1584         return true;
1585 }
1586
1587 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
1588                                      struct intel_engine_cs *engine)
1589 {
1590         struct intel_engine_cs *old = ve->siblings[0];
1591
1592         /* All unattached (rq->engine == old) must already be completed */
1593
1594         spin_lock(&old->breadcrumbs.irq_lock);
1595         if (!list_empty(&ve->context.signal_link)) {
1596                 list_move_tail(&ve->context.signal_link,
1597                                &engine->breadcrumbs.signalers);
1598                 intel_engine_signal_breadcrumbs(engine);
1599         }
1600         spin_unlock(&old->breadcrumbs.irq_lock);
1601 }
1602
1603 #define for_each_waiter(p__, rq__) \
1604         list_for_each_entry_lockless(p__, \
1605                                      &(rq__)->sched.waiters_list, \
1606                                      wait_link)
1607
1608 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1609 {
1610         LIST_HEAD(list);
1611
1612         /*
1613          * We want to move the interrupted request to the back of
1614          * the round-robin list (i.e. its priority level), but
1615          * in doing so, we must then move all requests that were in
1616          * flight and were waiting for the interrupted request to
1617          * be run after it again.
1618          */
1619         do {
1620                 struct i915_dependency *p;
1621
1622                 GEM_BUG_ON(i915_request_is_active(rq));
1623                 list_move_tail(&rq->sched.link, pl);
1624
1625                 for_each_waiter(p, rq) {
1626                         struct i915_request *w =
1627                                 container_of(p->waiter, typeof(*w), sched);
1628
1629                         /* Leave semaphores spinning on the other engines */
1630                         if (w->engine != rq->engine)
1631                                 continue;
1632
1633                         /* No waiter should start before its signaler */
1634                         GEM_BUG_ON(i915_request_started(w) &&
1635                                    !i915_request_completed(rq));
1636
1637                         GEM_BUG_ON(i915_request_is_active(w));
1638                         if (!i915_request_is_ready(w))
1639                                 continue;
1640
1641                         if (rq_prio(w) < rq_prio(rq))
1642                                 continue;
1643
1644                         GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1645                         list_move_tail(&w->sched.link, &list);
1646                 }
1647
1648                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1649         } while (rq);
1650 }
1651
1652 static void defer_active(struct intel_engine_cs *engine)
1653 {
1654         struct i915_request *rq;
1655
1656         rq = __unwind_incomplete_requests(engine);
1657         if (!rq)
1658                 return;
1659
1660         defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1661 }
1662
1663 static bool
1664 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
1665 {
1666         int hint;
1667
1668         if (!intel_engine_has_timeslices(engine))
1669                 return false;
1670
1671         hint = engine->execlists.queue_priority_hint;
1672         if (!list_is_last(&rq->sched.link, &engine->active.requests))
1673                 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1674
1675         return hint >= effective_prio(rq);
1676 }
1677
1678 static int
1679 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1680 {
1681         if (list_is_last(&rq->sched.link, &engine->active.requests))
1682                 return INT_MIN;
1683
1684         return rq_prio(list_next_entry(rq, sched.link));
1685 }
1686
1687 static inline unsigned long
1688 timeslice(const struct intel_engine_cs *engine)
1689 {
1690         return READ_ONCE(engine->props.timeslice_duration_ms);
1691 }
1692
1693 static unsigned long
1694 active_timeslice(const struct intel_engine_cs *engine)
1695 {
1696         const struct i915_request *rq = *engine->execlists.active;
1697
1698         if (!rq || i915_request_completed(rq))
1699                 return 0;
1700
1701         if (engine->execlists.switch_priority_hint < effective_prio(rq))
1702                 return 0;
1703
1704         return timeslice(engine);
1705 }
1706
1707 static void set_timeslice(struct intel_engine_cs *engine)
1708 {
1709         if (!intel_engine_has_timeslices(engine))
1710                 return;
1711
1712         set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
1713 }
1714
1715 static void start_timeslice(struct intel_engine_cs *engine)
1716 {
1717         struct intel_engine_execlists *execlists = &engine->execlists;
1718
1719         execlists->switch_priority_hint = execlists->queue_priority_hint;
1720
1721         if (timer_pending(&execlists->timer))
1722                 return;
1723
1724         set_timer_ms(&execlists->timer, timeslice(engine));
1725 }
1726
1727 static void record_preemption(struct intel_engine_execlists *execlists)
1728 {
1729         (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
1730 }
1731
1732 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
1733                                             const struct i915_request *rq)
1734 {
1735         if (!rq)
1736                 return 0;
1737
1738         /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1739         if (unlikely(intel_context_is_banned(rq->context)))
1740                 return 1;
1741
1742         return READ_ONCE(engine->props.preempt_timeout_ms);
1743 }
1744
1745 static void set_preempt_timeout(struct intel_engine_cs *engine,
1746                                 const struct i915_request *rq)
1747 {
1748         if (!intel_engine_has_preempt_reset(engine))
1749                 return;
1750
1751         set_timer_ms(&engine->execlists.preempt,
1752                      active_preempt_timeout(engine, rq));
1753 }
1754
1755 static inline void clear_ports(struct i915_request **ports, int count)
1756 {
1757         memset_p((void **)ports, NULL, count);
1758 }
1759
1760 static void execlists_dequeue(struct intel_engine_cs *engine)
1761 {
1762         struct intel_engine_execlists * const execlists = &engine->execlists;
1763         struct i915_request **port = execlists->pending;
1764         struct i915_request ** const last_port = port + execlists->port_mask;
1765         struct i915_request * const *active;
1766         struct i915_request *last;
1767         struct rb_node *rb;
1768         bool submit = false;
1769
1770         /*
1771          * Hardware submission is through 2 ports. Conceptually each port
1772          * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1773          * static for a context, and unique to each, so we only execute
1774          * requests belonging to a single context from each ring. RING_HEAD
1775          * is maintained by the CS in the context image, it marks the place
1776          * where it got up to last time, and through RING_TAIL we tell the CS
1777          * where we want to execute up to this time.
1778          *
1779          * In this list the requests are in order of execution. Consecutive
1780          * requests from the same context are adjacent in the ringbuffer. We
1781          * can combine these requests into a single RING_TAIL update:
1782          *
1783          *              RING_HEAD...req1...req2
1784          *                                    ^- RING_TAIL
1785          * since to execute req2 the CS must first execute req1.
1786          *
1787          * Our goal then is to point each port to the end of a consecutive
1788          * sequence of requests as being the most optimal (fewest wake ups
1789          * and context switches) submission.
1790          */
1791
1792         for (rb = rb_first_cached(&execlists->virtual); rb; ) {
1793                 struct virtual_engine *ve =
1794                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1795                 struct i915_request *rq = READ_ONCE(ve->request);
1796
1797                 if (!rq) { /* lazily cleanup after another engine handled rq */
1798                         rb_erase_cached(rb, &execlists->virtual);
1799                         RB_CLEAR_NODE(rb);
1800                         rb = rb_first_cached(&execlists->virtual);
1801                         continue;
1802                 }
1803
1804                 if (!virtual_matches(ve, rq, engine)) {
1805                         rb = rb_next(rb);
1806                         continue;
1807                 }
1808
1809                 break;
1810         }
1811
1812         /*
1813          * If the queue is higher priority than the last
1814          * request in the currently active context, submit afresh.
1815          * We will resubmit again afterwards in case we need to split
1816          * the active context to interject the preemption request,
1817          * i.e. we will retrigger preemption following the ack in case
1818          * of trouble.
1819          */
1820         active = READ_ONCE(execlists->active);
1821         while ((last = *active) && i915_request_completed(last))
1822                 active++;
1823
1824         if (last) {
1825                 if (need_preempt(engine, last, rb)) {
1826                         ENGINE_TRACE(engine,
1827                                      "preempting last=%llx:%lld, prio=%d, hint=%d\n",
1828                                      last->fence.context,
1829                                      last->fence.seqno,
1830                                      last->sched.attr.priority,
1831                                      execlists->queue_priority_hint);
1832                         record_preemption(execlists);
1833
1834                         /*
1835                          * Don't let the RING_HEAD advance past the breadcrumb
1836                          * as we unwind (and until we resubmit) so that we do
1837                          * not accidentally tell it to go backwards.
1838                          */
1839                         ring_set_paused(engine, 1);
1840
1841                         /*
1842                          * Note that we have not stopped the GPU at this point,
1843                          * so we are unwinding the incomplete requests as they
1844                          * remain inflight and so by the time we do complete
1845                          * the preemption, some of the unwound requests may
1846                          * complete!
1847                          */
1848                         __unwind_incomplete_requests(engine);
1849
1850                         last = NULL;
1851                 } else if (need_timeslice(engine, last) &&
1852                            timer_expired(&engine->execlists.timer)) {
1853                         ENGINE_TRACE(engine,
1854                                      "expired last=%llx:%lld, prio=%d, hint=%d\n",
1855                                      last->fence.context,
1856                                      last->fence.seqno,
1857                                      last->sched.attr.priority,
1858                                      execlists->queue_priority_hint);
1859
1860                         ring_set_paused(engine, 1);
1861                         defer_active(engine);
1862
1863                         /*
1864                          * Unlike for preemption, if we rewind and continue
1865                          * executing the same context as previously active,
1866                          * the order of execution will remain the same and
1867                          * the tail will only advance. We do not need to
1868                          * force a full context restore, as a lite-restore
1869                          * is sufficient to resample the monotonic TAIL.
1870                          *
1871                          * If we switch to any other context, similarly we
1872                          * will not rewind TAIL of current context, and
1873                          * normal save/restore will preserve state and allow
1874                          * us to later continue executing the same request.
1875                          */
1876                         last = NULL;
1877                 } else {
1878                         /*
1879                          * Otherwise if we already have a request pending
1880                          * for execution after the current one, we can
1881                          * just wait until the next CS event before
1882                          * queuing more. In either case we will force a
1883                          * lite-restore preemption event, but if we wait
1884                          * we hopefully coalesce several updates into a single
1885                          * submission.
1886                          */
1887                         if (!list_is_last(&last->sched.link,
1888                                           &engine->active.requests)) {
1889                                 /*
1890                                  * Even if ELSP[1] is occupied and not worthy
1891                                  * of timeslices, our queue might be.
1892                                  */
1893                                 start_timeslice(engine);
1894                                 return;
1895                         }
1896                 }
1897         }
1898
1899         while (rb) { /* XXX virtual is always taking precedence */
1900                 struct virtual_engine *ve =
1901                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1902                 struct i915_request *rq;
1903
1904                 spin_lock(&ve->base.active.lock);
1905
1906                 rq = ve->request;
1907                 if (unlikely(!rq)) { /* lost the race to a sibling */
1908                         spin_unlock(&ve->base.active.lock);
1909                         rb_erase_cached(rb, &execlists->virtual);
1910                         RB_CLEAR_NODE(rb);
1911                         rb = rb_first_cached(&execlists->virtual);
1912                         continue;
1913                 }
1914
1915                 GEM_BUG_ON(rq != ve->request);
1916                 GEM_BUG_ON(rq->engine != &ve->base);
1917                 GEM_BUG_ON(rq->context != &ve->context);
1918
1919                 if (rq_prio(rq) >= queue_prio(execlists)) {
1920                         if (!virtual_matches(ve, rq, engine)) {
1921                                 spin_unlock(&ve->base.active.lock);
1922                                 rb = rb_next(rb);
1923                                 continue;
1924                         }
1925
1926                         if (last && !can_merge_rq(last, rq)) {
1927                                 spin_unlock(&ve->base.active.lock);
1928                                 start_timeslice(engine);
1929                                 return; /* leave this for another sibling */
1930                         }
1931
1932                         ENGINE_TRACE(engine,
1933                                      "virtual rq=%llx:%lld%s, new engine? %s\n",
1934                                      rq->fence.context,
1935                                      rq->fence.seqno,
1936                                      i915_request_completed(rq) ? "!" :
1937                                      i915_request_started(rq) ? "*" :
1938                                      "",
1939                                      yesno(engine != ve->siblings[0]));
1940
1941                         ve->request = NULL;
1942                         ve->base.execlists.queue_priority_hint = INT_MIN;
1943                         rb_erase_cached(rb, &execlists->virtual);
1944                         RB_CLEAR_NODE(rb);
1945
1946                         GEM_BUG_ON(!(rq->execution_mask & engine->mask));
1947                         rq->engine = engine;
1948
1949                         if (engine != ve->siblings[0]) {
1950                                 u32 *regs = ve->context.lrc_reg_state;
1951                                 unsigned int n;
1952
1953                                 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1954
1955                                 if (!intel_engine_has_relative_mmio(engine))
1956                                         virtual_update_register_offsets(regs,
1957                                                                         engine);
1958
1959                                 if (!list_empty(&ve->context.signals))
1960                                         virtual_xfer_breadcrumbs(ve, engine);
1961
1962                                 /*
1963                                  * Move the bound engine to the top of the list
1964                                  * for future execution. We then kick this
1965                                  * tasklet first before checking others, so that
1966                                  * we preferentially reuse this set of bound
1967                                  * registers.
1968                                  */
1969                                 for (n = 1; n < ve->num_siblings; n++) {
1970                                         if (ve->siblings[n] == engine) {
1971                                                 swap(ve->siblings[n],
1972                                                      ve->siblings[0]);
1973                                                 break;
1974                                         }
1975                                 }
1976
1977                                 GEM_BUG_ON(ve->siblings[0] != engine);
1978                         }
1979
1980                         if (__i915_request_submit(rq)) {
1981                                 submit = true;
1982                                 last = rq;
1983                         }
1984                         i915_request_put(rq);
1985
1986                         /*
1987                          * Hmm, we have a bunch of virtual engine requests,
1988                          * but the first one was already completed (thanks
1989                          * preempt-to-busy!). Keep looking at the veng queue
1990                          * until we have no more relevant requests (i.e.
1991                          * the normal submit queue has higher priority).
1992                          */
1993                         if (!submit) {
1994                                 spin_unlock(&ve->base.active.lock);
1995                                 rb = rb_first_cached(&execlists->virtual);
1996                                 continue;
1997                         }
1998                 }
1999
2000                 spin_unlock(&ve->base.active.lock);
2001                 break;
2002         }
2003
2004         while ((rb = rb_first_cached(&execlists->queue))) {
2005                 struct i915_priolist *p = to_priolist(rb);
2006                 struct i915_request *rq, *rn;
2007                 int i;
2008
2009                 priolist_for_each_request_consume(rq, rn, p, i) {
2010                         bool merge = true;
2011
2012                         /*
2013                          * Can we combine this request with the current port?
2014                          * It has to be the same context/ringbuffer and not
2015                          * have any exceptions (e.g. GVT saying never to
2016                          * combine contexts).
2017                          *
2018                          * If we can combine the requests, we can execute both
2019                          * by updating the RING_TAIL to point to the end of the
2020                          * second request, and so we never need to tell the
2021                          * hardware about the first.
2022                          */
2023                         if (last && !can_merge_rq(last, rq)) {
2024                                 /*
2025                                  * If we are on the second port and cannot
2026                                  * combine this request with the last, then we
2027                                  * are done.
2028                                  */
2029                                 if (port == last_port)
2030                                         goto done;
2031
2032                                 /*
2033                                  * We must not populate both ELSP[] with the
2034                                  * same LRCA, i.e. we must submit 2 different
2035                                  * contexts if we submit 2 ELSP.
2036                                  */
2037                                 if (last->context == rq->context)
2038                                         goto done;
2039
2040                                 if (i915_request_has_sentinel(last))
2041                                         goto done;
2042
2043                                 /*
2044                                  * If GVT overrides us we only ever submit
2045                                  * port[0], leaving port[1] empty. Note that we
2046                                  * also have to be careful that we don't queue
2047                                  * the same context (even though a different
2048                                  * request) to the second port.
2049                                  */
2050                                 if (ctx_single_port_submission(last->context) ||
2051                                     ctx_single_port_submission(rq->context))
2052                                         goto done;
2053
2054                                 merge = false;
2055                         }
2056
2057                         if (__i915_request_submit(rq)) {
2058                                 if (!merge) {
2059                                         *port = execlists_schedule_in(last, port - execlists->pending);
2060                                         port++;
2061                                         last = NULL;
2062                                 }
2063
2064                                 GEM_BUG_ON(last &&
2065                                            !can_merge_ctx(last->context,
2066                                                           rq->context));
2067
2068                                 submit = true;
2069                                 last = rq;
2070                         }
2071                 }
2072
2073                 rb_erase_cached(&p->node, &execlists->queue);
2074                 i915_priolist_free(p);
2075         }
2076
2077 done:
2078         /*
2079          * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2080          *
2081          * We choose the priority hint such that if we add a request of greater
2082          * priority than this, we kick the submission tasklet to decide on
2083          * the right order of submitting the requests to hardware. We must
2084          * also be prepared to reorder requests as they are in-flight on the
2085          * HW. We derive the priority hint then as the first "hole" in
2086          * the HW submission ports and if there are no available slots,
2087          * the priority of the lowest executing request, i.e. last.
2088          *
2089          * When we do receive a higher priority request ready to run from the
2090          * user, see queue_request(), the priority hint is bumped to that
2091          * request triggering preemption on the next dequeue (or subsequent
2092          * interrupt for secondary ports).
2093          */
2094         execlists->queue_priority_hint = queue_prio(execlists);
2095
2096         if (submit) {
2097                 *port = execlists_schedule_in(last, port - execlists->pending);
2098                 execlists->switch_priority_hint =
2099                         switch_prio(engine, *execlists->pending);
2100
2101                 /*
2102                  * Skip if we ended up with exactly the same set of requests,
2103                  * e.g. trying to timeslice a pair of ordered contexts
2104                  */
2105                 if (!memcmp(active, execlists->pending,
2106                             (port - execlists->pending + 1) * sizeof(*port))) {
2107                         do
2108                                 execlists_schedule_out(fetch_and_zero(port));
2109                         while (port-- != execlists->pending);
2110
2111                         goto skip_submit;
2112                 }
2113                 clear_ports(port + 1, last_port - port);
2114
2115                 execlists_submit_ports(engine);
2116                 set_preempt_timeout(engine, *active);
2117         } else {
2118 skip_submit:
2119                 ring_set_paused(engine, 0);
2120         }
2121 }
2122
2123 static void
2124 cancel_port_requests(struct intel_engine_execlists * const execlists)
2125 {
2126         struct i915_request * const *port;
2127
2128         for (port = execlists->pending; *port; port++)
2129                 execlists_schedule_out(*port);
2130         clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2131
2132         /* Mark the end of active before we overwrite *active */
2133         for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2134                 execlists_schedule_out(*port);
2135         clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2136
2137         WRITE_ONCE(execlists->active, execlists->inflight);
2138 }
2139
2140 static inline void
2141 invalidate_csb_entries(const u32 *first, const u32 *last)
2142 {
2143         clflush((void *)first);
2144         clflush((void *)last);
2145 }
2146
2147 static inline bool
2148 reset_in_progress(const struct intel_engine_execlists *execlists)
2149 {
2150         return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
2151 }
2152
2153 /*
2154  * Starting with Gen12, the status has a new format:
2155  *
2156  *     bit  0:     switched to new queue
2157  *     bit  1:     reserved
2158  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2159  *                 switch detail is set to "wait on semaphore"
2160  *     bits 3-5:   engine class
2161  *     bits 6-11:  engine instance
2162  *     bits 12-14: reserved
2163  *     bits 15-25: sw context id of the lrc the GT switched to
2164  *     bits 26-31: sw counter of the lrc the GT switched to
2165  *     bits 32-35: context switch detail
2166  *                  - 0: ctx complete
2167  *                  - 1: wait on sync flip
2168  *                  - 2: wait on vblank
2169  *                  - 3: wait on scanline
2170  *                  - 4: wait on semaphore
2171  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2172  *                       WAIT_FOR_EVENT)
2173  *     bit  36:    reserved
2174  *     bits 37-43: wait detail (for switch detail 1 to 4)
2175  *     bits 44-46: reserved
2176  *     bits 47-57: sw context id of the lrc the GT switched away from
2177  *     bits 58-63: sw counter of the lrc the GT switched away from
2178  */
2179 static inline bool
2180 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2181 {
2182         u32 lower_dw = csb[0];
2183         u32 upper_dw = csb[1];
2184         bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2185         bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2186         bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2187
2188         /*
2189          * The context switch detail is not guaranteed to be 5 when a preemption
2190          * occurs, so we can't just check for that. The check below works for
2191          * all the cases we care about, including preemptions of WAIT
2192          * instructions and lite-restore. Preempt-to-idle via the CTRL register
2193          * would require some extra handling, but we don't support that.
2194          */
2195         if (!ctx_away_valid || new_queue) {
2196                 GEM_BUG_ON(!ctx_to_valid);
2197                 return true;
2198         }
2199
2200         /*
2201          * switch detail = 5 is covered by the case above and we do not expect a
2202          * context switch on an unsuccessful wait instruction since we always
2203          * use polling mode.
2204          */
2205         GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2206         return false;
2207 }
2208
2209 static inline bool
2210 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2211 {
2212         return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2213 }
2214
2215 static void process_csb(struct intel_engine_cs *engine)
2216 {
2217         struct intel_engine_execlists * const execlists = &engine->execlists;
2218         const u32 * const buf = execlists->csb_status;
2219         const u8 num_entries = execlists->csb_size;
2220         u8 head, tail;
2221
2222         /*
2223          * As we modify our execlists state tracking we require exclusive
2224          * access. Either we are inside the tasklet, or the tasklet is disabled
2225          * and we assume that is only inside the reset paths and so serialised.
2226          */
2227         GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2228                    !reset_in_progress(execlists));
2229         GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2230
2231         /*
2232          * Note that csb_write, csb_status may be either in HWSP or mmio.
2233          * When reading from the csb_write mmio register, we have to be
2234          * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2235          * the low 4bits. As it happens we know the next 4bits are always
2236          * zero and so we can simply masked off the low u8 of the register
2237          * and treat it identically to reading from the HWSP (without having
2238          * to use explicit shifting and masking, and probably bifurcating
2239          * the code to handle the legacy mmio read).
2240          */
2241         head = execlists->csb_head;
2242         tail = READ_ONCE(*execlists->csb_write);
2243         ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2244         if (unlikely(head == tail))
2245                 return;
2246
2247         /*
2248          * Hopefully paired with a wmb() in HW!
2249          *
2250          * We must complete the read of the write pointer before any reads
2251          * from the CSB, so that we do not see stale values. Without an rmb
2252          * (lfence) the HW may speculatively perform the CSB[] reads *before*
2253          * we perform the READ_ONCE(*csb_write).
2254          */
2255         rmb();
2256
2257         do {
2258                 bool promote;
2259
2260                 if (++head == num_entries)
2261                         head = 0;
2262
2263                 /*
2264                  * We are flying near dragons again.
2265                  *
2266                  * We hold a reference to the request in execlist_port[]
2267                  * but no more than that. We are operating in softirq
2268                  * context and so cannot hold any mutex or sleep. That
2269                  * prevents us stopping the requests we are processing
2270                  * in port[] from being retired simultaneously (the
2271                  * breadcrumb will be complete before we see the
2272                  * context-switch). As we only hold the reference to the
2273                  * request, any pointer chasing underneath the request
2274                  * is subject to a potential use-after-free. Thus we
2275                  * store all of the bookkeeping within port[] as
2276                  * required, and avoid using unguarded pointers beneath
2277                  * request itself. The same applies to the atomic
2278                  * status notifier.
2279                  */
2280
2281                 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2282                              head, buf[2 * head + 0], buf[2 * head + 1]);
2283
2284                 if (INTEL_GEN(engine->i915) >= 12)
2285                         promote = gen12_csb_parse(execlists, buf + 2 * head);
2286                 else
2287                         promote = gen8_csb_parse(execlists, buf + 2 * head);
2288                 if (promote) {
2289                         struct i915_request * const *old = execlists->active;
2290
2291                         /* Point active to the new ELSP; prevent overwriting */
2292                         WRITE_ONCE(execlists->active, execlists->pending);
2293
2294                         if (!inject_preempt_hang(execlists))
2295                                 ring_set_paused(engine, 0);
2296
2297                         /* cancel old inflight, prepare for switch */
2298                         trace_ports(execlists, "preempted", old);
2299                         while (*old)
2300                                 execlists_schedule_out(*old++);
2301
2302                         /* switch pending to inflight */
2303                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2304                         WRITE_ONCE(execlists->active,
2305                                    memcpy(execlists->inflight,
2306                                           execlists->pending,
2307                                           execlists_num_ports(execlists) *
2308                                           sizeof(*execlists->pending)));
2309
2310                         WRITE_ONCE(execlists->pending[0], NULL);
2311                 } else {
2312                         GEM_BUG_ON(!*execlists->active);
2313
2314                         /* port0 completed, advanced to port1 */
2315                         trace_ports(execlists, "completed", execlists->active);
2316
2317                         /*
2318                          * We rely on the hardware being strongly
2319                          * ordered, that the breadcrumb write is
2320                          * coherent (visible from the CPU) before the
2321                          * user interrupt and CSB is processed.
2322                          */
2323                         GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
2324                                    !reset_in_progress(execlists));
2325                         execlists_schedule_out(*execlists->active++);
2326
2327                         GEM_BUG_ON(execlists->active - execlists->inflight >
2328                                    execlists_num_ports(execlists));
2329                 }
2330         } while (head != tail);
2331
2332         execlists->csb_head = head;
2333         set_timeslice(engine);
2334
2335         /*
2336          * Gen11 has proven to fail wrt global observation point between
2337          * entry and tail update, failing on the ordering and thus
2338          * we see an old entry in the context status buffer.
2339          *
2340          * Forcibly evict out entries for the next gpu csb update,
2341          * to increase the odds that we get a fresh entries with non
2342          * working hardware. The cost for doing so comes out mostly with
2343          * the wash as hardware, working or not, will need to do the
2344          * invalidation before.
2345          */
2346         invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2347 }
2348
2349 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2350 {
2351         lockdep_assert_held(&engine->active.lock);
2352         if (!engine->execlists.pending[0]) {
2353                 rcu_read_lock(); /* protect peeking at execlists->active */
2354                 execlists_dequeue(engine);
2355                 rcu_read_unlock();
2356         }
2357 }
2358
2359 static void __execlists_hold(struct i915_request *rq)
2360 {
2361         LIST_HEAD(list);
2362
2363         do {
2364                 struct i915_dependency *p;
2365
2366                 if (i915_request_is_active(rq))
2367                         __i915_request_unsubmit(rq);
2368
2369                 RQ_TRACE(rq, "on hold\n");
2370                 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2371                 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2372                 i915_request_set_hold(rq);
2373
2374                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2375                         struct i915_request *w =
2376                                 container_of(p->waiter, typeof(*w), sched);
2377
2378                         /* Leave semaphores spinning on the other engines */
2379                         if (w->engine != rq->engine)
2380                                 continue;
2381
2382                         if (!i915_request_is_ready(w))
2383                                 continue;
2384
2385                         if (i915_request_completed(w))
2386                                 continue;
2387
2388                         if (i915_request_on_hold(rq))
2389                                 continue;
2390
2391                         list_move_tail(&w->sched.link, &list);
2392                 }
2393
2394                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2395         } while (rq);
2396 }
2397
2398 static bool execlists_hold(struct intel_engine_cs *engine,
2399                            struct i915_request *rq)
2400 {
2401         spin_lock_irq(&engine->active.lock);
2402
2403         if (i915_request_completed(rq)) { /* too late! */
2404                 rq = NULL;
2405                 goto unlock;
2406         }
2407
2408         if (rq->engine != engine) { /* preempted virtual engine */
2409                 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2410
2411                 /*
2412                  * intel_context_inflight() is only protected by virtue
2413                  * of process_csb() being called only by the tasklet (or
2414                  * directly from inside reset while the tasklet is suspended).
2415                  * Assert that neither of those are allowed to run while we
2416                  * poke at the request queues.
2417                  */
2418                 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2419
2420                 /*
2421                  * An unsubmitted request along a virtual engine will
2422                  * remain on the active (this) engine until we are able
2423                  * to process the context switch away (and so mark the
2424                  * context as no longer in flight). That cannot have happened
2425                  * yet, otherwise we would not be hanging!
2426                  */
2427                 spin_lock(&ve->base.active.lock);
2428                 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2429                 GEM_BUG_ON(ve->request != rq);
2430                 ve->request = NULL;
2431                 spin_unlock(&ve->base.active.lock);
2432                 i915_request_put(rq);
2433
2434                 rq->engine = engine;
2435         }
2436
2437         /*
2438          * Transfer this request onto the hold queue to prevent it
2439          * being resumbitted to HW (and potentially completed) before we have
2440          * released it. Since we may have already submitted following
2441          * requests, we need to remove those as well.
2442          */
2443         GEM_BUG_ON(i915_request_on_hold(rq));
2444         GEM_BUG_ON(rq->engine != engine);
2445         __execlists_hold(rq);
2446
2447 unlock:
2448         spin_unlock_irq(&engine->active.lock);
2449         return rq;
2450 }
2451
2452 static bool hold_request(const struct i915_request *rq)
2453 {
2454         struct i915_dependency *p;
2455
2456         /*
2457          * If one of our ancestors is on hold, we must also be on hold,
2458          * otherwise we will bypass it and execute before it.
2459          */
2460         list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
2461                 const struct i915_request *s =
2462                         container_of(p->signaler, typeof(*s), sched);
2463
2464                 if (s->engine != rq->engine)
2465                         continue;
2466
2467                 if (i915_request_on_hold(s))
2468                         return true;
2469         }
2470
2471         return false;
2472 }
2473
2474 static void __execlists_unhold(struct i915_request *rq)
2475 {
2476         LIST_HEAD(list);
2477
2478         do {
2479                 struct i915_dependency *p;
2480
2481                 GEM_BUG_ON(!i915_request_on_hold(rq));
2482                 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2483
2484                 i915_request_clear_hold(rq);
2485                 list_move_tail(&rq->sched.link,
2486                                i915_sched_lookup_priolist(rq->engine,
2487                                                           rq_prio(rq)));
2488                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2489                 RQ_TRACE(rq, "hold release\n");
2490
2491                 /* Also release any children on this engine that are ready */
2492                 list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
2493                         struct i915_request *w =
2494                                 container_of(p->waiter, typeof(*w), sched);
2495
2496                         if (w->engine != rq->engine)
2497                                 continue;
2498
2499                         if (!i915_request_on_hold(rq))
2500                                 continue;
2501
2502                         /* Check that no other parents are also on hold */
2503                         if (hold_request(rq))
2504                                 continue;
2505
2506                         list_move_tail(&w->sched.link, &list);
2507                 }
2508
2509                 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2510         } while (rq);
2511 }
2512
2513 static void execlists_unhold(struct intel_engine_cs *engine,
2514                              struct i915_request *rq)
2515 {
2516         spin_lock_irq(&engine->active.lock);
2517
2518         /*
2519          * Move this request back to the priority queue, and all of its
2520          * children and grandchildren that were suspended along with it.
2521          */
2522         __execlists_unhold(rq);
2523
2524         if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2525                 engine->execlists.queue_priority_hint = rq_prio(rq);
2526                 tasklet_hi_schedule(&engine->execlists.tasklet);
2527         }
2528
2529         spin_unlock_irq(&engine->active.lock);
2530 }
2531
2532 struct execlists_capture {
2533         struct work_struct work;
2534         struct i915_request *rq;
2535         struct i915_gpu_coredump *error;
2536 };
2537
2538 static void execlists_capture_work(struct work_struct *work)
2539 {
2540         struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2541         const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2542         struct intel_engine_cs *engine = cap->rq->engine;
2543         struct intel_gt_coredump *gt = cap->error->gt;
2544         struct intel_engine_capture_vma *vma;
2545
2546         /* Compress all the objects attached to the request, slow! */
2547         vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2548         if (vma) {
2549                 struct i915_vma_compress *compress =
2550                         i915_vma_capture_prepare(gt);
2551
2552                 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2553                 i915_vma_capture_finish(gt, compress);
2554         }
2555
2556         gt->simulated = gt->engine->simulated;
2557         cap->error->simulated = gt->simulated;
2558
2559         /* Publish the error state, and announce it to the world */
2560         i915_error_state_store(cap->error);
2561         i915_gpu_coredump_put(cap->error);
2562
2563         /* Return this request and all that depend upon it for signaling */
2564         execlists_unhold(engine, cap->rq);
2565         i915_request_put(cap->rq);
2566
2567         kfree(cap);
2568 }
2569
2570 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2571 {
2572         const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2573         struct execlists_capture *cap;
2574
2575         cap = kmalloc(sizeof(*cap), gfp);
2576         if (!cap)
2577                 return NULL;
2578
2579         cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2580         if (!cap->error)
2581                 goto err_cap;
2582
2583         cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2584         if (!cap->error->gt)
2585                 goto err_gpu;
2586
2587         cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2588         if (!cap->error->gt->engine)
2589                 goto err_gt;
2590
2591         return cap;
2592
2593 err_gt:
2594         kfree(cap->error->gt);
2595 err_gpu:
2596         kfree(cap->error);
2597 err_cap:
2598         kfree(cap);
2599         return NULL;
2600 }
2601
2602 static bool execlists_capture(struct intel_engine_cs *engine)
2603 {
2604         struct execlists_capture *cap;
2605
2606         if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
2607                 return true;
2608
2609         /*
2610          * We need to _quickly_ capture the engine state before we reset.
2611          * We are inside an atomic section (softirq) here and we are delaying
2612          * the forced preemption event.
2613          */
2614         cap = capture_regs(engine);
2615         if (!cap)
2616                 return true;
2617
2618         cap->rq = execlists_active(&engine->execlists);
2619         GEM_BUG_ON(!cap->rq);
2620
2621         rcu_read_lock();
2622         cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2623         cap->rq = i915_request_get_rcu(cap->rq);
2624         rcu_read_unlock();
2625         if (!cap->rq)
2626                 goto err_free;
2627
2628         /*
2629          * Remove the request from the execlists queue, and take ownership
2630          * of the request. We pass it to our worker who will _slowly_ compress
2631          * all the pages the _user_ requested for debugging their batch, after
2632          * which we return it to the queue for signaling.
2633          *
2634          * By removing them from the execlists queue, we also remove the
2635          * requests from being processed by __unwind_incomplete_requests()
2636          * during the intel_engine_reset(), and so they will *not* be replayed
2637          * afterwards.
2638          *
2639          * Note that because we have not yet reset the engine at this point,
2640          * it is possible for the request that we have identified as being
2641          * guilty, did in fact complete and we will then hit an arbitration
2642          * point allowing the outstanding preemption to succeed. The likelihood
2643          * of that is very low (as capturing of the engine registers should be
2644          * fast enough to run inside an irq-off atomic section!), so we will
2645          * simply hold that request accountable for being non-preemptible
2646          * long enough to force the reset.
2647          */
2648         if (!execlists_hold(engine, cap->rq))
2649                 goto err_rq;
2650
2651         INIT_WORK(&cap->work, execlists_capture_work);
2652         schedule_work(&cap->work);
2653         return true;
2654
2655 err_rq:
2656         i915_request_put(cap->rq);
2657 err_free:
2658         i915_gpu_coredump_put(cap->error);
2659         kfree(cap);
2660         return false;
2661 }
2662
2663 static noinline void preempt_reset(struct intel_engine_cs *engine)
2664 {
2665         const unsigned int bit = I915_RESET_ENGINE + engine->id;
2666         unsigned long *lock = &engine->gt->reset.flags;
2667
2668         if (i915_modparams.reset < 3)
2669                 return;
2670
2671         if (test_and_set_bit(bit, lock))
2672                 return;
2673
2674         /* Mark this tasklet as disabled to avoid waiting for it to complete */
2675         tasklet_disable_nosync(&engine->execlists.tasklet);
2676
2677         ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
2678                      READ_ONCE(engine->props.preempt_timeout_ms),
2679                      jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
2680
2681         ring_set_paused(engine, 1); /* Freeze the current request in place */
2682         if (execlists_capture(engine))
2683                 intel_engine_reset(engine, "preemption time out");
2684         else
2685                 ring_set_paused(engine, 0);
2686
2687         tasklet_enable(&engine->execlists.tasklet);
2688         clear_and_wake_up_bit(bit, lock);
2689 }
2690
2691 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2692 {
2693         const struct timer_list *t = &engine->execlists.preempt;
2694
2695         if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2696                 return false;
2697
2698         if (!timer_expired(t))
2699                 return false;
2700
2701         return READ_ONCE(engine->execlists.pending[0]);
2702 }
2703
2704 /*
2705  * Check the unread Context Status Buffers and manage the submission of new
2706  * contexts to the ELSP accordingly.
2707  */
2708 static void execlists_submission_tasklet(unsigned long data)
2709 {
2710         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
2711         bool timeout = preempt_timeout(engine);
2712
2713         process_csb(engine);
2714         if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
2715                 unsigned long flags;
2716
2717                 spin_lock_irqsave(&engine->active.lock, flags);
2718                 __execlists_submission_tasklet(engine);
2719                 spin_unlock_irqrestore(&engine->active.lock, flags);
2720
2721                 /* Recheck after serialising with direct-submission */
2722                 if (timeout && preempt_timeout(engine))
2723                         preempt_reset(engine);
2724         }
2725 }
2726
2727 static void __execlists_kick(struct intel_engine_execlists *execlists)
2728 {
2729         /* Kick the tasklet for some interrupt coalescing and reset handling */
2730         tasklet_hi_schedule(&execlists->tasklet);
2731 }
2732
2733 #define execlists_kick(t, member) \
2734         __execlists_kick(container_of(t, struct intel_engine_execlists, member))
2735
2736 static void execlists_timeslice(struct timer_list *timer)
2737 {
2738         execlists_kick(timer, timer);
2739 }
2740
2741 static void execlists_preempt(struct timer_list *timer)
2742 {
2743         execlists_kick(timer, preempt);
2744 }
2745
2746 static void queue_request(struct intel_engine_cs *engine,
2747                           struct i915_request *rq)
2748 {
2749         GEM_BUG_ON(!list_empty(&rq->sched.link));
2750         list_add_tail(&rq->sched.link,
2751                       i915_sched_lookup_priolist(engine, rq_prio(rq)));
2752         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2753 }
2754
2755 static void __submit_queue_imm(struct intel_engine_cs *engine)
2756 {
2757         struct intel_engine_execlists * const execlists = &engine->execlists;
2758
2759         if (reset_in_progress(execlists))
2760                 return; /* defer until we restart the engine following reset */
2761
2762         if (execlists->tasklet.func == execlists_submission_tasklet)
2763                 __execlists_submission_tasklet(engine);
2764         else
2765                 tasklet_hi_schedule(&execlists->tasklet);
2766 }
2767
2768 static void submit_queue(struct intel_engine_cs *engine,
2769                          const struct i915_request *rq)
2770 {
2771         struct intel_engine_execlists *execlists = &engine->execlists;
2772
2773         if (rq_prio(rq) <= execlists->queue_priority_hint)
2774                 return;
2775
2776         execlists->queue_priority_hint = rq_prio(rq);
2777         __submit_queue_imm(engine);
2778 }
2779
2780 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
2781                              const struct i915_request *rq)
2782 {
2783         GEM_BUG_ON(i915_request_on_hold(rq));
2784         return !list_empty(&engine->active.hold) && hold_request(rq);
2785 }
2786
2787 static void execlists_submit_request(struct i915_request *request)
2788 {
2789         struct intel_engine_cs *engine = request->engine;
2790         unsigned long flags;
2791
2792         /* Will be called from irq-context when using foreign fences. */
2793         spin_lock_irqsave(&engine->active.lock, flags);
2794
2795         if (unlikely(ancestor_on_hold(engine, request))) {
2796                 list_add_tail(&request->sched.link, &engine->active.hold);
2797                 i915_request_set_hold(request);
2798         } else {
2799                 queue_request(engine, request);
2800
2801                 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
2802                 GEM_BUG_ON(list_empty(&request->sched.link));
2803
2804                 submit_queue(engine, request);
2805         }
2806
2807         spin_unlock_irqrestore(&engine->active.lock, flags);
2808 }
2809
2810 static void __execlists_context_fini(struct intel_context *ce)
2811 {
2812         intel_ring_put(ce->ring);
2813         i915_vma_put(ce->state);
2814 }
2815
2816 static void execlists_context_destroy(struct kref *kref)
2817 {
2818         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2819
2820         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
2821         GEM_BUG_ON(intel_context_is_pinned(ce));
2822
2823         if (ce->state)
2824                 __execlists_context_fini(ce);
2825
2826         intel_context_fini(ce);
2827         intel_context_free(ce);
2828 }
2829
2830 static void
2831 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
2832 {
2833         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2834                 return;
2835
2836         vaddr += engine->context_size;
2837
2838         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
2839 }
2840
2841 static void
2842 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
2843 {
2844         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2845                 return;
2846
2847         vaddr += engine->context_size;
2848
2849         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
2850                 dev_err_once(engine->i915->drm.dev,
2851                              "%s context redzone overwritten!\n",
2852                              engine->name);
2853 }
2854
2855 static void execlists_context_unpin(struct intel_context *ce)
2856 {
2857         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
2858                       ce->engine);
2859
2860         i915_gem_object_unpin_map(ce->state->obj);
2861 }
2862
2863 static void
2864 __execlists_update_reg_state(const struct intel_context *ce,
2865                              const struct intel_engine_cs *engine,
2866                              u32 head)
2867 {
2868         struct intel_ring *ring = ce->ring;
2869         u32 *regs = ce->lrc_reg_state;
2870
2871         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
2872         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
2873
2874         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
2875         regs[CTX_RING_HEAD] = head;
2876         regs[CTX_RING_TAIL] = ring->tail;
2877
2878         /* RPCS */
2879         if (engine->class == RENDER_CLASS) {
2880                 regs[CTX_R_PWR_CLK_STATE] =
2881                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
2882
2883                 i915_oa_init_reg_state(ce, engine);
2884         }
2885 }
2886
2887 static int
2888 __execlists_context_pin(struct intel_context *ce,
2889                         struct intel_engine_cs *engine)
2890 {
2891         void *vaddr;
2892
2893         GEM_BUG_ON(!ce->state);
2894         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
2895
2896         vaddr = i915_gem_object_pin_map(ce->state->obj,
2897                                         i915_coherent_map_type(engine->i915) |
2898                                         I915_MAP_OVERRIDE);
2899         if (IS_ERR(vaddr))
2900                 return PTR_ERR(vaddr);
2901
2902         ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
2903         ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
2904         __execlists_update_reg_state(ce, engine, ce->ring->tail);
2905
2906         return 0;
2907 }
2908
2909 static int execlists_context_pin(struct intel_context *ce)
2910 {
2911         return __execlists_context_pin(ce, ce->engine);
2912 }
2913
2914 static int execlists_context_alloc(struct intel_context *ce)
2915 {
2916         return __execlists_context_alloc(ce, ce->engine);
2917 }
2918
2919 static void execlists_context_reset(struct intel_context *ce)
2920 {
2921         CE_TRACE(ce, "reset\n");
2922         GEM_BUG_ON(!intel_context_is_pinned(ce));
2923
2924         /*
2925          * Because we emit WA_TAIL_DWORDS there may be a disparity
2926          * between our bookkeeping in ce->ring->head and ce->ring->tail and
2927          * that stored in context. As we only write new commands from
2928          * ce->ring->tail onwards, everything before that is junk. If the GPU
2929          * starts reading from its RING_HEAD from the context, it may try to
2930          * execute that junk and die.
2931          *
2932          * The contexts that are stilled pinned on resume belong to the
2933          * kernel, and are local to each engine. All other contexts will
2934          * have their head/tail sanitized upon pinning before use, so they
2935          * will never see garbage,
2936          *
2937          * So to avoid that we reset the context images upon resume. For
2938          * simplicity, we just zero everything out.
2939          */
2940         intel_ring_reset(ce->ring, ce->ring->emit);
2941
2942         /* Scrub away the garbage */
2943         execlists_init_reg_state(ce->lrc_reg_state,
2944                                  ce, ce->engine, ce->ring, true);
2945         __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
2946
2947         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
2948 }
2949
2950 static const struct intel_context_ops execlists_context_ops = {
2951         .alloc = execlists_context_alloc,
2952
2953         .pin = execlists_context_pin,
2954         .unpin = execlists_context_unpin,
2955
2956         .enter = intel_context_enter_engine,
2957         .exit = intel_context_exit_engine,
2958
2959         .reset = execlists_context_reset,
2960         .destroy = execlists_context_destroy,
2961 };
2962
2963 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
2964 {
2965         u32 *cs;
2966
2967         GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
2968
2969         cs = intel_ring_begin(rq, 6);
2970         if (IS_ERR(cs))
2971                 return PTR_ERR(cs);
2972
2973         /*
2974          * Check if we have been preempted before we even get started.
2975          *
2976          * After this point i915_request_started() reports true, even if
2977          * we get preempted and so are no longer running.
2978          */
2979         *cs++ = MI_ARB_CHECK;
2980         *cs++ = MI_NOOP;
2981
2982         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2983         *cs++ = i915_request_timeline(rq)->hwsp_offset;
2984         *cs++ = 0;
2985         *cs++ = rq->fence.seqno - 1;
2986
2987         intel_ring_advance(rq, cs);
2988
2989         /* Record the updated position of the request's payload */
2990         rq->infix = intel_ring_offset(rq, cs);
2991
2992         return 0;
2993 }
2994
2995 static int execlists_request_alloc(struct i915_request *request)
2996 {
2997         int ret;
2998
2999         GEM_BUG_ON(!intel_context_is_pinned(request->context));
3000
3001         /*
3002          * Flush enough space to reduce the likelihood of waiting after
3003          * we start building the request - in which case we will just
3004          * have to repeat work.
3005          */
3006         request->reserved_space += EXECLISTS_REQUEST_SIZE;
3007
3008         /*
3009          * Note that after this point, we have committed to using
3010          * this request as it is being used to both track the
3011          * state of engine initialisation and liveness of the
3012          * golden renderstate above. Think twice before you try
3013          * to cancel/unwind this request now.
3014          */
3015
3016         /* Unconditionally invalidate GPU caches and TLBs. */
3017         ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3018         if (ret)
3019                 return ret;
3020
3021         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3022         return 0;
3023 }
3024
3025 /*
3026  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3027  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3028  * but there is a slight complication as this is applied in WA batch where the
3029  * values are only initialized once so we cannot take register value at the
3030  * beginning and reuse it further; hence we save its value to memory, upload a
3031  * constant value with bit21 set and then we restore it back with the saved value.
3032  * To simplify the WA, a constant value is formed by using the default value
3033  * of this register. This shouldn't be a problem because we are only modifying
3034  * it for a short period and this batch in non-premptible. We can ofcourse
3035  * use additional instructions that read the actual value of the register
3036  * at that time and set our bit of interest but it makes the WA complicated.
3037  *
3038  * This WA is also required for Gen9 so extracting as a function avoids
3039  * code duplication.
3040  */
3041 static u32 *
3042 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3043 {
3044         /* NB no one else is allowed to scribble over scratch + 256! */
3045         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3046         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3047         *batch++ = intel_gt_scratch_offset(engine->gt,
3048                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3049         *batch++ = 0;
3050
3051         *batch++ = MI_LOAD_REGISTER_IMM(1);
3052         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3053         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3054
3055         batch = gen8_emit_pipe_control(batch,
3056                                        PIPE_CONTROL_CS_STALL |
3057                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
3058                                        0);
3059
3060         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3061         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3062         *batch++ = intel_gt_scratch_offset(engine->gt,
3063                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3064         *batch++ = 0;
3065
3066         return batch;
3067 }
3068
3069 /*
3070  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3071  * initialized at the beginning and shared across all contexts but this field
3072  * helps us to have multiple batches at different offsets and select them based
3073  * on a criteria. At the moment this batch always start at the beginning of the page
3074  * and at this point we don't have multiple wa_ctx batch buffers.
3075  *
3076  * The number of WA applied are not known at the beginning; we use this field
3077  * to return the no of DWORDS written.
3078  *
3079  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3080  * so it adds NOOPs as padding to make it cacheline aligned.
3081  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
3082  * makes a complete batch buffer.
3083  */
3084 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3085 {
3086         /* WaDisableCtxRestoreArbitration:bdw,chv */
3087         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3088
3089         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3090         if (IS_BROADWELL(engine->i915))
3091                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3092
3093         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3094         /* Actual scratch location is at 128 bytes offset */
3095         batch = gen8_emit_pipe_control(batch,
3096                                        PIPE_CONTROL_FLUSH_L3 |
3097                                        PIPE_CONTROL_STORE_DATA_INDEX |
3098                                        PIPE_CONTROL_CS_STALL |
3099                                        PIPE_CONTROL_QW_WRITE,
3100                                        LRC_PPHWSP_SCRATCH_ADDR);
3101
3102         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3103
3104         /* Pad to end of cacheline */
3105         while ((unsigned long)batch % CACHELINE_BYTES)
3106                 *batch++ = MI_NOOP;
3107
3108         /*
3109          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3110          * execution depends on the length specified in terms of cache lines
3111          * in the register CTX_RCS_INDIRECT_CTX
3112          */
3113
3114         return batch;
3115 }
3116
3117 struct lri {
3118         i915_reg_t reg;
3119         u32 value;
3120 };
3121
3122 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3123 {
3124         GEM_BUG_ON(!count || count > 63);
3125
3126         *batch++ = MI_LOAD_REGISTER_IMM(count);
3127         do {
3128                 *batch++ = i915_mmio_reg_offset(lri->reg);
3129                 *batch++ = lri->value;
3130         } while (lri++, --count);
3131         *batch++ = MI_NOOP;
3132
3133         return batch;
3134 }
3135
3136 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3137 {
3138         static const struct lri lri[] = {
3139                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3140                 {
3141                         COMMON_SLICE_CHICKEN2,
3142                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3143                                        0),
3144                 },
3145
3146                 /* BSpec: 11391 */
3147                 {
3148                         FF_SLICE_CHICKEN,
3149                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3150                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3151                 },
3152
3153                 /* BSpec: 11299 */
3154                 {
3155                         _3D_CHICKEN3,
3156                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3157                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3158                 }
3159         };
3160
3161         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3162
3163         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3164         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3165
3166         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3167         batch = gen8_emit_pipe_control(batch,
3168                                        PIPE_CONTROL_FLUSH_L3 |
3169                                        PIPE_CONTROL_STORE_DATA_INDEX |
3170                                        PIPE_CONTROL_CS_STALL |
3171                                        PIPE_CONTROL_QW_WRITE,
3172                                        LRC_PPHWSP_SCRATCH_ADDR);
3173
3174         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3175
3176         /* WaMediaPoolStateCmdInWABB:bxt,glk */
3177         if (HAS_POOLED_EU(engine->i915)) {
3178                 /*
3179                  * EU pool configuration is setup along with golden context
3180                  * during context initialization. This value depends on
3181                  * device type (2x6 or 3x6) and needs to be updated based
3182                  * on which subslice is disabled especially for 2x6
3183                  * devices, however it is safe to load default
3184                  * configuration of 3x6 device instead of masking off
3185                  * corresponding bits because HW ignores bits of a disabled
3186                  * subslice and drops down to appropriate config. Please
3187                  * see render_state_setup() in i915_gem_render_state.c for
3188                  * possible configurations, to avoid duplication they are
3189                  * not shown here again.
3190                  */
3191                 *batch++ = GEN9_MEDIA_POOL_STATE;
3192                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3193                 *batch++ = 0x00777000;
3194                 *batch++ = 0;
3195                 *batch++ = 0;
3196                 *batch++ = 0;
3197         }
3198
3199         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3200
3201         /* Pad to end of cacheline */
3202         while ((unsigned long)batch % CACHELINE_BYTES)
3203                 *batch++ = MI_NOOP;
3204
3205         return batch;
3206 }
3207
3208 static u32 *
3209 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3210 {
3211         int i;
3212
3213         /*
3214          * WaPipeControlBefore3DStateSamplePattern: cnl
3215          *
3216          * Ensure the engine is idle prior to programming a
3217          * 3DSTATE_SAMPLE_PATTERN during a context restore.
3218          */
3219         batch = gen8_emit_pipe_control(batch,
3220                                        PIPE_CONTROL_CS_STALL,
3221                                        0);
3222         /*
3223          * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3224          * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3225          * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3226          * confusing. Since gen8_emit_pipe_control() already advances the
3227          * batch by 6 dwords, we advance the other 10 here, completing a
3228          * cacheline. It's not clear if the workaround requires this padding
3229          * before other commands, or if it's just the regular padding we would
3230          * already have for the workaround bb, so leave it here for now.
3231          */
3232         for (i = 0; i < 10; i++)
3233                 *batch++ = MI_NOOP;
3234
3235         /* Pad to end of cacheline */
3236         while ((unsigned long)batch % CACHELINE_BYTES)
3237                 *batch++ = MI_NOOP;
3238
3239         return batch;
3240 }
3241
3242 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3243
3244 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3245 {
3246         struct drm_i915_gem_object *obj;
3247         struct i915_vma *vma;
3248         int err;
3249
3250         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3251         if (IS_ERR(obj))
3252                 return PTR_ERR(obj);
3253
3254         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3255         if (IS_ERR(vma)) {
3256                 err = PTR_ERR(vma);
3257                 goto err;
3258         }
3259
3260         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
3261         if (err)
3262                 goto err;
3263
3264         engine->wa_ctx.vma = vma;
3265         return 0;
3266
3267 err:
3268         i915_gem_object_put(obj);
3269         return err;
3270 }
3271
3272 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3273 {
3274         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3275 }
3276
3277 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3278
3279 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3280 {
3281         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3282         struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3283                                             &wa_ctx->per_ctx };
3284         wa_bb_func_t wa_bb_fn[2];
3285         struct page *page;
3286         void *batch, *batch_ptr;
3287         unsigned int i;
3288         int ret;
3289
3290         if (engine->class != RENDER_CLASS)
3291                 return 0;
3292
3293         switch (INTEL_GEN(engine->i915)) {
3294         case 12:
3295         case 11:
3296                 return 0;
3297         case 10:
3298                 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3299                 wa_bb_fn[1] = NULL;
3300                 break;
3301         case 9:
3302                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3303                 wa_bb_fn[1] = NULL;
3304                 break;
3305         case 8:
3306                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3307                 wa_bb_fn[1] = NULL;
3308                 break;
3309         default:
3310                 MISSING_CASE(INTEL_GEN(engine->i915));
3311                 return 0;
3312         }
3313
3314         ret = lrc_setup_wa_ctx(engine);
3315         if (ret) {
3316                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
3317                 return ret;
3318         }
3319
3320         page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
3321         batch = batch_ptr = kmap_atomic(page);
3322
3323         /*
3324          * Emit the two workaround batch buffers, recording the offset from the
3325          * start of the workaround batch buffer object for each and their
3326          * respective sizes.
3327          */
3328         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3329                 wa_bb[i]->offset = batch_ptr - batch;
3330                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3331                                                   CACHELINE_BYTES))) {
3332                         ret = -EINVAL;
3333                         break;
3334                 }
3335                 if (wa_bb_fn[i])
3336                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3337                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3338         }
3339
3340         BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3341
3342         kunmap_atomic(batch);
3343         if (ret)
3344                 lrc_destroy_wa_ctx(engine);
3345
3346         return ret;
3347 }
3348
3349 static void enable_execlists(struct intel_engine_cs *engine)
3350 {
3351         u32 mode;
3352
3353         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3354
3355         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3356
3357         if (INTEL_GEN(engine->i915) >= 11)
3358                 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
3359         else
3360                 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
3361         ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
3362
3363         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3364
3365         ENGINE_WRITE_FW(engine,
3366                         RING_HWS_PGA,
3367                         i915_ggtt_offset(engine->status_page.vma));
3368         ENGINE_POSTING_READ(engine, RING_HWS_PGA);
3369
3370         engine->context_tag = 0;
3371 }
3372
3373 static bool unexpected_starting_state(struct intel_engine_cs *engine)
3374 {
3375         bool unexpected = false;
3376
3377         if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
3378                 DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
3379                 unexpected = true;
3380         }
3381
3382         return unexpected;
3383 }
3384
3385 static int execlists_resume(struct intel_engine_cs *engine)
3386 {
3387         intel_engine_apply_workarounds(engine);
3388         intel_engine_apply_whitelist(engine);
3389
3390         intel_mocs_init_engine(engine);
3391
3392         intel_engine_reset_breadcrumbs(engine);
3393
3394         if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
3395                 struct drm_printer p = drm_debug_printer(__func__);
3396
3397                 intel_engine_dump(engine, &p, NULL);
3398         }
3399
3400         enable_execlists(engine);
3401
3402         return 0;
3403 }
3404
3405 static void execlists_reset_prepare(struct intel_engine_cs *engine)
3406 {
3407         struct intel_engine_execlists * const execlists = &engine->execlists;
3408         unsigned long flags;
3409
3410         ENGINE_TRACE(engine, "depth<-%d\n",
3411                      atomic_read(&execlists->tasklet.count));
3412
3413         /*
3414          * Prevent request submission to the hardware until we have
3415          * completed the reset in i915_gem_reset_finish(). If a request
3416          * is completed by one engine, it may then queue a request
3417          * to a second via its execlists->tasklet *just* as we are
3418          * calling engine->resume() and also writing the ELSP.
3419          * Turning off the execlists->tasklet until the reset is over
3420          * prevents the race.
3421          */
3422         __tasklet_disable_sync_once(&execlists->tasklet);
3423         GEM_BUG_ON(!reset_in_progress(execlists));
3424
3425         /* And flush any current direct submission. */
3426         spin_lock_irqsave(&engine->active.lock, flags);
3427         spin_unlock_irqrestore(&engine->active.lock, flags);
3428
3429         /*
3430          * We stop engines, otherwise we might get failed reset and a
3431          * dead gpu (on elk). Also as modern gpu as kbl can suffer
3432          * from system hang if batchbuffer is progressing when
3433          * the reset is issued, regardless of READY_TO_RESET ack.
3434          * Thus assume it is best to stop engines on all gens
3435          * where we have a gpu reset.
3436          *
3437          * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
3438          *
3439          * FIXME: Wa for more modern gens needs to be validated
3440          */
3441         intel_engine_stop_cs(engine);
3442 }
3443
3444 static void reset_csb_pointers(struct intel_engine_cs *engine)
3445 {
3446         struct intel_engine_execlists * const execlists = &engine->execlists;
3447         const unsigned int reset_value = execlists->csb_size - 1;
3448
3449         ring_set_paused(engine, 0);
3450
3451         /*
3452          * After a reset, the HW starts writing into CSB entry [0]. We
3453          * therefore have to set our HEAD pointer back one entry so that
3454          * the *first* entry we check is entry 0. To complicate this further,
3455          * as we don't wait for the first interrupt after reset, we have to
3456          * fake the HW write to point back to the last entry so that our
3457          * inline comparison of our cached head position against the last HW
3458          * write works even before the first interrupt.
3459          */
3460         execlists->csb_head = reset_value;
3461         WRITE_ONCE(*execlists->csb_write, reset_value);
3462         wmb(); /* Make sure this is visible to HW (paranoia?) */
3463
3464         /*
3465          * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3466          * Bludgeon them with a mmio update to be sure.
3467          */
3468         ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3469                      reset_value << 8 | reset_value);
3470         ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3471
3472         invalidate_csb_entries(&execlists->csb_status[0],
3473                                &execlists->csb_status[reset_value]);
3474 }
3475
3476 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
3477 {
3478         int x;
3479
3480         x = lrc_ring_mi_mode(engine);
3481         if (x != -1) {
3482                 regs[x + 1] &= ~STOP_RING;
3483                 regs[x + 1] |= STOP_RING << 16;
3484         }
3485 }
3486
3487 static void __execlists_reset_reg_state(const struct intel_context *ce,
3488                                         const struct intel_engine_cs *engine)
3489 {
3490         u32 *regs = ce->lrc_reg_state;
3491
3492         __reset_stop_ring(regs, engine);
3493 }
3494
3495 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
3496 {
3497         struct intel_engine_execlists * const execlists = &engine->execlists;
3498         struct intel_context *ce;
3499         struct i915_request *rq;
3500         u32 head;
3501
3502         mb(); /* paranoia: read the CSB pointers from after the reset */
3503         clflush(execlists->csb_write);
3504         mb();
3505
3506         process_csb(engine); /* drain preemption events */
3507
3508         /* Following the reset, we need to reload the CSB read/write pointers */
3509         reset_csb_pointers(engine);
3510
3511         /*
3512          * Save the currently executing context, even if we completed
3513          * its request, it was still running at the time of the
3514          * reset and will have been clobbered.
3515          */
3516         rq = execlists_active(execlists);
3517         if (!rq)
3518                 goto unwind;
3519
3520         /* We still have requests in-flight; the engine should be active */
3521         GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
3522
3523         ce = rq->context;
3524         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3525
3526         if (i915_request_completed(rq)) {
3527                 /* Idle context; tidy up the ring so we can restart afresh */
3528                 head = intel_ring_wrap(ce->ring, rq->tail);
3529                 goto out_replay;
3530         }
3531
3532         /* Context has requests still in-flight; it should not be idle! */
3533         GEM_BUG_ON(i915_active_is_idle(&ce->active));
3534         rq = active_request(ce->timeline, rq);
3535         head = intel_ring_wrap(ce->ring, rq->head);
3536         GEM_BUG_ON(head == ce->ring->tail);
3537
3538         /*
3539          * If this request hasn't started yet, e.g. it is waiting on a
3540          * semaphore, we need to avoid skipping the request or else we
3541          * break the signaling chain. However, if the context is corrupt
3542          * the request will not restart and we will be stuck with a wedged
3543          * device. It is quite often the case that if we issue a reset
3544          * while the GPU is loading the context image, that the context
3545          * image becomes corrupt.
3546          *
3547          * Otherwise, if we have not started yet, the request should replay
3548          * perfectly and we do not need to flag the result as being erroneous.
3549          */
3550         if (!i915_request_started(rq))
3551                 goto out_replay;
3552
3553         /*
3554          * If the request was innocent, we leave the request in the ELSP
3555          * and will try to replay it on restarting. The context image may
3556          * have been corrupted by the reset, in which case we may have
3557          * to service a new GPU hang, but more likely we can continue on
3558          * without impact.
3559          *
3560          * If the request was guilty, we presume the context is corrupt
3561          * and have to at least restore the RING register in the context
3562          * image back to the expected values to skip over the guilty request.
3563          */
3564         __i915_request_reset(rq, stalled);
3565         if (!stalled)
3566                 goto out_replay;
3567
3568         /*
3569          * We want a simple context + ring to execute the breadcrumb update.
3570          * We cannot rely on the context being intact across the GPU hang,
3571          * so clear it and rebuild just what we need for the breadcrumb.
3572          * All pending requests for this context will be zapped, and any
3573          * future request will be after userspace has had the opportunity
3574          * to recreate its own state.
3575          */
3576         GEM_BUG_ON(!intel_context_is_pinned(ce));
3577         restore_default_state(ce, engine);
3578
3579 out_replay:
3580         ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
3581                      head, ce->ring->tail);
3582         __execlists_reset_reg_state(ce, engine);
3583         __execlists_update_reg_state(ce, engine, head);
3584         ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
3585
3586 unwind:
3587         /* Push back any incomplete requests for replay after the reset. */
3588         cancel_port_requests(execlists);
3589         __unwind_incomplete_requests(engine);
3590 }
3591
3592 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
3593 {
3594         unsigned long flags;
3595
3596         ENGINE_TRACE(engine, "\n");
3597
3598         spin_lock_irqsave(&engine->active.lock, flags);
3599
3600         __execlists_reset(engine, stalled);
3601
3602         spin_unlock_irqrestore(&engine->active.lock, flags);
3603 }
3604
3605 static void nop_submission_tasklet(unsigned long data)
3606 {
3607         /* The driver is wedged; don't process any more events. */
3608 }
3609
3610 static void execlists_reset_cancel(struct intel_engine_cs *engine)
3611 {
3612         struct intel_engine_execlists * const execlists = &engine->execlists;
3613         struct i915_request *rq, *rn;
3614         struct rb_node *rb;
3615         unsigned long flags;
3616
3617         ENGINE_TRACE(engine, "\n");
3618
3619         /*
3620          * Before we call engine->cancel_requests(), we should have exclusive
3621          * access to the submission state. This is arranged for us by the
3622          * caller disabling the interrupt generation, the tasklet and other
3623          * threads that may then access the same state, giving us a free hand
3624          * to reset state. However, we still need to let lockdep be aware that
3625          * we know this state may be accessed in hardirq context, so we
3626          * disable the irq around this manipulation and we want to keep
3627          * the spinlock focused on its duties and not accidentally conflate
3628          * coverage to the submission's irq state. (Similarly, although we
3629          * shouldn't need to disable irq around the manipulation of the
3630          * submission's irq state, we also wish to remind ourselves that
3631          * it is irq state.)
3632          */
3633         spin_lock_irqsave(&engine->active.lock, flags);
3634
3635         __execlists_reset(engine, true);
3636
3637         /* Mark all executing requests as skipped. */
3638         list_for_each_entry(rq, &engine->active.requests, sched.link)
3639                 mark_eio(rq);
3640
3641         /* Flush the queued requests to the timeline list (for retiring). */
3642         while ((rb = rb_first_cached(&execlists->queue))) {
3643                 struct i915_priolist *p = to_priolist(rb);
3644                 int i;
3645
3646                 priolist_for_each_request_consume(rq, rn, p, i) {
3647                         mark_eio(rq);
3648                         __i915_request_submit(rq);
3649                 }
3650
3651                 rb_erase_cached(&p->node, &execlists->queue);
3652                 i915_priolist_free(p);
3653         }
3654
3655         /* On-hold requests will be flushed to timeline upon their release */
3656         list_for_each_entry(rq, &engine->active.hold, sched.link)
3657                 mark_eio(rq);
3658
3659         /* Cancel all attached virtual engines */
3660         while ((rb = rb_first_cached(&execlists->virtual))) {
3661                 struct virtual_engine *ve =
3662                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3663
3664                 rb_erase_cached(rb, &execlists->virtual);
3665                 RB_CLEAR_NODE(rb);
3666
3667                 spin_lock(&ve->base.active.lock);
3668                 rq = fetch_and_zero(&ve->request);
3669                 if (rq) {
3670                         mark_eio(rq);
3671
3672                         rq->engine = engine;
3673                         __i915_request_submit(rq);
3674                         i915_request_put(rq);
3675
3676                         ve->base.execlists.queue_priority_hint = INT_MIN;
3677                 }
3678                 spin_unlock(&ve->base.active.lock);
3679         }
3680
3681         /* Remaining _unready_ requests will be nop'ed when submitted */
3682
3683         execlists->queue_priority_hint = INT_MIN;
3684         execlists->queue = RB_ROOT_CACHED;
3685
3686         GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
3687         execlists->tasklet.func = nop_submission_tasklet;
3688
3689         spin_unlock_irqrestore(&engine->active.lock, flags);
3690 }
3691
3692 static void execlists_reset_finish(struct intel_engine_cs *engine)
3693 {
3694         struct intel_engine_execlists * const execlists = &engine->execlists;
3695
3696         /*
3697          * After a GPU reset, we may have requests to replay. Do so now while
3698          * we still have the forcewake to be sure that the GPU is not allowed
3699          * to sleep before we restart and reload a context.
3700          */
3701         GEM_BUG_ON(!reset_in_progress(execlists));
3702         if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
3703                 execlists->tasklet.func(execlists->tasklet.data);
3704
3705         if (__tasklet_enable(&execlists->tasklet))
3706                 /* And kick in case we missed a new request submission. */
3707                 tasklet_hi_schedule(&execlists->tasklet);
3708         ENGINE_TRACE(engine, "depth->%d\n",
3709                      atomic_read(&execlists->tasklet.count));
3710 }
3711
3712 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
3713                                     u64 offset, u32 len,
3714                                     const unsigned int flags)
3715 {
3716         u32 *cs;
3717
3718         cs = intel_ring_begin(rq, 4);
3719         if (IS_ERR(cs))
3720                 return PTR_ERR(cs);
3721
3722         /*
3723          * WaDisableCtxRestoreArbitration:bdw,chv
3724          *
3725          * We don't need to perform MI_ARB_ENABLE as often as we do (in
3726          * particular all the gen that do not need the w/a at all!), if we
3727          * took care to make sure that on every switch into this context
3728          * (both ordinary and for preemption) that arbitrartion was enabled
3729          * we would be fine.  However, for gen8 there is another w/a that
3730          * requires us to not preempt inside GPGPU execution, so we keep
3731          * arbitration disabled for gen8 batches. Arbitration will be
3732          * re-enabled before we close the request
3733          * (engine->emit_fini_breadcrumb).
3734          */
3735         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3736
3737         /* FIXME(BDW+): Address space and security selectors. */
3738         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3739                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3740         *cs++ = lower_32_bits(offset);
3741         *cs++ = upper_32_bits(offset);
3742
3743         intel_ring_advance(rq, cs);
3744
3745         return 0;
3746 }
3747
3748 static int gen8_emit_bb_start(struct i915_request *rq,
3749                               u64 offset, u32 len,
3750                               const unsigned int flags)
3751 {
3752         u32 *cs;
3753
3754         cs = intel_ring_begin(rq, 6);
3755         if (IS_ERR(cs))
3756                 return PTR_ERR(cs);
3757
3758         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3759
3760         *cs++ = MI_BATCH_BUFFER_START_GEN8 |
3761                 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
3762         *cs++ = lower_32_bits(offset);
3763         *cs++ = upper_32_bits(offset);
3764
3765         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3766         *cs++ = MI_NOOP;
3767
3768         intel_ring_advance(rq, cs);
3769
3770         return 0;
3771 }
3772
3773 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3774 {
3775         ENGINE_WRITE(engine, RING_IMR,
3776                      ~(engine->irq_enable_mask | engine->irq_keep_mask));
3777         ENGINE_POSTING_READ(engine, RING_IMR);
3778 }
3779
3780 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3781 {
3782         ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3783 }
3784
3785 static int gen8_emit_flush(struct i915_request *request, u32 mode)
3786 {
3787         u32 cmd, *cs;
3788
3789         cs = intel_ring_begin(request, 4);
3790         if (IS_ERR(cs))
3791                 return PTR_ERR(cs);
3792
3793         cmd = MI_FLUSH_DW + 1;
3794
3795         /* We always require a command barrier so that subsequent
3796          * commands, such as breadcrumb interrupts, are strictly ordered
3797          * wrt the contents of the write cache being flushed to memory
3798          * (and thus being coherent from the CPU).
3799          */
3800         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
3801
3802         if (mode & EMIT_INVALIDATE) {
3803                 cmd |= MI_INVALIDATE_TLB;
3804                 if (request->engine->class == VIDEO_DECODE_CLASS)
3805                         cmd |= MI_INVALIDATE_BSD;
3806         }
3807
3808         *cs++ = cmd;
3809         *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
3810         *cs++ = 0; /* upper addr */
3811         *cs++ = 0; /* value */
3812         intel_ring_advance(request, cs);
3813
3814         return 0;
3815 }
3816
3817 static int gen8_emit_flush_render(struct i915_request *request,
3818                                   u32 mode)
3819 {
3820         bool vf_flush_wa = false, dc_flush_wa = false;
3821         u32 *cs, flags = 0;
3822         int len;
3823
3824         flags |= PIPE_CONTROL_CS_STALL;
3825
3826         if (mode & EMIT_FLUSH) {
3827                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3828                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3829                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3830                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3831         }
3832
3833         if (mode & EMIT_INVALIDATE) {
3834                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3835                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3836                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3837                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3838                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3839                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3840                 flags |= PIPE_CONTROL_QW_WRITE;
3841                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3842
3843                 /*
3844                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
3845                  * pipe control.
3846                  */
3847                 if (IS_GEN(request->i915, 9))
3848                         vf_flush_wa = true;
3849
3850                 /* WaForGAMHang:kbl */
3851                 if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
3852                         dc_flush_wa = true;
3853         }
3854
3855         len = 6;
3856
3857         if (vf_flush_wa)
3858                 len += 6;
3859
3860         if (dc_flush_wa)
3861                 len += 12;
3862
3863         cs = intel_ring_begin(request, len);
3864         if (IS_ERR(cs))
3865                 return PTR_ERR(cs);
3866
3867         if (vf_flush_wa)
3868                 cs = gen8_emit_pipe_control(cs, 0, 0);
3869
3870         if (dc_flush_wa)
3871                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
3872                                             0);
3873
3874         cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3875
3876         if (dc_flush_wa)
3877                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
3878
3879         intel_ring_advance(request, cs);
3880
3881         return 0;
3882 }
3883
3884 static int gen11_emit_flush_render(struct i915_request *request,
3885                                    u32 mode)
3886 {
3887         if (mode & EMIT_FLUSH) {
3888                 u32 *cs;
3889                 u32 flags = 0;
3890
3891                 flags |= PIPE_CONTROL_CS_STALL;
3892
3893                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3894                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3895                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3896                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3897                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3898                 flags |= PIPE_CONTROL_QW_WRITE;
3899                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3900
3901                 cs = intel_ring_begin(request, 6);
3902                 if (IS_ERR(cs))
3903                         return PTR_ERR(cs);
3904
3905                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3906                 intel_ring_advance(request, cs);
3907         }
3908
3909         if (mode & EMIT_INVALIDATE) {
3910                 u32 *cs;
3911                 u32 flags = 0;
3912
3913                 flags |= PIPE_CONTROL_CS_STALL;
3914
3915                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3916                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3917                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3918                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3919                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3920                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3921                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3922                 flags |= PIPE_CONTROL_QW_WRITE;
3923                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3924
3925                 cs = intel_ring_begin(request, 6);
3926                 if (IS_ERR(cs))
3927                         return PTR_ERR(cs);
3928
3929                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3930                 intel_ring_advance(request, cs);
3931         }
3932
3933         return 0;
3934 }
3935
3936 static u32 preparser_disable(bool state)
3937 {
3938         return MI_ARB_CHECK | 1 << 8 | state;
3939 }
3940
3941 static int gen12_emit_flush_render(struct i915_request *request,
3942                                    u32 mode)
3943 {
3944         if (mode & EMIT_FLUSH) {
3945                 u32 flags = 0;
3946                 u32 *cs;
3947
3948                 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
3949                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3950                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
3951                 /* Wa_1409600907:tgl */
3952                 flags |= PIPE_CONTROL_DEPTH_STALL;
3953                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
3954                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
3955                 flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
3956
3957                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3958                 flags |= PIPE_CONTROL_QW_WRITE;
3959
3960                 flags |= PIPE_CONTROL_CS_STALL;
3961
3962                 cs = intel_ring_begin(request, 6);
3963                 if (IS_ERR(cs))
3964                         return PTR_ERR(cs);
3965
3966                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
3967                 intel_ring_advance(request, cs);
3968         }
3969
3970         if (mode & EMIT_INVALIDATE) {
3971                 u32 flags = 0;
3972                 u32 *cs;
3973
3974                 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
3975                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
3976                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
3977                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
3978                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
3979                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
3980                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
3981                 flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
3982
3983                 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
3984                 flags |= PIPE_CONTROL_QW_WRITE;
3985
3986                 flags |= PIPE_CONTROL_CS_STALL;
3987
3988                 cs = intel_ring_begin(request, 8);
3989                 if (IS_ERR(cs))
3990                         return PTR_ERR(cs);
3991
3992                 /*
3993                  * Prevent the pre-parser from skipping past the TLB
3994                  * invalidate and loading a stale page for the batch
3995                  * buffer / request payload.
3996                  */
3997                 *cs++ = preparser_disable(true);
3998
3999                 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4000
4001                 *cs++ = preparser_disable(false);
4002                 intel_ring_advance(request, cs);
4003
4004                 /*
4005                  * Wa_1604544889:tgl
4006                  */
4007                 if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
4008                         flags = 0;
4009                         flags |= PIPE_CONTROL_CS_STALL;
4010                         flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
4011
4012                         flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4013                         flags |= PIPE_CONTROL_QW_WRITE;
4014
4015                         cs = intel_ring_begin(request, 6);
4016                         if (IS_ERR(cs))
4017                                 return PTR_ERR(cs);
4018
4019                         cs = gen8_emit_pipe_control(cs, flags,
4020                                                     LRC_PPHWSP_SCRATCH_ADDR);
4021                         intel_ring_advance(request, cs);
4022                 }
4023         }
4024
4025         return 0;
4026 }
4027
4028 /*
4029  * Reserve space for 2 NOOPs at the end of each request to be
4030  * used as a workaround for not being allowed to do lite
4031  * restore with HEAD==TAIL (WaIdleLiteRestore).
4032  */
4033 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4034 {
4035         /* Ensure there's always at least one preemption point per-request. */
4036         *cs++ = MI_ARB_CHECK;
4037         *cs++ = MI_NOOP;
4038         request->wa_tail = intel_ring_offset(request, cs);
4039
4040         return cs;
4041 }
4042
4043 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4044 {
4045         *cs++ = MI_SEMAPHORE_WAIT |
4046                 MI_SEMAPHORE_GLOBAL_GTT |
4047                 MI_SEMAPHORE_POLL |
4048                 MI_SEMAPHORE_SAD_EQ_SDD;
4049         *cs++ = 0;
4050         *cs++ = intel_hws_preempt_address(request->engine);
4051         *cs++ = 0;
4052
4053         return cs;
4054 }
4055
4056 static __always_inline u32*
4057 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
4058                                  u32 *cs)
4059 {
4060         *cs++ = MI_USER_INTERRUPT;
4061
4062         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4063         if (intel_engine_has_semaphores(request->engine))
4064                 cs = emit_preempt_busywait(request, cs);
4065
4066         request->tail = intel_ring_offset(request, cs);
4067         assert_ring_tail_valid(request->ring, request->tail);
4068
4069         return gen8_emit_wa_tail(request, cs);
4070 }
4071
4072 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4073 {
4074         cs = gen8_emit_ggtt_write(cs,
4075                                   request->fence.seqno,
4076                                   i915_request_active_timeline(request)->hwsp_offset,
4077                                   0);
4078
4079         return gen8_emit_fini_breadcrumb_footer(request, cs);
4080 }
4081
4082 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4083 {
4084         cs = gen8_emit_pipe_control(cs,
4085                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4086                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4087                                     PIPE_CONTROL_DC_FLUSH_ENABLE,
4088                                     0);
4089
4090         /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4091         cs = gen8_emit_ggtt_write_rcs(cs,
4092                                       request->fence.seqno,
4093                                       i915_request_active_timeline(request)->hwsp_offset,
4094                                       PIPE_CONTROL_FLUSH_ENABLE |
4095                                       PIPE_CONTROL_CS_STALL);
4096
4097         return gen8_emit_fini_breadcrumb_footer(request, cs);
4098 }
4099
4100 static u32 *
4101 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4102 {
4103         cs = gen8_emit_ggtt_write_rcs(cs,
4104                                       request->fence.seqno,
4105                                       i915_request_active_timeline(request)->hwsp_offset,
4106                                       PIPE_CONTROL_CS_STALL |
4107                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4108                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4109                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4110                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4111                                       PIPE_CONTROL_FLUSH_ENABLE);
4112
4113         return gen8_emit_fini_breadcrumb_footer(request, cs);
4114 }
4115
4116 /*
4117  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4118  * flush and will continue pre-fetching the instructions after it before the
4119  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4120  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4121  * of the next request before the memory has been flushed, we're guaranteed that
4122  * we won't access the batch itself too early.
4123  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4124  * so, if the current request is modifying an instruction in the next request on
4125  * the same intel_context, we might pre-fetch and then execute the pre-update
4126  * instruction. To avoid this, the users of self-modifying code should either
4127  * disable the parser around the code emitting the memory writes, via a new flag
4128  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4129  * the in-kernel use-cases we've opted to use a separate context, see
4130  * reloc_gpu() as an example.
4131  * All the above applies only to the instructions themselves. Non-inline data
4132  * used by the instructions is not pre-fetched.
4133  */
4134
4135 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4136 {
4137         *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4138                 MI_SEMAPHORE_GLOBAL_GTT |
4139                 MI_SEMAPHORE_POLL |
4140                 MI_SEMAPHORE_SAD_EQ_SDD;
4141         *cs++ = 0;
4142         *cs++ = intel_hws_preempt_address(request->engine);
4143         *cs++ = 0;
4144         *cs++ = 0;
4145         *cs++ = MI_NOOP;
4146
4147         return cs;
4148 }
4149
4150 static __always_inline u32*
4151 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
4152 {
4153         *cs++ = MI_USER_INTERRUPT;
4154
4155         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4156         if (intel_engine_has_semaphores(request->engine))
4157                 cs = gen12_emit_preempt_busywait(request, cs);
4158
4159         request->tail = intel_ring_offset(request, cs);
4160         assert_ring_tail_valid(request->ring, request->tail);
4161
4162         return gen8_emit_wa_tail(request, cs);
4163 }
4164
4165 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
4166 {
4167         cs = gen8_emit_ggtt_write(cs,
4168                                   request->fence.seqno,
4169                                   i915_request_active_timeline(request)->hwsp_offset,
4170                                   0);
4171
4172         return gen12_emit_fini_breadcrumb_footer(request, cs);
4173 }
4174
4175 static u32 *
4176 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4177 {
4178         cs = gen8_emit_ggtt_write_rcs(cs,
4179                                       request->fence.seqno,
4180                                       i915_request_active_timeline(request)->hwsp_offset,
4181                                       PIPE_CONTROL_CS_STALL |
4182                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
4183                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4184                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4185                                       /* Wa_1409600907:tgl */
4186                                       PIPE_CONTROL_DEPTH_STALL |
4187                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
4188                                       PIPE_CONTROL_FLUSH_ENABLE |
4189                                       PIPE_CONTROL_HDC_PIPELINE_FLUSH);
4190
4191         return gen12_emit_fini_breadcrumb_footer(request, cs);
4192 }
4193
4194 static void execlists_park(struct intel_engine_cs *engine)
4195 {
4196         cancel_timer(&engine->execlists.timer);
4197         cancel_timer(&engine->execlists.preempt);
4198 }
4199
4200 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4201 {
4202         engine->submit_request = execlists_submit_request;
4203         engine->schedule = i915_schedule;
4204         engine->execlists.tasklet.func = execlists_submission_tasklet;
4205
4206         engine->reset.prepare = execlists_reset_prepare;
4207         engine->reset.rewind = execlists_reset_rewind;
4208         engine->reset.cancel = execlists_reset_cancel;
4209         engine->reset.finish = execlists_reset_finish;
4210
4211         engine->park = execlists_park;
4212         engine->unpark = NULL;
4213
4214         engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4215         if (!intel_vgpu_active(engine->i915)) {
4216                 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4217                 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
4218                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4219         }
4220
4221         if (INTEL_GEN(engine->i915) >= 12)
4222                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
4223
4224         if (intel_engine_has_preemption(engine))
4225                 engine->emit_bb_start = gen8_emit_bb_start;
4226         else
4227                 engine->emit_bb_start = gen8_emit_bb_start_noarb;
4228 }
4229
4230 static void execlists_shutdown(struct intel_engine_cs *engine)
4231 {
4232         /* Synchronise with residual timers and any softirq they raise */
4233         del_timer_sync(&engine->execlists.timer);
4234         del_timer_sync(&engine->execlists.preempt);
4235         tasklet_kill(&engine->execlists.tasklet);
4236 }
4237
4238 static void execlists_release(struct intel_engine_cs *engine)
4239 {
4240         execlists_shutdown(engine);
4241
4242         intel_engine_cleanup_common(engine);
4243         lrc_destroy_wa_ctx(engine);
4244 }
4245
4246 static void
4247 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
4248 {
4249         /* Default vfuncs which can be overriden by each engine. */
4250
4251         engine->resume = execlists_resume;
4252
4253         engine->cops = &execlists_context_ops;
4254         engine->request_alloc = execlists_request_alloc;
4255
4256         engine->emit_flush = gen8_emit_flush;
4257         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4258         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
4259         if (INTEL_GEN(engine->i915) >= 12)
4260                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
4261
4262         engine->set_default_submission = intel_execlists_set_default_submission;
4263
4264         if (INTEL_GEN(engine->i915) < 11) {
4265                 engine->irq_enable = gen8_logical_ring_enable_irq;
4266                 engine->irq_disable = gen8_logical_ring_disable_irq;
4267         } else {
4268                 /*
4269                  * TODO: On Gen11 interrupt masks need to be clear
4270                  * to allow C6 entry. Keep interrupts enabled at
4271                  * and take the hit of generating extra interrupts
4272                  * until a more refined solution exists.
4273                  */
4274         }
4275 }
4276
4277 static inline void
4278 logical_ring_default_irqs(struct intel_engine_cs *engine)
4279 {
4280         unsigned int shift = 0;
4281
4282         if (INTEL_GEN(engine->i915) < 11) {
4283                 const u8 irq_shifts[] = {
4284                         [RCS0]  = GEN8_RCS_IRQ_SHIFT,
4285                         [BCS0]  = GEN8_BCS_IRQ_SHIFT,
4286                         [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
4287                         [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
4288                         [VECS0] = GEN8_VECS_IRQ_SHIFT,
4289                 };
4290
4291                 shift = irq_shifts[engine->id];
4292         }
4293
4294         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
4295         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
4296 }
4297
4298 static void rcs_submission_override(struct intel_engine_cs *engine)
4299 {
4300         switch (INTEL_GEN(engine->i915)) {
4301         case 12:
4302                 engine->emit_flush = gen12_emit_flush_render;
4303                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4304                 break;
4305         case 11:
4306                 engine->emit_flush = gen11_emit_flush_render;
4307                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4308                 break;
4309         default:
4310                 engine->emit_flush = gen8_emit_flush_render;
4311                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4312                 break;
4313         }
4314 }
4315
4316 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
4317 {
4318         struct intel_engine_execlists * const execlists = &engine->execlists;
4319         struct drm_i915_private *i915 = engine->i915;
4320         struct intel_uncore *uncore = engine->uncore;
4321         u32 base = engine->mmio_base;
4322
4323         tasklet_init(&engine->execlists.tasklet,
4324                      execlists_submission_tasklet, (unsigned long)engine);
4325         timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
4326         timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
4327
4328         logical_ring_default_vfuncs(engine);
4329         logical_ring_default_irqs(engine);
4330
4331         if (engine->class == RENDER_CLASS)
4332                 rcs_submission_override(engine);
4333
4334         if (intel_init_workaround_bb(engine))
4335                 /*
4336                  * We continue even if we fail to initialize WA batch
4337                  * because we only expect rare glitches but nothing
4338                  * critical to prevent us from using GPU
4339                  */
4340                 DRM_ERROR("WA batch buffer initialization failed\n");
4341
4342         if (HAS_LOGICAL_RING_ELSQ(i915)) {
4343                 execlists->submit_reg = uncore->regs +
4344                         i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
4345                 execlists->ctrl_reg = uncore->regs +
4346                         i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
4347         } else {
4348                 execlists->submit_reg = uncore->regs +
4349                         i915_mmio_reg_offset(RING_ELSP(base));
4350         }
4351
4352         execlists->csb_status =
4353                 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
4354
4355         execlists->csb_write =
4356                 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
4357
4358         if (INTEL_GEN(i915) < 11)
4359                 execlists->csb_size = GEN8_CSB_ENTRIES;
4360         else
4361                 execlists->csb_size = GEN11_CSB_ENTRIES;
4362
4363         reset_csb_pointers(engine);
4364
4365         /* Finally, take ownership and responsibility for cleanup! */
4366         engine->release = execlists_release;
4367
4368         return 0;
4369 }
4370
4371 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
4372 {
4373         u32 indirect_ctx_offset;
4374
4375         switch (INTEL_GEN(engine->i915)) {
4376         default:
4377                 MISSING_CASE(INTEL_GEN(engine->i915));
4378                 /* fall through */
4379         case 12:
4380                 indirect_ctx_offset =
4381                         GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4382                 break;
4383         case 11:
4384                 indirect_ctx_offset =
4385                         GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4386                 break;
4387         case 10:
4388                 indirect_ctx_offset =
4389                         GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4390                 break;
4391         case 9:
4392                 indirect_ctx_offset =
4393                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4394                 break;
4395         case 8:
4396                 indirect_ctx_offset =
4397                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
4398                 break;
4399         }
4400
4401         return indirect_ctx_offset;
4402 }
4403
4404
4405 static void init_common_reg_state(u32 * const regs,
4406                                   const struct intel_engine_cs *engine,
4407                                   const struct intel_ring *ring,
4408                                   bool inhibit)
4409 {
4410         u32 ctl;
4411
4412         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
4413         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
4414         if (inhibit)
4415                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
4416         if (INTEL_GEN(engine->i915) < 11)
4417                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
4418                                            CTX_CTRL_RS_CTX_ENABLE);
4419         regs[CTX_CONTEXT_CONTROL] = ctl;
4420
4421         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
4422 }
4423
4424 static void init_wa_bb_reg_state(u32 * const regs,
4425                                  const struct intel_engine_cs *engine,
4426                                  u32 pos_bb_per_ctx)
4427 {
4428         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
4429
4430         if (wa_ctx->per_ctx.size) {
4431                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4432
4433                 regs[pos_bb_per_ctx] =
4434                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
4435         }
4436
4437         if (wa_ctx->indirect_ctx.size) {
4438                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4439
4440                 regs[pos_bb_per_ctx + 2] =
4441                         (ggtt_offset + wa_ctx->indirect_ctx.offset) |
4442                         (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4443
4444                 regs[pos_bb_per_ctx + 4] =
4445                         intel_lr_indirect_ctx_offset(engine) << 6;
4446         }
4447 }
4448
4449 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
4450 {
4451         if (i915_vm_is_4lvl(&ppgtt->vm)) {
4452                 /* 64b PPGTT (48bit canonical)
4453                  * PDP0_DESCRIPTOR contains the base address to PML4 and
4454                  * other PDP Descriptors are ignored.
4455                  */
4456                 ASSIGN_CTX_PML4(ppgtt, regs);
4457         } else {
4458                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
4459                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
4460                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
4461                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
4462         }
4463 }
4464
4465 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
4466 {
4467         if (i915_is_ggtt(vm))
4468                 return i915_vm_to_ggtt(vm)->alias;
4469         else
4470                 return i915_vm_to_ppgtt(vm);
4471 }
4472
4473 static void execlists_init_reg_state(u32 *regs,
4474                                      const struct intel_context *ce,
4475                                      const struct intel_engine_cs *engine,
4476                                      const struct intel_ring *ring,
4477                                      bool inhibit)
4478 {
4479         /*
4480          * A context is actually a big batch buffer with several
4481          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
4482          * values we are setting here are only for the first context restore:
4483          * on a subsequent save, the GPU will recreate this batchbuffer with new
4484          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
4485          * we are not initializing here).
4486          *
4487          * Must keep consistent with virtual_update_register_offsets().
4488          */
4489         set_offsets(regs, reg_offsets(engine), engine, inhibit);
4490
4491         init_common_reg_state(regs, engine, ring, inhibit);
4492         init_ppgtt_reg_state(regs, vm_alias(ce->vm));
4493
4494         init_wa_bb_reg_state(regs, engine,
4495                              INTEL_GEN(engine->i915) >= 12 ?
4496                              GEN12_CTX_BB_PER_CTX_PTR :
4497                              CTX_BB_PER_CTX_PTR);
4498
4499         __reset_stop_ring(regs, engine);
4500 }
4501
4502 static int
4503 populate_lr_context(struct intel_context *ce,
4504                     struct drm_i915_gem_object *ctx_obj,
4505                     struct intel_engine_cs *engine,
4506                     struct intel_ring *ring)
4507 {
4508         bool inhibit = true;
4509         void *vaddr;
4510         int ret;
4511
4512         vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
4513         if (IS_ERR(vaddr)) {
4514                 ret = PTR_ERR(vaddr);
4515                 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
4516                 return ret;
4517         }
4518
4519         set_redzone(vaddr, engine);
4520
4521         if (engine->default_state) {
4522                 void *defaults;
4523
4524                 defaults = i915_gem_object_pin_map(engine->default_state,
4525                                                    I915_MAP_WB);
4526                 if (IS_ERR(defaults)) {
4527                         ret = PTR_ERR(defaults);
4528                         goto err_unpin_ctx;
4529                 }
4530
4531                 memcpy(vaddr, defaults, engine->context_size);
4532                 i915_gem_object_unpin_map(engine->default_state);
4533                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
4534                 inhibit = false;
4535         }
4536
4537         /* The second page of the context object contains some fields which must
4538          * be set up prior to the first execution. */
4539         execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
4540                                  ce, engine, ring, inhibit);
4541
4542         ret = 0;
4543 err_unpin_ctx:
4544         __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
4545         i915_gem_object_unpin_map(ctx_obj);
4546         return ret;
4547 }
4548
4549 static int __execlists_context_alloc(struct intel_context *ce,
4550                                      struct intel_engine_cs *engine)
4551 {
4552         struct drm_i915_gem_object *ctx_obj;
4553         struct intel_ring *ring;
4554         struct i915_vma *vma;
4555         u32 context_size;
4556         int ret;
4557
4558         GEM_BUG_ON(ce->state);
4559         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
4560
4561         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4562                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
4563
4564         ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
4565         if (IS_ERR(ctx_obj))
4566                 return PTR_ERR(ctx_obj);
4567
4568         vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
4569         if (IS_ERR(vma)) {
4570                 ret = PTR_ERR(vma);
4571                 goto error_deref_obj;
4572         }
4573
4574         if (!ce->timeline) {
4575                 struct intel_timeline *tl;
4576
4577                 tl = intel_timeline_create(engine->gt, NULL);
4578                 if (IS_ERR(tl)) {
4579                         ret = PTR_ERR(tl);
4580                         goto error_deref_obj;
4581                 }
4582
4583                 ce->timeline = tl;
4584         }
4585
4586         ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
4587         if (IS_ERR(ring)) {
4588                 ret = PTR_ERR(ring);
4589                 goto error_deref_obj;
4590         }
4591
4592         ret = populate_lr_context(ce, ctx_obj, engine, ring);
4593         if (ret) {
4594                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
4595                 goto error_ring_free;
4596         }
4597
4598         ce->ring = ring;
4599         ce->state = vma;
4600
4601         return 0;
4602
4603 error_ring_free:
4604         intel_ring_put(ring);
4605 error_deref_obj:
4606         i915_gem_object_put(ctx_obj);
4607         return ret;
4608 }
4609
4610 static struct list_head *virtual_queue(struct virtual_engine *ve)
4611 {
4612         return &ve->base.execlists.default_priolist.requests[0];
4613 }
4614
4615 static void virtual_context_destroy(struct kref *kref)
4616 {
4617         struct virtual_engine *ve =
4618                 container_of(kref, typeof(*ve), context.ref);
4619         unsigned int n;
4620
4621         GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4622         GEM_BUG_ON(ve->request);
4623         GEM_BUG_ON(ve->context.inflight);
4624
4625         for (n = 0; n < ve->num_siblings; n++) {
4626                 struct intel_engine_cs *sibling = ve->siblings[n];
4627                 struct rb_node *node = &ve->nodes[sibling->id].rb;
4628                 unsigned long flags;
4629
4630                 if (RB_EMPTY_NODE(node))
4631                         continue;
4632
4633                 spin_lock_irqsave(&sibling->active.lock, flags);
4634
4635                 /* Detachment is lazily performed in the execlists tasklet */
4636                 if (!RB_EMPTY_NODE(node))
4637                         rb_erase_cached(node, &sibling->execlists.virtual);
4638
4639                 spin_unlock_irqrestore(&sibling->active.lock, flags);
4640         }
4641         GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
4642
4643         if (ve->context.state)
4644                 __execlists_context_fini(&ve->context);
4645         intel_context_fini(&ve->context);
4646
4647         kfree(ve->bonds);
4648         kfree(ve);
4649 }
4650
4651 static void virtual_engine_initial_hint(struct virtual_engine *ve)
4652 {
4653         int swp;
4654
4655         /*
4656          * Pick a random sibling on starting to help spread the load around.
4657          *
4658          * New contexts are typically created with exactly the same order
4659          * of siblings, and often started in batches. Due to the way we iterate
4660          * the array of sibling when submitting requests, sibling[0] is
4661          * prioritised for dequeuing. If we make sure that sibling[0] is fairly
4662          * randomised across the system, we also help spread the load by the
4663          * first engine we inspect being different each time.
4664          *
4665          * NB This does not force us to execute on this engine, it will just
4666          * typically be the first we inspect for submission.
4667          */
4668         swp = prandom_u32_max(ve->num_siblings);
4669         if (!swp)
4670                 return;
4671
4672         swap(ve->siblings[swp], ve->siblings[0]);
4673         if (!intel_engine_has_relative_mmio(ve->siblings[0]))
4674                 virtual_update_register_offsets(ve->context.lrc_reg_state,
4675                                                 ve->siblings[0]);
4676 }
4677
4678 static int virtual_context_alloc(struct intel_context *ce)
4679 {
4680         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4681
4682         return __execlists_context_alloc(ce, ve->siblings[0]);
4683 }
4684
4685 static int virtual_context_pin(struct intel_context *ce)
4686 {
4687         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4688         int err;
4689
4690         /* Note: we must use a real engine class for setting up reg state */
4691         err = __execlists_context_pin(ce, ve->siblings[0]);
4692         if (err)
4693                 return err;
4694
4695         virtual_engine_initial_hint(ve);
4696         return 0;
4697 }
4698
4699 static void virtual_context_enter(struct intel_context *ce)
4700 {
4701         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4702         unsigned int n;
4703
4704         for (n = 0; n < ve->num_siblings; n++)
4705                 intel_engine_pm_get(ve->siblings[n]);
4706
4707         intel_timeline_enter(ce->timeline);
4708 }
4709
4710 static void virtual_context_exit(struct intel_context *ce)
4711 {
4712         struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
4713         unsigned int n;
4714
4715         intel_timeline_exit(ce->timeline);
4716
4717         for (n = 0; n < ve->num_siblings; n++)
4718                 intel_engine_pm_put(ve->siblings[n]);
4719 }
4720
4721 static const struct intel_context_ops virtual_context_ops = {
4722         .alloc = virtual_context_alloc,
4723
4724         .pin = virtual_context_pin,
4725         .unpin = execlists_context_unpin,
4726
4727         .enter = virtual_context_enter,
4728         .exit = virtual_context_exit,
4729
4730         .destroy = virtual_context_destroy,
4731 };
4732
4733 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
4734 {
4735         struct i915_request *rq;
4736         intel_engine_mask_t mask;
4737
4738         rq = READ_ONCE(ve->request);
4739         if (!rq)
4740                 return 0;
4741
4742         /* The rq is ready for submission; rq->execution_mask is now stable. */
4743         mask = rq->execution_mask;
4744         if (unlikely(!mask)) {
4745                 /* Invalid selection, submit to a random engine in error */
4746                 i915_request_skip(rq, -ENODEV);
4747                 mask = ve->siblings[0]->mask;
4748         }
4749
4750         ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
4751                      rq->fence.context, rq->fence.seqno,
4752                      mask, ve->base.execlists.queue_priority_hint);
4753
4754         return mask;
4755 }
4756
4757 static void virtual_submission_tasklet(unsigned long data)
4758 {
4759         struct virtual_engine * const ve = (struct virtual_engine *)data;
4760         const int prio = ve->base.execlists.queue_priority_hint;
4761         intel_engine_mask_t mask;
4762         unsigned int n;
4763
4764         rcu_read_lock();
4765         mask = virtual_submission_mask(ve);
4766         rcu_read_unlock();
4767         if (unlikely(!mask))
4768                 return;
4769
4770         local_irq_disable();
4771         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
4772                 struct intel_engine_cs *sibling = ve->siblings[n];
4773                 struct ve_node * const node = &ve->nodes[sibling->id];
4774                 struct rb_node **parent, *rb;
4775                 bool first;
4776
4777                 if (unlikely(!(mask & sibling->mask))) {
4778                         if (!RB_EMPTY_NODE(&node->rb)) {
4779                                 spin_lock(&sibling->active.lock);
4780                                 rb_erase_cached(&node->rb,
4781                                                 &sibling->execlists.virtual);
4782                                 RB_CLEAR_NODE(&node->rb);
4783                                 spin_unlock(&sibling->active.lock);
4784                         }
4785                         continue;
4786                 }
4787
4788                 spin_lock(&sibling->active.lock);
4789
4790                 if (!RB_EMPTY_NODE(&node->rb)) {
4791                         /*
4792                          * Cheat and avoid rebalancing the tree if we can
4793                          * reuse this node in situ.
4794                          */
4795                         first = rb_first_cached(&sibling->execlists.virtual) ==
4796                                 &node->rb;
4797                         if (prio == node->prio || (prio > node->prio && first))
4798                                 goto submit_engine;
4799
4800                         rb_erase_cached(&node->rb, &sibling->execlists.virtual);
4801                 }
4802
4803                 rb = NULL;
4804                 first = true;
4805                 parent = &sibling->execlists.virtual.rb_root.rb_node;
4806                 while (*parent) {
4807                         struct ve_node *other;
4808
4809                         rb = *parent;
4810                         other = rb_entry(rb, typeof(*other), rb);
4811                         if (prio > other->prio) {
4812                                 parent = &rb->rb_left;
4813                         } else {
4814                                 parent = &rb->rb_right;
4815                                 first = false;
4816                         }
4817                 }
4818
4819                 rb_link_node(&node->rb, rb, parent);
4820                 rb_insert_color_cached(&node->rb,
4821                                        &sibling->execlists.virtual,
4822                                        first);
4823
4824 submit_engine:
4825                 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
4826                 node->prio = prio;
4827                 if (first && prio > sibling->execlists.queue_priority_hint) {
4828                         sibling->execlists.queue_priority_hint = prio;
4829                         tasklet_hi_schedule(&sibling->execlists.tasklet);
4830                 }
4831
4832                 spin_unlock(&sibling->active.lock);
4833         }
4834         local_irq_enable();
4835 }
4836
4837 static void virtual_submit_request(struct i915_request *rq)
4838 {
4839         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4840         struct i915_request *old;
4841         unsigned long flags;
4842
4843         ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
4844                      rq->fence.context,
4845                      rq->fence.seqno);
4846
4847         GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
4848
4849         spin_lock_irqsave(&ve->base.active.lock, flags);
4850
4851         old = ve->request;
4852         if (old) { /* background completion event from preempt-to-busy */
4853                 GEM_BUG_ON(!i915_request_completed(old));
4854                 __i915_request_submit(old);
4855                 i915_request_put(old);
4856         }
4857
4858         if (i915_request_completed(rq)) {
4859                 __i915_request_submit(rq);
4860
4861                 ve->base.execlists.queue_priority_hint = INT_MIN;
4862                 ve->request = NULL;
4863         } else {
4864                 ve->base.execlists.queue_priority_hint = rq_prio(rq);
4865                 ve->request = i915_request_get(rq);
4866
4867                 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
4868                 list_move_tail(&rq->sched.link, virtual_queue(ve));
4869
4870                 tasklet_schedule(&ve->base.execlists.tasklet);
4871         }
4872
4873         spin_unlock_irqrestore(&ve->base.active.lock, flags);
4874 }
4875
4876 static struct ve_bond *
4877 virtual_find_bond(struct virtual_engine *ve,
4878                   const struct intel_engine_cs *master)
4879 {
4880         int i;
4881
4882         for (i = 0; i < ve->num_bonds; i++) {
4883                 if (ve->bonds[i].master == master)
4884                         return &ve->bonds[i];
4885         }
4886
4887         return NULL;
4888 }
4889
4890 static void
4891 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
4892 {
4893         struct virtual_engine *ve = to_virtual_engine(rq->engine);
4894         intel_engine_mask_t allowed, exec;
4895         struct ve_bond *bond;
4896
4897         allowed = ~to_request(signal)->engine->mask;
4898
4899         bond = virtual_find_bond(ve, to_request(signal)->engine);
4900         if (bond)
4901                 allowed &= bond->sibling_mask;
4902
4903         /* Restrict the bonded request to run on only the available engines */
4904         exec = READ_ONCE(rq->execution_mask);
4905         while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
4906                 ;
4907
4908         /* Prevent the master from being re-run on the bonded engines */
4909         to_request(signal)->execution_mask &= ~allowed;
4910 }
4911
4912 struct intel_context *
4913 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
4914                                unsigned int count)
4915 {
4916         struct virtual_engine *ve;
4917         unsigned int n;
4918         int err;
4919
4920         if (count == 0)
4921                 return ERR_PTR(-EINVAL);
4922
4923         if (count == 1)
4924                 return intel_context_create(siblings[0]);
4925
4926         ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
4927         if (!ve)
4928                 return ERR_PTR(-ENOMEM);
4929
4930         ve->base.i915 = siblings[0]->i915;
4931         ve->base.gt = siblings[0]->gt;
4932         ve->base.uncore = siblings[0]->uncore;
4933         ve->base.id = -1;
4934
4935         ve->base.class = OTHER_CLASS;
4936         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4937         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4938         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4939
4940         /*
4941          * The decision on whether to submit a request using semaphores
4942          * depends on the saturated state of the engine. We only compute
4943          * this during HW submission of the request, and we need for this
4944          * state to be globally applied to all requests being submitted
4945          * to this engine. Virtual engines encompass more than one physical
4946          * engine and so we cannot accurately tell in advance if one of those
4947          * engines is already saturated and so cannot afford to use a semaphore
4948          * and be pessimized in priority for doing so -- if we are the only
4949          * context using semaphores after all other clients have stopped, we
4950          * will be starved on the saturated system. Such a global switch for
4951          * semaphores is less than ideal, but alas is the current compromise.
4952          */
4953         ve->base.saturated = ALL_ENGINES;
4954
4955         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4956
4957         intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
4958         intel_engine_init_breadcrumbs(&ve->base);
4959         intel_engine_init_execlists(&ve->base);
4960
4961         ve->base.cops = &virtual_context_ops;
4962         ve->base.request_alloc = execlists_request_alloc;
4963
4964         ve->base.schedule = i915_schedule;
4965         ve->base.submit_request = virtual_submit_request;
4966         ve->base.bond_execute = virtual_bond_execute;
4967
4968         INIT_LIST_HEAD(virtual_queue(ve));
4969         ve->base.execlists.queue_priority_hint = INT_MIN;
4970         tasklet_init(&ve->base.execlists.tasklet,
4971                      virtual_submission_tasklet,
4972                      (unsigned long)ve);
4973
4974         intel_context_init(&ve->context, &ve->base);
4975
4976         for (n = 0; n < count; n++) {
4977                 struct intel_engine_cs *sibling = siblings[n];
4978
4979                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
4980                 if (sibling->mask & ve->base.mask) {
4981                         DRM_DEBUG("duplicate %s entry in load balancer\n",
4982                                   sibling->name);
4983                         err = -EINVAL;
4984                         goto err_put;
4985                 }
4986
4987                 /*
4988                  * The virtual engine implementation is tightly coupled to
4989                  * the execlists backend -- we push out request directly
4990                  * into a tree inside each physical engine. We could support
4991                  * layering if we handle cloning of the requests and
4992                  * submitting a copy into each backend.
4993                  */
4994                 if (sibling->execlists.tasklet.func !=
4995                     execlists_submission_tasklet) {
4996                         err = -ENODEV;
4997                         goto err_put;
4998                 }
4999
5000                 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5001                 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5002
5003                 ve->siblings[ve->num_siblings++] = sibling;
5004                 ve->base.mask |= sibling->mask;
5005
5006                 /*
5007                  * All physical engines must be compatible for their emission
5008                  * functions (as we build the instructions during request
5009                  * construction and do not alter them before submission
5010                  * on the physical engine). We use the engine class as a guide
5011                  * here, although that could be refined.
5012                  */
5013                 if (ve->base.class != OTHER_CLASS) {
5014                         if (ve->base.class != sibling->class) {
5015                                 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5016                                           sibling->class, ve->base.class);
5017                                 err = -EINVAL;
5018                                 goto err_put;
5019                         }
5020                         continue;
5021                 }
5022
5023                 ve->base.class = sibling->class;
5024                 ve->base.uabi_class = sibling->uabi_class;
5025                 snprintf(ve->base.name, sizeof(ve->base.name),
5026                          "v%dx%d", ve->base.class, count);
5027                 ve->base.context_size = sibling->context_size;
5028
5029                 ve->base.emit_bb_start = sibling->emit_bb_start;
5030                 ve->base.emit_flush = sibling->emit_flush;
5031                 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5032                 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5033                 ve->base.emit_fini_breadcrumb_dw =
5034                         sibling->emit_fini_breadcrumb_dw;
5035
5036                 ve->base.flags = sibling->flags;
5037         }
5038
5039         ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5040
5041         return &ve->context;
5042
5043 err_put:
5044         intel_context_put(&ve->context);
5045         return ERR_PTR(err);
5046 }
5047
5048 struct intel_context *
5049 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5050 {
5051         struct virtual_engine *se = to_virtual_engine(src);
5052         struct intel_context *dst;
5053
5054         dst = intel_execlists_create_virtual(se->siblings,
5055                                              se->num_siblings);
5056         if (IS_ERR(dst))
5057                 return dst;
5058
5059         if (se->num_bonds) {
5060                 struct virtual_engine *de = to_virtual_engine(dst->engine);
5061
5062                 de->bonds = kmemdup(se->bonds,
5063                                     sizeof(*se->bonds) * se->num_bonds,
5064                                     GFP_KERNEL);
5065                 if (!de->bonds) {
5066                         intel_context_put(dst);
5067                         return ERR_PTR(-ENOMEM);
5068                 }
5069
5070                 de->num_bonds = se->num_bonds;
5071         }
5072
5073         return dst;
5074 }
5075
5076 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5077                                      const struct intel_engine_cs *master,
5078                                      const struct intel_engine_cs *sibling)
5079 {
5080         struct virtual_engine *ve = to_virtual_engine(engine);
5081         struct ve_bond *bond;
5082         int n;
5083
5084         /* Sanity check the sibling is part of the virtual engine */
5085         for (n = 0; n < ve->num_siblings; n++)
5086                 if (sibling == ve->siblings[n])
5087                         break;
5088         if (n == ve->num_siblings)
5089                 return -EINVAL;
5090
5091         bond = virtual_find_bond(ve, master);
5092         if (bond) {
5093                 bond->sibling_mask |= sibling->mask;
5094                 return 0;
5095         }
5096
5097         bond = krealloc(ve->bonds,
5098                         sizeof(*bond) * (ve->num_bonds + 1),
5099                         GFP_KERNEL);
5100         if (!bond)
5101                 return -ENOMEM;
5102
5103         bond[ve->num_bonds].master = master;
5104         bond[ve->num_bonds].sibling_mask = sibling->mask;
5105
5106         ve->bonds = bond;
5107         ve->num_bonds++;
5108
5109         return 0;
5110 }
5111
5112 struct intel_engine_cs *
5113 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5114                                  unsigned int sibling)
5115 {
5116         struct virtual_engine *ve = to_virtual_engine(engine);
5117
5118         if (sibling >= ve->num_siblings)
5119                 return NULL;
5120
5121         return ve->siblings[sibling];
5122 }
5123
5124 void intel_execlists_show_requests(struct intel_engine_cs *engine,
5125                                    struct drm_printer *m,
5126                                    void (*show_request)(struct drm_printer *m,
5127                                                         struct i915_request *rq,
5128                                                         const char *prefix),
5129                                    unsigned int max)
5130 {
5131         const struct intel_engine_execlists *execlists = &engine->execlists;
5132         struct i915_request *rq, *last;
5133         unsigned long flags;
5134         unsigned int count;
5135         struct rb_node *rb;
5136
5137         spin_lock_irqsave(&engine->active.lock, flags);
5138
5139         last = NULL;
5140         count = 0;
5141         list_for_each_entry(rq, &engine->active.requests, sched.link) {
5142                 if (count++ < max - 1)
5143                         show_request(m, rq, "\t\tE ");
5144                 else
5145                         last = rq;
5146         }
5147         if (last) {
5148                 if (count > max) {
5149                         drm_printf(m,
5150                                    "\t\t...skipping %d executing requests...\n",
5151                                    count - max);
5152                 }
5153                 show_request(m, last, "\t\tE ");
5154         }
5155
5156         last = NULL;
5157         count = 0;
5158         if (execlists->queue_priority_hint != INT_MIN)
5159                 drm_printf(m, "\t\tQueue priority hint: %d\n",
5160                            execlists->queue_priority_hint);
5161         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
5162                 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
5163                 int i;
5164
5165                 priolist_for_each_request(rq, p, i) {
5166                         if (count++ < max - 1)
5167                                 show_request(m, rq, "\t\tQ ");
5168                         else
5169                                 last = rq;
5170                 }
5171         }
5172         if (last) {
5173                 if (count > max) {
5174                         drm_printf(m,
5175                                    "\t\t...skipping %d queued requests...\n",
5176                                    count - max);
5177                 }
5178                 show_request(m, last, "\t\tQ ");
5179         }
5180
5181         last = NULL;
5182         count = 0;
5183         for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
5184                 struct virtual_engine *ve =
5185                         rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
5186                 struct i915_request *rq = READ_ONCE(ve->request);
5187
5188                 if (rq) {
5189                         if (count++ < max - 1)
5190                                 show_request(m, rq, "\t\tV ");
5191                         else
5192                                 last = rq;
5193                 }
5194         }
5195         if (last) {
5196                 if (count > max) {
5197                         drm_printf(m,
5198                                    "\t\t...skipping %d virtual requests...\n",
5199                                    count - max);
5200                 }
5201                 show_request(m, last, "\t\tV ");
5202         }
5203
5204         spin_unlock_irqrestore(&engine->active.lock, flags);
5205 }
5206
5207 void intel_lr_context_reset(struct intel_engine_cs *engine,
5208                             struct intel_context *ce,
5209                             u32 head,
5210                             bool scrub)
5211 {
5212         GEM_BUG_ON(!intel_context_is_pinned(ce));
5213
5214         /*
5215          * We want a simple context + ring to execute the breadcrumb update.
5216          * We cannot rely on the context being intact across the GPU hang,
5217          * so clear it and rebuild just what we need for the breadcrumb.
5218          * All pending requests for this context will be zapped, and any
5219          * future request will be after userspace has had the opportunity
5220          * to recreate its own state.
5221          */
5222         if (scrub)
5223                 restore_default_state(ce, engine);
5224
5225         /* Rerun the request; its payload has been neutered (if guilty). */
5226         __execlists_update_reg_state(ce, engine, head);
5227 }
5228
5229 bool
5230 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
5231 {
5232         return engine->set_default_submission ==
5233                intel_execlists_set_default_submission;
5234 }
5235
5236 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5237 #include "selftest_lrc.c"
5238 #endif