2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
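/*
 * create_scratch() allocates a single page of internal memory, marks it
 * cacheable and pins it into the GGTT so the CS can write results into it
 * (e.g. the GPR dumps used by the virtual engine tests below) for the CPU
 * to inspect afterwards.
 */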
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 struct drm_i915_gem_object *obj;
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40 i915_gem_object_put(obj);
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 i915_gem_object_put(obj);
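/*
 * Many of these tests park a spinner on the HW indefinitely. Disable the
 * heartbeat (and hold an engine-pm reference) for the duration so that the
 * background heartbeat pulse does not preempt or cancel our intentionally
 * hung requests; engine_heartbeat_enable() restores the saved interval.
 */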
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
66 intel_engine_pm_put(engine);
68 engine->props.heartbeat_interval_ms = saved;
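/*
 * live_sanitycheck: submit a simple spinner on every engine, check that it
 * starts executing, then terminate it and flush. A quick check that request
 * submission and the spinner infrastructure work before the harder tests.
 */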
71 static int live_sanitycheck(void *arg)
73 struct intel_gt *gt = arg;
74 struct intel_engine_cs *engine;
75 enum intel_engine_id id;
76 struct igt_spinner spin;
79 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
82 if (igt_spinner_init(&spin, gt))
85 for_each_engine(engine, gt, id) {
86 struct intel_context *ce;
87 struct i915_request *rq;
89 ce = intel_context_create(engine);
95 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101 i915_request_add(rq);
102 if (!igt_wait_for_spinner(&spin, rq)) {
103 GEM_TRACE("spinner failed to start\n");
105 intel_gt_set_wedged(gt);
110 igt_spinner_end(&spin);
111 if (igt_flush_test(gt->i915)) {
117 intel_context_put(ce);
122 igt_spinner_fini(&spin);
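/*
 * live_unlite_restore: exercise the "lite restore" path where a context is
 * resubmitted with only an updated RING_TAIL. The two contexts are poisoned
 * and offset such that a bogus lite-restore would execute garbage from the
 * wrong ring and hang; the optional priority bump forces the switch via
 * preemption instead of a normal completion.
 */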
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
128 struct intel_engine_cs *engine;
129 enum intel_engine_id id;
130 struct igt_spinner spin;
134 * Check that we can correctly context switch between 2 instances
135 * on the same engine from the same parent context.
138 if (igt_spinner_init(&spin, gt))
142 for_each_engine(engine, gt, id) {
143 struct intel_context *ce[2] = {};
144 struct i915_request *rq[2];
145 struct igt_live_test t;
149 if (prio && !intel_engine_has_preemption(engine))
152 if (!intel_engine_can_store_dword(engine))
155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
159 engine_heartbeat_disable(engine, &saved);
161 for (n = 0; n < ARRAY_SIZE(ce); n++) {
162 struct intel_context *tmp;
164 tmp = intel_context_create(engine);
170 err = intel_context_pin(tmp);
172 intel_context_put(tmp);
177 * Setup the pair of contexts such that if we
178 * lite-restore using the RING_TAIL from ce[1] it
179 * will execute garbage from ce[0]->ring.
181 memset(tmp->ring->vaddr,
182 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183 tmp->ring->vma->size);
187 GEM_BUG_ON(!ce[1]->ring->size);
188 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
191 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
193 err = PTR_ERR(rq[0]);
197 i915_request_get(rq[0]);
198 i915_request_add(rq[0]);
199 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
201 if (!igt_wait_for_spinner(&spin, rq[0])) {
202 i915_request_put(rq[0]);
206 rq[1] = i915_request_create(ce[1]);
208 err = PTR_ERR(rq[1]);
209 i915_request_put(rq[0]);
215 * Ensure we do the switch to ce[1] on completion.
217 * rq[0] is already submitted, so this should reduce
218 * to a no-op (a wait on a request on the same engine
219 * uses the submit fence, not the completion fence),
220 * but it will install a dependency on rq[1] for rq[0]
221 * that will prevent the pair being reordered by
224 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
227 i915_request_get(rq[1]);
228 i915_request_add(rq[1]);
229 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230 i915_request_put(rq[0]);
233 struct i915_sched_attr attr = {
237 /* Alternatively preempt the spinner with ce[1] */
238 engine->schedule(rq[1], &attr);
241 /* And switch back to ce[0] for good measure */
242 rq[0] = i915_request_create(ce[0]);
244 err = PTR_ERR(rq[0]);
245 i915_request_put(rq[1]);
249 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250 i915_request_get(rq[0]);
251 i915_request_add(rq[0]);
252 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253 i915_request_put(rq[1]);
254 i915_request_put(rq[0]);
257 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258 igt_spinner_end(&spin);
259 for (n = 0; n < ARRAY_SIZE(ce); n++) {
260 if (IS_ERR_OR_NULL(ce[n]))
263 intel_context_unpin(ce[n]);
264 intel_context_put(ce[n]);
267 engine_heartbeat_enable(engine, saved);
268 if (igt_live_test_end(&t))
274 igt_spinner_fini(&spin);
278 static int live_unlite_switch(void *arg)
280 return live_unlite_restore(arg, 0);
283 static int live_unlite_preempt(void *arg)
285 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
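/*
 * live_hold_reset: place a spinning request on the HW, pull it off the
 * engine with execlists_hold(), reset the engine, and verify the held
 * request is not resubmitted until execlists_unhold() releases it. This is
 * the mechanism used to keep a guilty request around for error capture.
 */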
288 static int live_hold_reset(void *arg)
290 struct intel_gt *gt = arg;
291 struct intel_engine_cs *engine;
292 enum intel_engine_id id;
293 struct igt_spinner spin;
297 * In order to support offline error capture for fast preempt reset,
298 * we need to decouple the guilty request and ensure that it and its
299 * descendants are not executed while the capture is in progress.
302 if (!intel_has_reset_engine(gt))
305 if (igt_spinner_init(&spin, gt))
308 for_each_engine(engine, gt, id) {
309 struct intel_context *ce;
310 unsigned long heartbeat;
311 struct i915_request *rq;
313 ce = intel_context_create(engine);
319 engine_heartbeat_disable(engine, &heartbeat);
321 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
326 i915_request_add(rq);
328 if (!igt_wait_for_spinner(&spin, rq)) {
329 intel_gt_set_wedged(gt);
334 /* We have our request executing, now remove it and reset */
336 if (test_and_set_bit(I915_RESET_ENGINE + id,
338 intel_gt_set_wedged(gt);
342 tasklet_disable(&engine->execlists.tasklet);
344 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
345 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
347 i915_request_get(rq);
348 execlists_hold(engine, rq);
349 GEM_BUG_ON(!i915_request_on_hold(rq));
351 intel_engine_reset(engine, NULL);
352 GEM_BUG_ON(rq->fence.error != -EIO);
354 tasklet_enable(&engine->execlists.tasklet);
355 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
358 /* Check that we do not resubmit the held request */
359 if (!i915_request_wait(rq, 0, HZ / 5)) {
360 pr_err("%s: on hold request completed!\n",
362 i915_request_put(rq);
366 GEM_BUG_ON(!i915_request_on_hold(rq));
368 /* But is resubmitted on release */
369 execlists_unhold(engine, rq);
370 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
371 pr_err("%s: held request did not complete!\n",
373 intel_gt_set_wedged(gt);
376 i915_request_put(rq);
379 engine_heartbeat_enable(engine, heartbeat);
380 intel_context_put(ce);
385 igt_spinner_fini(&spin);
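/*
 * emit_semaphore_chain: each request busywaits (with arbitration enabled,
 * so it remains preemptible) until its own dword in the semaphore page is
 * signalled, and then signals the dword of the previous request, so a chain
 * of such requests completes one after another once the last is released.
 */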
390 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
394 cs = intel_ring_begin(rq, 10);
398 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
400 *cs++ = MI_SEMAPHORE_WAIT |
401 MI_SEMAPHORE_GLOBAL_GTT |
403 MI_SEMAPHORE_SAD_NEQ_SDD;
405 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
409 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
410 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
420 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
422 intel_ring_advance(rq, cs);
426 static struct i915_request *
427 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
429 struct intel_context *ce;
430 struct i915_request *rq;
433 ce = intel_context_create(engine);
437 rq = intel_context_create_request(ce);
442 if (rq->engine->emit_init_breadcrumb)
443 err = rq->engine->emit_init_breadcrumb(rq);
445 err = emit_semaphore_chain(rq, vma, idx);
447 i915_request_get(rq);
448 i915_request_add(rq);
453 intel_context_put(ce);
458 release_queue(struct intel_engine_cs *engine,
459 struct i915_vma *vma,
462 struct i915_sched_attr attr = {
465 struct i915_request *rq;
468 rq = intel_engine_create_kernel_request(engine);
472 cs = intel_ring_begin(rq, 4);
474 i915_request_add(rq);
478 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
479 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
483 intel_ring_advance(rq, cs);
485 i915_request_get(rq);
486 i915_request_add(rq);
489 engine->schedule(rq, &attr);
490 local_bh_enable(); /* kick tasklet */
492 i915_request_put(rq);
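/*
 * slice_semaphore_queue: queue a head request on @outer followed by @count
 * semaphore-chain requests on every engine, then use release_queue() to
 * kick the end of the chain at maximum priority. The head can only complete
 * if the scheduler timeslices between the co-dependent requests.
 */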
498 slice_semaphore_queue(struct intel_engine_cs *outer,
499 struct i915_vma *vma,
502 struct intel_engine_cs *engine;
503 struct i915_request *head;
504 enum intel_engine_id id;
507 head = semaphore_queue(outer, vma, n++);
509 return PTR_ERR(head);
511 for_each_engine(engine, outer->gt, id) {
512 for (i = 0; i < count; i++) {
513 struct i915_request *rq;
515 rq = semaphore_queue(engine, vma, n++);
521 i915_request_put(rq);
525 err = release_queue(outer, vma, n, INT_MAX);
529 if (i915_request_wait(head, 0,
530 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
531 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
534 intel_gt_set_wedged(outer->gt);
539 i915_request_put(head);
543 static int live_timeslice_preempt(void *arg)
545 struct intel_gt *gt = arg;
546 struct drm_i915_gem_object *obj;
547 struct i915_vma *vma;
553 * If a request takes too long, we would like to give other users
554 * a fair go on the GPU. In particular, users may create batches
555 * that wait upon external input, where that input may even be
556 * supplied by another GPU job. To avoid blocking forever, we
557 * need to preempt the current task and replace it with another
560 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
563 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
567 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
573 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
575 err = PTR_ERR(vaddr);
579 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
583 for_each_prime_number_from(count, 1, 16) {
584 struct intel_engine_cs *engine;
585 enum intel_engine_id id;
587 for_each_engine(engine, gt, id) {
590 if (!intel_engine_has_preemption(engine))
593 memset(vaddr, 0, PAGE_SIZE);
595 engine_heartbeat_disable(engine, &saved);
596 err = slice_semaphore_queue(engine, vma, count);
597 engine_heartbeat_enable(engine, saved);
601 if (igt_flush_test(gt->i915)) {
611 i915_gem_object_unpin_map(obj);
613 i915_gem_object_put(obj);
617 static struct i915_request *nop_request(struct intel_engine_cs *engine)
619 struct i915_request *rq;
621 rq = intel_engine_create_kernel_request(engine);
625 i915_request_get(rq);
626 i915_request_add(rq);
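/*
 * wait_for_submit: flush the submission tasklet and poll until the request
 * has actually been submitted to the HW, or the timeout expires.
 */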
631 static int wait_for_submit(struct intel_engine_cs *engine,
632 struct i915_request *rq,
633 unsigned long timeout)
638 intel_engine_flush_submission(engine);
639 if (i915_request_is_active(rq))
641 } while (time_before(jiffies, timeout));
646 static long timeslice_threshold(const struct intel_engine_cs *engine)
648 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
651 static int live_timeslice_queue(void *arg)
653 struct intel_gt *gt = arg;
654 struct drm_i915_gem_object *obj;
655 struct intel_engine_cs *engine;
656 enum intel_engine_id id;
657 struct i915_vma *vma;
662 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
663 * timeslicing between them disabled, we *do* enable timeslicing
664 * if the queue demands it. (Normally, we do not submit if
665 * ELSP[1] is already occupied, so must rely on timeslicing to
666 * eject ELSP[0] in favour of the queue.)
668 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
671 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
675 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
681 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
683 err = PTR_ERR(vaddr);
687 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
691 for_each_engine(engine, gt, id) {
692 struct i915_sched_attr attr = {
693 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
695 struct i915_request *rq, *nop;
698 if (!intel_engine_has_preemption(engine))
701 engine_heartbeat_disable(engine, &saved);
702 memset(vaddr, 0, PAGE_SIZE);
704 /* ELSP[0]: semaphore wait */
705 rq = semaphore_queue(engine, vma, 0);
710 engine->schedule(rq, &attr);
711 err = wait_for_submit(engine, rq, HZ / 2);
713 pr_err("%s: Timed out trying to submit semaphores\n",
718 /* ELSP[1]: nop request */
719 nop = nop_request(engine);
724 err = wait_for_submit(engine, nop, HZ / 2);
725 i915_request_put(nop);
727 pr_err("%s: Timed out trying to submit nop\n",
732 GEM_BUG_ON(i915_request_completed(rq));
733 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
735 /* Queue: semaphore signal, matching priority as semaphore */
736 err = release_queue(engine, vma, 1, effective_prio(rq));
740 intel_engine_flush_submission(engine);
741 if (!READ_ONCE(engine->execlists.timer.expires) &&
742 !i915_request_completed(rq)) {
743 struct drm_printer p =
744 drm_info_printer(gt->i915->drm.dev);
746 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
748 intel_engine_dump(engine, &p,
749 "%s\n", engine->name);
752 memset(vaddr, 0xff, PAGE_SIZE);
756 /* Timeslice every jiffy, so within 2 we should signal */
757 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
758 struct drm_printer p =
759 drm_info_printer(gt->i915->drm.dev);
761 pr_err("%s: Failed to timeslice into queue\n",
763 intel_engine_dump(engine, &p,
764 "%s\n", engine->name);
766 memset(vaddr, 0xff, PAGE_SIZE);
770 i915_request_put(rq);
772 engine_heartbeat_enable(engine, saved);
779 i915_gem_object_unpin_map(obj);
781 i915_gem_object_put(obj);
785 static int live_busywait_preempt(void *arg)
787 struct intel_gt *gt = arg;
788 struct i915_gem_context *ctx_hi, *ctx_lo;
789 struct intel_engine_cs *engine;
790 struct drm_i915_gem_object *obj;
791 struct i915_vma *vma;
792 enum intel_engine_id id;
797 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
798 * preempt the busywaits used to synchronise between rings.
801 ctx_hi = kernel_context(gt->i915);
804 ctx_hi->sched.priority =
805 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
807 ctx_lo = kernel_context(gt->i915);
810 ctx_lo->sched.priority =
811 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
813 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
819 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
825 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
831 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
835 for_each_engine(engine, gt, id) {
836 struct i915_request *lo, *hi;
837 struct igt_live_test t;
840 if (!intel_engine_has_preemption(engine))
843 if (!intel_engine_can_store_dword(engine))
846 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
852 * We create two requests. The low priority request
853 * busywaits on a semaphore (inside the ringbuffer where
854 * it should be preemptible) and the high priority request
855 * uses a MI_STORE_DWORD_IMM to update the semaphore value
856 * allowing the first request to complete. If preemption
857 * fails, we hang instead.
860 lo = igt_request_alloc(ctx_lo, engine);
866 cs = intel_ring_begin(lo, 8);
869 i915_request_add(lo);
873 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
874 *cs++ = i915_ggtt_offset(vma);
878 /* XXX Do we need a flush + invalidate here? */
880 *cs++ = MI_SEMAPHORE_WAIT |
881 MI_SEMAPHORE_GLOBAL_GTT |
883 MI_SEMAPHORE_SAD_EQ_SDD;
885 *cs++ = i915_ggtt_offset(vma);
888 intel_ring_advance(lo, cs);
890 i915_request_get(lo);
891 i915_request_add(lo);
893 if (wait_for(READ_ONCE(*map), 10)) {
894 i915_request_put(lo);
899 /* Low priority request should be busywaiting now */
900 if (i915_request_wait(lo, 0, 1) != -ETIME) {
901 i915_request_put(lo);
902 pr_err("%s: Busywaiting request did not busywait!\n",
908 hi = igt_request_alloc(ctx_hi, engine);
911 i915_request_put(lo);
915 cs = intel_ring_begin(hi, 4);
918 i915_request_add(hi);
919 i915_request_put(lo);
923 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
924 *cs++ = i915_ggtt_offset(vma);
928 intel_ring_advance(hi, cs);
929 i915_request_add(hi);
931 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
932 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
934 pr_err("%s: Failed to preempt semaphore busywait!\n",
937 intel_engine_dump(engine, &p, "%s\n", engine->name);
940 i915_request_put(lo);
941 intel_gt_set_wedged(gt);
945 GEM_BUG_ON(READ_ONCE(*map));
946 i915_request_put(lo);
948 if (igt_live_test_end(&t)) {
958 i915_gem_object_unpin_map(obj);
960 i915_gem_object_put(obj);
962 kernel_context_close(ctx_lo);
964 kernel_context_close(ctx_hi);
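/*
 * spinner_create_request: convenience wrapper that looks up the GEM
 * context's legacy engine binding and emits a spinning batch on it, so the
 * context-based tests below can share the igt_spinner helpers.
 */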
968 static struct i915_request *
969 spinner_create_request(struct igt_spinner *spin,
970 struct i915_gem_context *ctx,
971 struct intel_engine_cs *engine,
974 struct intel_context *ce;
975 struct i915_request *rq;
977 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
981 rq = igt_spinner_create_request(spin, ce, arb);
982 intel_context_put(ce);
986 static int live_preempt(void *arg)
988 struct intel_gt *gt = arg;
989 struct i915_gem_context *ctx_hi, *ctx_lo;
990 struct igt_spinner spin_hi, spin_lo;
991 struct intel_engine_cs *engine;
992 enum intel_engine_id id;
995 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
998 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
999 pr_err("Logical preemption supported, but not exposed\n");
1001 if (igt_spinner_init(&spin_hi, gt))
1004 if (igt_spinner_init(&spin_lo, gt))
1007 ctx_hi = kernel_context(gt->i915);
1010 ctx_hi->sched.priority =
1011 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1013 ctx_lo = kernel_context(gt->i915);
1016 ctx_lo->sched.priority =
1017 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1019 for_each_engine(engine, gt, id) {
1020 struct igt_live_test t;
1021 struct i915_request *rq;
1023 if (!intel_engine_has_preemption(engine))
1026 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1031 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1038 i915_request_add(rq);
1039 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1040 GEM_TRACE("lo spinner failed to start\n");
1042 intel_gt_set_wedged(gt);
1047 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1050 igt_spinner_end(&spin_lo);
1055 i915_request_add(rq);
1056 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1057 GEM_TRACE("hi spinner failed to start\n");
1059 intel_gt_set_wedged(gt);
1064 igt_spinner_end(&spin_hi);
1065 igt_spinner_end(&spin_lo);
1067 if (igt_live_test_end(&t)) {
1075 kernel_context_close(ctx_lo);
1077 kernel_context_close(ctx_hi);
1079 igt_spinner_fini(&spin_lo);
1081 igt_spinner_fini(&spin_hi);
1085 static int live_late_preempt(void *arg)
1087 struct intel_gt *gt = arg;
1088 struct i915_gem_context *ctx_hi, *ctx_lo;
1089 struct igt_spinner spin_hi, spin_lo;
1090 struct intel_engine_cs *engine;
1091 struct i915_sched_attr attr = {};
1092 enum intel_engine_id id;
1095 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1098 if (igt_spinner_init(&spin_hi, gt))
1101 if (igt_spinner_init(&spin_lo, gt))
1104 ctx_hi = kernel_context(gt->i915);
1108 ctx_lo = kernel_context(gt->i915);
1112 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1113 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1115 for_each_engine(engine, gt, id) {
1116 struct igt_live_test t;
1117 struct i915_request *rq;
1119 if (!intel_engine_has_preemption(engine))
1122 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1127 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1134 i915_request_add(rq);
1135 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1136 pr_err("First context failed to start\n");
1140 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1143 igt_spinner_end(&spin_lo);
1148 i915_request_add(rq);
1149 if (igt_wait_for_spinner(&spin_hi, rq)) {
1150 pr_err("Second context overtook first?\n");
1154 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1155 engine->schedule(rq, &attr);
1157 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1158 pr_err("High priority context failed to preempt the low priority context\n");
1163 igt_spinner_end(&spin_hi);
1164 igt_spinner_end(&spin_lo);
1166 if (igt_live_test_end(&t)) {
1174 kernel_context_close(ctx_lo);
1176 kernel_context_close(ctx_hi);
1178 igt_spinner_fini(&spin_lo);
1180 igt_spinner_fini(&spin_hi);
1184 igt_spinner_end(&spin_hi);
1185 igt_spinner_end(&spin_lo);
1186 intel_gt_set_wedged(gt);
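/*
 * A preempt_client bundles an igt_spinner with its own GEM context, so the
 * preemption tests can pit spinners from different contexts (and hence
 * different priorities) against each other on the same engine.
 */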
1191 struct preempt_client {
1192 struct igt_spinner spin;
1193 struct i915_gem_context *ctx;
1196 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1198 c->ctx = kernel_context(gt->i915);
1202 if (igt_spinner_init(&c->spin, gt))
1208 kernel_context_close(c->ctx);
1212 static void preempt_client_fini(struct preempt_client *c)
1214 igt_spinner_fini(&c->spin);
1215 kernel_context_close(c->ctx);
1218 static int live_nopreempt(void *arg)
1220 struct intel_gt *gt = arg;
1221 struct intel_engine_cs *engine;
1222 struct preempt_client a, b;
1223 enum intel_engine_id id;
1227 * Verify that we can disable preemption for an individual request
1228 * that may be being observed and does not want to be interrupted.
1231 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1234 if (preempt_client_init(gt, &a))
1236 if (preempt_client_init(gt, &b))
1238 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1240 for_each_engine(engine, gt, id) {
1241 struct i915_request *rq_a, *rq_b;
1243 if (!intel_engine_has_preemption(engine))
1246 engine->execlists.preempt_hang.count = 0;
1248 rq_a = spinner_create_request(&a.spin,
1252 err = PTR_ERR(rq_a);
1256 /* Low priority client, but unpreemptable! */
1257 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1259 i915_request_add(rq_a);
1260 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1261 pr_err("First client failed to start\n");
1265 rq_b = spinner_create_request(&b.spin,
1269 err = PTR_ERR(rq_b);
1273 i915_request_add(rq_b);
1275 /* B is much more important than A! (But A is unpreemptable.) */
1276 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1278 /* Wait long enough for preemption and timeslicing */
1279 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1280 pr_err("Second client started too early!\n");
1284 igt_spinner_end(&a.spin);
1286 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1287 pr_err("Second client failed to start\n");
1291 igt_spinner_end(&b.spin);
1293 if (engine->execlists.preempt_hang.count) {
1294 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1295 engine->execlists.preempt_hang.count);
1300 if (igt_flush_test(gt->i915))
1306 preempt_client_fini(&b);
1308 preempt_client_fini(&a);
1312 igt_spinner_end(&b.spin);
1313 igt_spinner_end(&a.spin);
1314 intel_gt_set_wedged(gt);
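/*
 * The __cancel_*() subtests below mark an in-flight context as banned and
 * send an engine pulse (a high priority heartbeat request) to force it off
 * the HW, then check that the cancelled requests report -EIO while innocent
 * requests on the same engine still complete.
 */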
1319 struct live_preempt_cancel {
1320 struct intel_engine_cs *engine;
1321 struct preempt_client a, b;
1324 static int __cancel_active0(struct live_preempt_cancel *arg)
1326 struct i915_request *rq;
1327 struct igt_live_test t;
1330 /* Preempt cancel of ELSP0 */
1331 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1332 if (igt_live_test_begin(&t, arg->engine->i915,
1333 __func__, arg->engine->name))
1336 rq = spinner_create_request(&arg->a.spin,
1337 arg->a.ctx, arg->engine,
1342 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1343 i915_request_get(rq);
1344 i915_request_add(rq);
1345 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1350 intel_context_set_banned(rq->context);
1351 err = intel_engine_pulse(arg->engine);
1355 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1360 if (rq->fence.error != -EIO) {
1361 pr_err("Cancelled inflight0 request did not report -EIO\n");
1367 i915_request_put(rq);
1368 if (igt_live_test_end(&t))
1373 static int __cancel_active1(struct live_preempt_cancel *arg)
1375 struct i915_request *rq[2] = {};
1376 struct igt_live_test t;
1379 /* Preempt cancel of ELSP1 */
1380 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1381 if (igt_live_test_begin(&t, arg->engine->i915,
1382 __func__, arg->engine->name))
1385 rq[0] = spinner_create_request(&arg->a.spin,
1386 arg->a.ctx, arg->engine,
1387 MI_NOOP); /* no preemption */
1389 return PTR_ERR(rq[0]);
1391 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1392 i915_request_get(rq[0]);
1393 i915_request_add(rq[0]);
1394 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1399 rq[1] = spinner_create_request(&arg->b.spin,
1400 arg->b.ctx, arg->engine,
1402 if (IS_ERR(rq[1])) {
1403 err = PTR_ERR(rq[1]);
1407 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1408 i915_request_get(rq[1]);
1409 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1410 i915_request_add(rq[1]);
1414 intel_context_set_banned(rq[1]->context);
1415 err = intel_engine_pulse(arg->engine);
1419 igt_spinner_end(&arg->a.spin);
1420 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1425 if (rq[0]->fence.error != 0) {
1426 pr_err("Normal inflight0 request did not complete\n");
1431 if (rq[1]->fence.error != -EIO) {
1432 pr_err("Cancelled inflight1 request did not report -EIO\n");
1438 i915_request_put(rq[1]);
1439 i915_request_put(rq[0]);
1440 if (igt_live_test_end(&t))
1445 static int __cancel_queued(struct live_preempt_cancel *arg)
1447 struct i915_request *rq[3] = {};
1448 struct igt_live_test t;
1451 /* Full ELSP and one in the wings */
1452 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1453 if (igt_live_test_begin(&t, arg->engine->i915,
1454 __func__, arg->engine->name))
1457 rq[0] = spinner_create_request(&arg->a.spin,
1458 arg->a.ctx, arg->engine,
1461 return PTR_ERR(rq[0]);
1463 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1464 i915_request_get(rq[0]);
1465 i915_request_add(rq[0]);
1466 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1471 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1472 if (IS_ERR(rq[1])) {
1473 err = PTR_ERR(rq[1]);
1477 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1478 i915_request_get(rq[1]);
1479 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1480 i915_request_add(rq[1]);
1484 rq[2] = spinner_create_request(&arg->b.spin,
1485 arg->a.ctx, arg->engine,
1487 if (IS_ERR(rq[2])) {
1488 err = PTR_ERR(rq[2]);
1492 i915_request_get(rq[2]);
1493 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1494 i915_request_add(rq[2]);
1498 intel_context_set_banned(rq[2]->context);
1499 err = intel_engine_pulse(arg->engine);
1503 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1508 if (rq[0]->fence.error != -EIO) {
1509 pr_err("Cancelled inflight0 request did not report -EIO\n");
1514 if (rq[1]->fence.error != 0) {
1515 pr_err("Normal inflight1 request did not complete\n");
1520 if (rq[2]->fence.error != -EIO) {
1521 pr_err("Cancelled queued request did not report -EIO\n");
1527 i915_request_put(rq[2]);
1528 i915_request_put(rq[1]);
1529 i915_request_put(rq[0]);
1530 if (igt_live_test_end(&t))
1535 static int __cancel_hostile(struct live_preempt_cancel *arg)
1537 struct i915_request *rq;
1540 /* Preempt cancel non-preemptible spinner in ELSP0 */
1541 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1544 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1545 rq = spinner_create_request(&arg->a.spin,
1546 arg->a.ctx, arg->engine,
1547 MI_NOOP); /* preemption disabled */
1551 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1552 i915_request_get(rq);
1553 i915_request_add(rq);
1554 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1559 intel_context_set_banned(rq->context);
1560 err = intel_engine_pulse(arg->engine); /* force reset */
1564 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1569 if (rq->fence.error != -EIO) {
1570 pr_err("Cancelled inflight0 request did not report -EIO\n");
1576 i915_request_put(rq);
1577 if (igt_flush_test(arg->engine->i915))
1582 static int live_preempt_cancel(void *arg)
1584 struct intel_gt *gt = arg;
1585 struct live_preempt_cancel data;
1586 enum intel_engine_id id;
1590 * To cancel an inflight context, we need to first remove it from the
1591 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1594 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1597 if (preempt_client_init(gt, &data.a))
1599 if (preempt_client_init(gt, &data.b))
1602 for_each_engine(data.engine, gt, id) {
1603 if (!intel_engine_has_preemption(data.engine))
1606 err = __cancel_active0(&data);
1610 err = __cancel_active1(&data);
1614 err = __cancel_queued(&data);
1618 err = __cancel_hostile(&data);
1625 preempt_client_fini(&data.b);
1627 preempt_client_fini(&data.a);
1632 igt_spinner_end(&data.b.spin);
1633 igt_spinner_end(&data.a.spin);
1634 intel_gt_set_wedged(gt);
1638 static int live_suppress_self_preempt(void *arg)
1640 struct intel_gt *gt = arg;
1641 struct intel_engine_cs *engine;
1642 struct i915_sched_attr attr = {
1643 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1645 struct preempt_client a, b;
1646 enum intel_engine_id id;
1650 * Verify that if a preemption request does not cause a change in
1651 * the current execution order, the preempt-to-idle injection is
1652 * skipped and that we do not accidentally apply it after the CS
1656 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1659 if (USES_GUC_SUBMISSION(gt->i915))
1660 return 0; /* presume black box */
1662 if (intel_vgpu_active(gt->i915))
1663 return 0; /* GVT forces single port & request submission */
1665 if (preempt_client_init(gt, &a))
1667 if (preempt_client_init(gt, &b))
1670 for_each_engine(engine, gt, id) {
1671 struct i915_request *rq_a, *rq_b;
1674 if (!intel_engine_has_preemption(engine))
1677 if (igt_flush_test(gt->i915))
1680 intel_engine_pm_get(engine);
1681 engine->execlists.preempt_hang.count = 0;
1683 rq_a = spinner_create_request(&a.spin,
1687 err = PTR_ERR(rq_a);
1688 intel_engine_pm_put(engine);
1692 i915_request_add(rq_a);
1693 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1694 pr_err("First client failed to start\n");
1695 intel_engine_pm_put(engine);
1699 /* Keep postponing the timer to avoid premature slicing */
1700 mod_timer(&engine->execlists.timer, jiffies + HZ);
1701 for (depth = 0; depth < 8; depth++) {
1702 rq_b = spinner_create_request(&b.spin,
1706 err = PTR_ERR(rq_b);
1707 intel_engine_pm_put(engine);
1710 i915_request_add(rq_b);
1712 GEM_BUG_ON(i915_request_completed(rq_a));
1713 engine->schedule(rq_a, &attr);
1714 igt_spinner_end(&a.spin);
1716 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1717 pr_err("Second client failed to start\n");
1718 intel_engine_pm_put(engine);
1725 igt_spinner_end(&a.spin);
1727 if (engine->execlists.preempt_hang.count) {
1728 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1730 engine->execlists.preempt_hang.count,
1732 intel_engine_pm_put(engine);
1737 intel_engine_pm_put(engine);
1738 if (igt_flush_test(gt->i915))
1744 preempt_client_fini(&b);
1746 preempt_client_fini(&a);
1750 igt_spinner_end(&b.spin);
1751 igt_spinner_end(&a.spin);
1752 intel_gt_set_wedged(gt);
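/*
 * dummy_request() hand-rolls a bare struct i915_request that is never
 * submitted to HW and is marked as permanently incomplete (note the
 * fence/seqno trickery below). live_suppress_wait_preempt() uses it as a
 * fake predecessor on each timeline so the real requests count as waiters
 * without receiving a new-client priority boost; dummy_request_free() then
 * fakes the CS event to release everything.
 */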
1757 static int __i915_sw_fence_call
1758 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1763 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1765 struct i915_request *rq;
1767 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1771 rq->engine = engine;
1773 spin_lock_init(&rq->lock);
1774 INIT_LIST_HEAD(&rq->fence.cb_list);
1775 rq->fence.lock = &rq->lock;
1776 rq->fence.ops = &i915_fence_ops;
1778 i915_sched_node_init(&rq->sched);
1780 /* mark this request as permanently incomplete */
1781 rq->fence.seqno = 1;
1782 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1783 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1784 GEM_BUG_ON(i915_request_completed(rq));
1786 i915_sw_fence_init(&rq->submit, dummy_notify);
1787 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1789 spin_lock_init(&rq->lock);
1790 rq->fence.lock = &rq->lock;
1791 INIT_LIST_HEAD(&rq->fence.cb_list);
1796 static void dummy_request_free(struct i915_request *dummy)
1798 /* We have to fake the CS interrupt to kick the next request */
1799 i915_sw_fence_commit(&dummy->submit);
1801 i915_request_mark_complete(dummy);
1802 dma_fence_signal(&dummy->fence);
1804 i915_sched_node_fini(&dummy->sched);
1805 i915_sw_fence_fini(&dummy->submit);
1807 dma_fence_free(&dummy->fence);
1810 static int live_suppress_wait_preempt(void *arg)
1812 struct intel_gt *gt = arg;
1813 struct preempt_client client[4];
1814 struct i915_request *rq[ARRAY_SIZE(client)] = {};
1815 struct intel_engine_cs *engine;
1816 enum intel_engine_id id;
1821 * Waiters are given a little priority nudge, but not enough
1822 * to actually cause any preemption. Double check that we do
1823 * not needlessly generate preempt-to-idle cycles.
1826 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1829 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1831 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1833 if (preempt_client_init(gt, &client[2])) /* head of queue */
1835 if (preempt_client_init(gt, &client[3])) /* bystander */
1838 for_each_engine(engine, gt, id) {
1841 if (!intel_engine_has_preemption(engine))
1844 if (!engine->emit_init_breadcrumb)
1847 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1848 struct i915_request *dummy;
1850 engine->execlists.preempt_hang.count = 0;
1852 dummy = dummy_request(engine);
1856 for (i = 0; i < ARRAY_SIZE(client); i++) {
1857 struct i915_request *this;
1859 this = spinner_create_request(&client[i].spin,
1860 client[i].ctx, engine,
1863 err = PTR_ERR(this);
1867 /* Disable NEWCLIENT promotion */
1868 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
1871 rq[i] = i915_request_get(this);
1872 i915_request_add(this);
1875 dummy_request_free(dummy);
1877 GEM_BUG_ON(i915_request_completed(rq[0]));
1878 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1879 pr_err("%s: First client failed to start\n",
1883 GEM_BUG_ON(!i915_request_started(rq[0]));
1885 if (i915_request_wait(rq[depth],
1888 pr_err("%s: Waiter depth:%d completed!\n",
1889 engine->name, depth);
1893 for (i = 0; i < ARRAY_SIZE(client); i++) {
1894 igt_spinner_end(&client[i].spin);
1895 i915_request_put(rq[i]);
1899 if (igt_flush_test(gt->i915))
1902 if (engine->execlists.preempt_hang.count) {
1903 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1905 engine->execlists.preempt_hang.count,
1915 preempt_client_fini(&client[3]);
1917 preempt_client_fini(&client[2]);
1919 preempt_client_fini(&client[1]);
1921 preempt_client_fini(&client[0]);
1925 for (i = 0; i < ARRAY_SIZE(client); i++) {
1926 igt_spinner_end(&client[i].spin);
1927 i915_request_put(rq[i]);
1929 intel_gt_set_wedged(gt);
1934 static int live_chain_preempt(void *arg)
1936 struct intel_gt *gt = arg;
1937 struct intel_engine_cs *engine;
1938 struct preempt_client hi, lo;
1939 enum intel_engine_id id;
1943 * Build a chain AB...BA between two contexts (A, B) and request
1944 * preemption of the last request. It should then complete before
1945 * the previously submitted spinner in B.
1948 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1951 if (preempt_client_init(gt, &hi))
1954 if (preempt_client_init(gt, &lo))
1957 for_each_engine(engine, gt, id) {
1958 struct i915_sched_attr attr = {
1959 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1961 struct igt_live_test t;
1962 struct i915_request *rq;
1963 int ring_size, count, i;
1965 if (!intel_engine_has_preemption(engine))
1968 rq = spinner_create_request(&lo.spin,
1974 i915_request_get(rq);
1975 i915_request_add(rq);
1977 ring_size = rq->wa_tail - rq->head;
1979 ring_size += rq->ring->size;
1980 ring_size = rq->ring->size / ring_size;
1981 pr_debug("%s(%s): Using maximum of %d requests\n",
1982 __func__, engine->name, ring_size);
1984 igt_spinner_end(&lo.spin);
1985 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1986 pr_err("Timed out waiting to flush %s\n", engine->name);
1987 i915_request_put(rq);
1990 i915_request_put(rq);
1992 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1997 for_each_prime_number_from(count, 1, ring_size) {
1998 rq = spinner_create_request(&hi.spin,
2003 i915_request_add(rq);
2004 if (!igt_wait_for_spinner(&hi.spin, rq))
2007 rq = spinner_create_request(&lo.spin,
2012 i915_request_add(rq);
2014 for (i = 0; i < count; i++) {
2015 rq = igt_request_alloc(lo.ctx, engine);
2018 i915_request_add(rq);
2021 rq = igt_request_alloc(hi.ctx, engine);
2025 i915_request_get(rq);
2026 i915_request_add(rq);
2027 engine->schedule(rq, &attr);
2029 igt_spinner_end(&hi.spin);
2030 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2031 struct drm_printer p =
2032 drm_info_printer(gt->i915->drm.dev);
2034 pr_err("Failed to preempt over chain of %d\n",
2036 intel_engine_dump(engine, &p,
2037 "%s\n", engine->name);
2038 i915_request_put(rq);
2041 igt_spinner_end(&lo.spin);
2042 i915_request_put(rq);
2044 rq = igt_request_alloc(lo.ctx, engine);
2048 i915_request_get(rq);
2049 i915_request_add(rq);
2051 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2052 struct drm_printer p =
2053 drm_info_printer(gt->i915->drm.dev);
2055 pr_err("Failed to flush low priority chain of %d requests\n",
2057 intel_engine_dump(engine, &p,
2058 "%s\n", engine->name);
2060 i915_request_put(rq);
2063 i915_request_put(rq);
2066 if (igt_live_test_end(&t)) {
2074 preempt_client_fini(&lo);
2076 preempt_client_fini(&hi);
2080 igt_spinner_end(&hi.spin);
2081 igt_spinner_end(&lo.spin);
2082 intel_gt_set_wedged(gt);
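/*
 * create_gang: build a batch that spins on a semaphore in its own buffer;
 * each new member of the gang also embeds a write that terminates the
 * spinner of the previous (lower priority) member, and the requests are
 * linked together through rq->client_link so the caller can walk the chain.
 */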
2087 static int create_gang(struct intel_engine_cs *engine,
2088 struct i915_request **prev)
2090 struct drm_i915_gem_object *obj;
2091 struct intel_context *ce;
2092 struct i915_request *rq;
2093 struct i915_vma *vma;
2097 ce = intel_context_create(engine);
2101 obj = i915_gem_object_create_internal(engine->i915, 4096);
2107 vma = i915_vma_instance(obj, ce->vm, NULL);
2113 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2117 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2121 /* Semaphore target: spin until zero */
2122 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2124 *cs++ = MI_SEMAPHORE_WAIT |
2126 MI_SEMAPHORE_SAD_EQ_SDD;
2128 *cs++ = lower_32_bits(vma->node.start);
2129 *cs++ = upper_32_bits(vma->node.start);
2132 u64 offset = (*prev)->batch->node.start;
2134 /* Terminate the spinner in the next lower priority batch. */
2135 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2136 *cs++ = lower_32_bits(offset);
2137 *cs++ = upper_32_bits(offset);
2141 *cs++ = MI_BATCH_BUFFER_END;
2142 i915_gem_object_flush_map(obj);
2143 i915_gem_object_unpin_map(obj);
2145 rq = intel_context_create_request(ce);
2150 i915_request_get(rq);
2153 err = i915_request_await_object(rq, vma->obj, false);
2155 err = i915_vma_move_to_active(vma, rq, 0);
2157 err = rq->engine->emit_bb_start(rq,
2160 i915_vma_unlock(vma);
2161 i915_request_add(rq);
2165 i915_gem_object_put(obj);
2166 intel_context_put(ce);
2168 rq->client_link.next = &(*prev)->client_link;
2173 i915_request_put(rq);
2175 i915_gem_object_put(obj);
2177 intel_context_put(ce);
2181 static int live_preempt_gang(void *arg)
2183 struct intel_gt *gt = arg;
2184 struct intel_engine_cs *engine;
2185 enum intel_engine_id id;
2187 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2191 * Build as long a chain of preempters as we can, with each
2192 * request higher priority than the last. Once we are ready, we release
2193 * the last batch which then percolates down the chain, each releasing
2194 * the next oldest in turn. The intent is to simply push as hard as we
2195 * can with the number of preemptions, trying to exceed narrow HW
2196 * limits. At a minimum, we insist that we can sort all the user
2197 * high priority levels into execution order.
2200 for_each_engine(engine, gt, id) {
2201 struct i915_request *rq = NULL;
2202 struct igt_live_test t;
2203 IGT_TIMEOUT(end_time);
2208 if (!intel_engine_has_preemption(engine))
2211 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2215 struct i915_sched_attr attr = {
2216 .priority = I915_USER_PRIORITY(prio++),
2219 err = create_gang(engine, &rq);
2223 /* Submit each spinner at increasing priority */
2224 engine->schedule(rq, &attr);
2226 if (prio <= I915_PRIORITY_MAX)
2229 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2232 if (__igt_timeout(end_time, NULL))
2235 pr_debug("%s: Preempt chain of %d requests\n",
2236 engine->name, prio);
2239 * Such that the last spinner is the highest priority and
2240 * should execute first. When that spinner completes,
2241 * it will terminate the next lowest spinner until there
2242 * are no more spinners and the gang is complete.
2244 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2247 i915_gem_object_unpin_map(rq->batch->obj);
2250 intel_gt_set_wedged(gt);
2253 while (rq) { /* wait for each rq from highest to lowest prio */
2254 struct i915_request *n =
2255 list_next_entry(rq, client_link);
2257 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2258 struct drm_printer p =
2259 drm_info_printer(engine->i915->drm.dev);
2261 pr_err("Failed to flush chain of %d requests, at %d\n",
2262 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2263 intel_engine_dump(engine, &p,
2264 "%s\n", engine->name);
2269 i915_request_put(rq);
2273 if (igt_live_test_end(&t))
2282 static int live_preempt_hang(void *arg)
2284 struct intel_gt *gt = arg;
2285 struct i915_gem_context *ctx_hi, *ctx_lo;
2286 struct igt_spinner spin_hi, spin_lo;
2287 struct intel_engine_cs *engine;
2288 enum intel_engine_id id;
2291 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2294 if (!intel_has_reset_engine(gt))
2297 if (igt_spinner_init(&spin_hi, gt))
2300 if (igt_spinner_init(&spin_lo, gt))
2303 ctx_hi = kernel_context(gt->i915);
2306 ctx_hi->sched.priority =
2307 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2309 ctx_lo = kernel_context(gt->i915);
2312 ctx_lo->sched.priority =
2313 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2315 for_each_engine(engine, gt, id) {
2316 struct i915_request *rq;
2318 if (!intel_engine_has_preemption(engine))
2321 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2328 i915_request_add(rq);
2329 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2330 GEM_TRACE("lo spinner failed to start\n");
2332 intel_gt_set_wedged(gt);
2337 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2340 igt_spinner_end(&spin_lo);
2345 init_completion(&engine->execlists.preempt_hang.completion);
2346 engine->execlists.preempt_hang.inject_hang = true;
2348 i915_request_add(rq);
2350 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2352 pr_err("Preemption did not occur within timeout!");
2354 intel_gt_set_wedged(gt);
2359 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2360 intel_engine_reset(engine, NULL);
2361 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2363 engine->execlists.preempt_hang.inject_hang = false;
2365 if (!igt_wait_for_spinner(&spin_hi, rq)) {
2366 GEM_TRACE("hi spinner failed to start\n");
2368 intel_gt_set_wedged(gt);
2373 igt_spinner_end(&spin_hi);
2374 igt_spinner_end(&spin_lo);
2375 if (igt_flush_test(gt->i915)) {
2383 kernel_context_close(ctx_lo);
2385 kernel_context_close(ctx_hi);
2387 igt_spinner_fini(&spin_lo);
2389 igt_spinner_fini(&spin_hi);
2393 static int live_preempt_timeout(void *arg)
2395 struct intel_gt *gt = arg;
2396 struct i915_gem_context *ctx_hi, *ctx_lo;
2397 struct igt_spinner spin_lo;
2398 struct intel_engine_cs *engine;
2399 enum intel_engine_id id;
2403 * Check that we force preemption to occur by cancelling the previous
2404 * context if it refuses to yield the GPU.
2406 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2409 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2412 if (!intel_has_reset_engine(gt))
2415 if (igt_spinner_init(&spin_lo, gt))
2418 ctx_hi = kernel_context(gt->i915);
2421 ctx_hi->sched.priority =
2422 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2424 ctx_lo = kernel_context(gt->i915);
2427 ctx_lo->sched.priority =
2428 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2430 for_each_engine(engine, gt, id) {
2431 unsigned long saved_timeout;
2432 struct i915_request *rq;
2434 if (!intel_engine_has_preemption(engine))
2437 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2438 MI_NOOP); /* preemption disabled */
2444 i915_request_add(rq);
2445 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2446 intel_gt_set_wedged(gt);
2451 rq = igt_request_alloc(ctx_hi, engine);
2453 igt_spinner_end(&spin_lo);
2458 /* Flush the previous CS ack before changing timeouts */
2459 while (READ_ONCE(engine->execlists.pending[0]))
2462 saved_timeout = engine->props.preempt_timeout_ms;
2463 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2465 i915_request_get(rq);
2466 i915_request_add(rq);
2468 intel_engine_flush_submission(engine);
2469 engine->props.preempt_timeout_ms = saved_timeout;
2471 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2472 intel_gt_set_wedged(gt);
2473 i915_request_put(rq);
2478 igt_spinner_end(&spin_lo);
2479 i915_request_put(rq);
2484 kernel_context_close(ctx_lo);
2486 kernel_context_close(ctx_hi);
2488 igt_spinner_fini(&spin_lo);
2492 static int random_range(struct rnd_state *rnd, int min, int max)
2494 return i915_prandom_u32_max_state(max - min, rnd) + min;
2497 static int random_priority(struct rnd_state *rnd)
2499 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2502 struct preempt_smoke {
2503 struct intel_gt *gt;
2504 struct i915_gem_context **contexts;
2505 struct intel_engine_cs *engine;
2506 struct drm_i915_gem_object *batch;
2507 unsigned int ncontext;
2508 struct rnd_state prng;
2509 unsigned long count;
2512 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2514 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2518 static int smoke_submit(struct preempt_smoke *smoke,
2519 struct i915_gem_context *ctx, int prio,
2520 struct drm_i915_gem_object *batch)
2522 struct i915_request *rq;
2523 struct i915_vma *vma = NULL;
2527 struct i915_address_space *vm;
2529 vm = i915_gem_context_get_vm_rcu(ctx);
2530 vma = i915_vma_instance(batch, vm, NULL);
2533 return PTR_ERR(vma);
2535 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2540 ctx->sched.priority = prio;
2542 rq = igt_request_alloc(ctx, smoke->engine);
2550 err = i915_request_await_object(rq, vma->obj, false);
2552 err = i915_vma_move_to_active(vma, rq, 0);
2554 err = rq->engine->emit_bb_start(rq,
2557 i915_vma_unlock(vma);
2560 i915_request_add(rq);
2564 i915_vma_unpin(vma);
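/*
 * smoke_crescendo() (below) spawns one submission thread per engine, each
 * repeatedly picking a random context and submitting via smoke_submit()
 * with a steadily cycling priority, to stress priority-based preemption
 * from multiple threads at once.
 */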
2569 static int smoke_crescendo_thread(void *arg)
2571 struct preempt_smoke *smoke = arg;
2572 IGT_TIMEOUT(end_time);
2573 unsigned long count;
2577 struct i915_gem_context *ctx = smoke_context(smoke);
2580 err = smoke_submit(smoke,
2581 ctx, count % I915_PRIORITY_MAX,
2587 } while (!__igt_timeout(end_time, NULL));
2589 smoke->count = count;
2593 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2594 #define BATCH BIT(0)
2596 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2597 struct preempt_smoke arg[I915_NUM_ENGINES];
2598 struct intel_engine_cs *engine;
2599 enum intel_engine_id id;
2600 unsigned long count;
2603 for_each_engine(engine, smoke->gt, id) {
2605 arg[id].engine = engine;
2606 if (!(flags & BATCH))
2607 arg[id].batch = NULL;
2610 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2611 "igt/smoke:%d", id);
2612 if (IS_ERR(tsk[id])) {
2613 err = PTR_ERR(tsk[id]);
2616 get_task_struct(tsk[id]);
2619 yield(); /* start all threads before we kthread_stop() */
2622 for_each_engine(engine, smoke->gt, id) {
2625 if (IS_ERR_OR_NULL(tsk[id]))
2628 status = kthread_stop(tsk[id]);
2632 count += arg[id].count;
2634 put_task_struct(tsk[id]);
2637 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2639 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2643 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2645 enum intel_engine_id id;
2646 IGT_TIMEOUT(end_time);
2647 unsigned long count;
2651 for_each_engine(smoke->engine, smoke->gt, id) {
2652 struct i915_gem_context *ctx = smoke_context(smoke);
2655 err = smoke_submit(smoke,
2656 ctx, random_priority(&smoke->prng),
2657 flags & BATCH ? smoke->batch : NULL);
2663 } while (!__igt_timeout(end_time, NULL));
2665 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2667 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2671 static int live_preempt_smoke(void *arg)
2673 struct preempt_smoke smoke = {
2675 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2678 const unsigned int phase[] = { 0, BATCH };
2679 struct igt_live_test t;
2684 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2687 smoke.contexts = kmalloc_array(smoke.ncontext,
2688 sizeof(*smoke.contexts),
2690 if (!smoke.contexts)
2694 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2695 if (IS_ERR(smoke.batch)) {
2696 err = PTR_ERR(smoke.batch);
2700 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2705 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2706 cs[n] = MI_ARB_CHECK;
2707 cs[n] = MI_BATCH_BUFFER_END;
2708 i915_gem_object_flush_map(smoke.batch);
2709 i915_gem_object_unpin_map(smoke.batch);
2711 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2716 for (n = 0; n < smoke.ncontext; n++) {
2717 smoke.contexts[n] = kernel_context(smoke.gt->i915);
2718 if (!smoke.contexts[n])
2722 for (n = 0; n < ARRAY_SIZE(phase); n++) {
2723 err = smoke_crescendo(&smoke, phase[n]);
2727 err = smoke_random(&smoke, phase[n]);
2733 if (igt_live_test_end(&t))
2736 for (n = 0; n < smoke.ncontext; n++) {
2737 if (!smoke.contexts[n])
2739 kernel_context_close(smoke.contexts[n]);
2743 i915_gem_object_put(smoke.batch);
2745 kfree(smoke.contexts);
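/*
 * nop_virtual_engine: create @nctx virtual engines over the given siblings
 * and time how long batches of empty requests take to execute, either
 * submitting each context's requests back to back (CHAIN) or interleaving
 * across the contexts, as a latency and sanity check for virtual engines.
 */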
2750 static int nop_virtual_engine(struct intel_gt *gt,
2751 struct intel_engine_cs **siblings,
2752 unsigned int nsibling,
2755 #define CHAIN BIT(0)
2757 IGT_TIMEOUT(end_time);
2758 struct i915_request *request[16] = {};
2759 struct intel_context *ve[16];
2760 unsigned long n, prime, nc;
2761 struct igt_live_test t;
2762 ktime_t times[2] = {};
2765 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2767 for (n = 0; n < nctx; n++) {
2768 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2769 if (IS_ERR(ve[n])) {
2770 err = PTR_ERR(ve[n]);
2775 err = intel_context_pin(ve[n]);
2777 intel_context_put(ve[n]);
2783 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2787 for_each_prime_number_from(prime, 1, 8192) {
2788 times[1] = ktime_get_raw();
2790 if (flags & CHAIN) {
2791 for (nc = 0; nc < nctx; nc++) {
2792 for (n = 0; n < prime; n++) {
2793 struct i915_request *rq;
2795 rq = i915_request_create(ve[nc]);
2802 i915_request_put(request[nc]);
2803 request[nc] = i915_request_get(rq);
2804 i915_request_add(rq);
2808 for (n = 0; n < prime; n++) {
2809 for (nc = 0; nc < nctx; nc++) {
2810 struct i915_request *rq;
2812 rq = i915_request_create(ve[nc]);
2819 i915_request_put(request[nc]);
2820 request[nc] = i915_request_get(rq);
2821 i915_request_add(rq);
2826 for (nc = 0; nc < nctx; nc++) {
2827 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2828 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2829 __func__, ve[0]->engine->name,
2830 request[nc]->fence.context,
2831 request[nc]->fence.seqno);
2833 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2834 __func__, ve[0]->engine->name,
2835 request[nc]->fence.context,
2836 request[nc]->fence.seqno);
2838 intel_gt_set_wedged(gt);
2843 times[1] = ktime_sub(ktime_get_raw(), times[1]);
2845 times[0] = times[1];
2847 for (nc = 0; nc < nctx; nc++) {
2848 i915_request_put(request[nc]);
2852 if (__igt_timeout(end_time, NULL))
2856 err = igt_live_test_end(&t);
2860 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2861 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2862 prime, div64_u64(ktime_to_ns(times[1]), prime));
2865 if (igt_flush_test(gt->i915))
2868 for (nc = 0; nc < nctx; nc++) {
2869 i915_request_put(request[nc]);
2870 intel_context_unpin(ve[nc]);
2871 intel_context_put(ve[nc]);
2876 static int live_virtual_engine(void *arg)
2878 struct intel_gt *gt = arg;
2879 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2880 struct intel_engine_cs *engine;
2881 enum intel_engine_id id;
2882 unsigned int class, inst;
2885 if (USES_GUC_SUBMISSION(gt->i915))
2888 for_each_engine(engine, gt, id) {
2889 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2891 pr_err("Failed to wrap engine %s: err=%d\n",
2897 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2901 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2902 if (!gt->engine_class[class][inst])
2905 siblings[nsibling++] = gt->engine_class[class][inst];
2910 for (n = 1; n <= nsibling + 1; n++) {
2911 err = nop_virtual_engine(gt, siblings, nsibling,
2917 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2925 static int mask_virtual_engine(struct intel_gt *gt,
2926 struct intel_engine_cs **siblings,
2927 unsigned int nsibling)
2929 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2930 struct intel_context *ve;
2931 struct igt_live_test t;
2936 * Check that by setting the execution mask on a request, we can
2937 * restrict it to our desired engine within the virtual engine.
2940 ve = intel_execlists_create_virtual(siblings, nsibling);
2946 err = intel_context_pin(ve);
2950 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2954 for (n = 0; n < nsibling; n++) {
2955 request[n] = i915_request_create(ve);
2956 if (IS_ERR(request[n])) {
2957 err = PTR_ERR(request[n]);
2962 /* Reverse order as it's more likely to be unnatural */
2963 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2965 i915_request_get(request[n]);
2966 i915_request_add(request[n]);
2969 for (n = 0; n < nsibling; n++) {
2970 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2971 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2972 __func__, ve->engine->name,
2973 request[n]->fence.context,
2974 request[n]->fence.seqno);
2976 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2977 __func__, ve->engine->name,
2978 request[n]->fence.context,
2979 request[n]->fence.seqno);
2981 intel_gt_set_wedged(gt);
2986 if (request[n]->engine != siblings[nsibling - n - 1]) {
2987 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2988 request[n]->engine->name,
2989 siblings[nsibling - n - 1]->name);
2995 err = igt_live_test_end(&t);
2997 if (igt_flush_test(gt->i915))
3000 for (n = 0; n < nsibling; n++)
3001 i915_request_put(request[n]);
3004 intel_context_unpin(ve);
3006 intel_context_put(ve);
3011 static int live_virtual_mask(void *arg)
3013 struct intel_gt *gt = arg;
3014 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3015 unsigned int class, inst;
3018 if (USES_GUC_SUBMISSION(gt->i915))
3021 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3022 unsigned int nsibling;
3025 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3026 if (!gt->engine_class[class][inst])
3029 siblings[nsibling++] = gt->engine_class[class][inst];
3034 err = mask_virtual_engine(gt, siblings, nsibling);
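/*
 * preserved_virtual_engine: on a virtual engine, dump each CS_GPR (see the
 * CS_GPR()/NUM_GPR_DW defines at the top) from a specific sibling into the
 * scratch page and then rewrite the next register, checking that the user
 * register state in the context image is preserved as the virtual engine
 * migrates between siblings.
 */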
3042 static int preserved_virtual_engine(struct intel_gt *gt,
3043 struct intel_engine_cs **siblings,
3044 unsigned int nsibling)
3046 struct i915_request *last = NULL;
3047 struct intel_context *ve;
3048 struct i915_vma *scratch;
3049 struct igt_live_test t;
3054 scratch = create_scratch(siblings[0]->gt);
3055 if (IS_ERR(scratch))
3056 return PTR_ERR(scratch);
3058 ve = intel_execlists_create_virtual(siblings, nsibling);
3064 err = intel_context_pin(ve);
3068 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3072 for (n = 0; n < NUM_GPR_DW; n++) {
3073 struct intel_engine_cs *engine = siblings[n % nsibling];
3074 struct i915_request *rq;
3076 rq = i915_request_create(ve);
3082 i915_request_put(last);
3083 last = i915_request_get(rq);
3085 cs = intel_ring_begin(rq, 8);
3087 i915_request_add(rq);
3092 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3093 *cs++ = CS_GPR(engine, n);
3094 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3097 *cs++ = MI_LOAD_REGISTER_IMM(1);
3098 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3102 intel_ring_advance(rq, cs);
3104 /* Restrict this request to run on a particular engine */
3105 rq->execution_mask = engine->mask;
3106 i915_request_add(rq);
3109 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3114 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3120 for (n = 0; n < NUM_GPR_DW; n++) {
3122 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3129 i915_gem_object_unpin_map(scratch->obj);
3132 if (igt_live_test_end(&t))
3134 i915_request_put(last);
3136 intel_context_unpin(ve);
3138 intel_context_put(ve);
3140 i915_vma_unpin_and_release(&scratch, 0);
3144 static int live_virtual_preserved(void *arg)
3146 struct intel_gt *gt = arg;
3147 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3148 unsigned int class, inst;
3151 * Check that the context image retains non-privileged (user) registers
3152 * from one engine to the next. For this we check that the CS_GPR are preserved.
3156 if (USES_GUC_SUBMISSION(gt->i915))
3159 /* As we use CS_GPR we cannot run before they existed on all engines. */
3160 if (INTEL_GEN(gt->i915) < 9)
3163 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3167 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3168 if (!gt->engine_class[class][inst])
3171 siblings[nsibling++] = gt->engine_class[class][inst];
3176 err = preserved_virtual_engine(gt, siblings, nsibling);
3184 static int bond_virtual_engine(struct intel_gt *gt,
3186 struct intel_engine_cs **siblings,
3187 unsigned int nsibling,
3189 #define BOND_SCHEDULE BIT(0)
3191 struct intel_engine_cs *master;
3192 struct i915_request *rq[16];
3193 enum intel_engine_id id;
3194 struct igt_spinner spin;
3199 * A set of bonded requests is intended to be run concurrently
3200 * across a number of engines. We use one request per-engine
3201 * and a magic fence to schedule each of the bonded requests
3202 * at the same time. A consequence of our current scheduler is that
3203 * we only move requests to the HW ready queue when the request
3204 * becomes ready, that is when all of its prerequisite fences have
3205 * been signaled. As one of those fences is the master submit fence,
3206 * there is a delay on all secondary fences as the HW may be
3207 * currently busy. Equally, as all the requests are independent,
3208 * they may have other fences that delay individual request
3209 * submission to HW. Ergo, we do not guarantee that all requests are
3210 * immediately submitted to HW at the same time, just that if the
3211 * rules are abided by, they are ready at the same time as the
3212 * first is submitted. Userspace can embed semaphores in its batch
3213 * to ensure parallel execution of its phases as it requires.
3214 * Though naturally it gets requested that perhaps the scheduler should
3215 * take care of parallel execution, even across preemption events on
3216 * different HW. (The proper answer is of course "lalalala".)
3218 * With the submit-fence, we have identified three possible phases
3219 * of synchronisation depending on the master fence: queued (not
3220 * ready), executing, and signaled. The first two are quite simple
3221 * and checked below. However, the signaled master fence handling is
3222 * contentious. Currently we do not distinguish between a signaled
3223 * fence and an expired fence, as once signaled it does not convey
3224 * any information about the previous execution. It may even be freed
3225 * and hence checking later it may not exist at all. Ergo we currently
3226 * do not apply the bonding constraint for an already signaled fence,
3227 * as our expectation is that it should not constrain the secondaries
3228 * and is outside of the scope of the bonded request API (i.e. all
3229 * userspace requests are meant to be running in parallel). As
3230 * it imposes no constraint, and is effectively a no-op, we do not
3231 * check below as normal execution flows are checked extensively above.
3233 * XXX Is the degenerate handling of signaled submit fences the
3234 * expected behaviour for userspace?
3237 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3239 if (igt_spinner_init(&spin, gt))
3243 rq[0] = ERR_PTR(-ENOMEM);
3244 for_each_engine(master, gt, id) {
3245 struct i915_sw_fence fence = {};
3247 if (master->class == class)
3250 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3252 rq[0] = igt_spinner_create_request(&spin,
3253 master->kernel_context,
3255 if (IS_ERR(rq[0])) {
3256 err = PTR_ERR(rq[0]);
3259 i915_request_get(rq[0]);
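/*
 * With BOND_SCHEDULE the master is held back by an onstack fence, so the
 * bonded requests are queued while the master is not yet ready; otherwise
 * we wait for the spinner so the master is already executing when the
 * secondaries are submitted.
 */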
3261 if (flags & BOND_SCHEDULE) {
3262 onstack_fence_init(&fence);
3263 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3268 i915_request_add(rq[0]);
3272 if (!(flags & BOND_SCHEDULE) &&
3273 !igt_wait_for_spinner(&spin, rq[0])) {
3278 for (n = 0; n < nsibling; n++) {
3279 struct intel_context *ve;
3281 ve = intel_execlists_create_virtual(siblings, nsibling);
3284 onstack_fence_fini(&fence);
3288 err = intel_virtual_engine_attach_bond(ve->engine,
3292 intel_context_put(ve);
3293 onstack_fence_fini(&fence);
3297 err = intel_context_pin(ve);
3298 intel_context_put(ve);
3300 onstack_fence_fini(&fence);
3304 rq[n + 1] = i915_request_create(ve);
3305 intel_context_unpin(ve);
3306 if (IS_ERR(rq[n + 1])) {
3307 err = PTR_ERR(rq[n + 1]);
3308 onstack_fence_fini(&fence);
3311 i915_request_get(rq[n + 1]);
3313 err = i915_request_await_execution(rq[n + 1],
3315 ve->engine->bond_execute);
3316 i915_request_add(rq[n + 1]);
3318 onstack_fence_fini(&fence);
3322 onstack_fence_fini(&fence);
3323 intel_engine_flush_submission(master);
3324 igt_spinner_end(&spin);
3326 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3327 pr_err("Master request did not execute (on %s)!\n",
3328 rq[0]->engine->name);
3333 for (n = 0; n < nsibling; n++) {
3334 if (i915_request_wait(rq[n + 1], 0,
3335 MAX_SCHEDULE_TIMEOUT) < 0) {
3340 if (rq[n + 1]->engine != siblings[n]) {
3341 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3343 rq[n + 1]->engine->name,
3344 rq[0]->engine->name);
3350 for (n = 0; !IS_ERR(rq[n]); n++)
3351 i915_request_put(rq[n]);
3352 rq[0] = ERR_PTR(-ENOMEM);
3356 for (n = 0; !IS_ERR(rq[n]); n++)
3357 i915_request_put(rq[n]);
3358 if (igt_flush_test(gt->i915))
3361 igt_spinner_fini(&spin);
3365 static int live_virtual_bond(void *arg)
3367 static const struct phase {
3372 { "schedule", BOND_SCHEDULE },
3375 struct intel_gt *gt = arg;
3376 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3377 unsigned int class, inst;
3380 if (USES_GUC_SUBMISSION(gt->i915))
3383 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3384 const struct phase *p;
3388 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3389 if (!gt->engine_class[class][inst])
3392 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3393 siblings[nsibling++] = gt->engine_class[class][inst];
3398 for (p = phases; p->name; p++) {
3399 err = bond_virtual_engine(gt,
3400 class, siblings, nsibling,
3403 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3404 __func__, p->name, class, nsibling, err);
3413 static int reset_virtual_engine(struct intel_gt *gt,
3414 struct intel_engine_cs **siblings,
3415 unsigned int nsibling)
3417 struct intel_engine_cs *engine;
3418 struct intel_context *ve;
3419 unsigned long *heartbeat;
3420 struct igt_spinner spin;
3421 struct i915_request *rq;
3426 * In order to support offline error capture for fast preempt reset,
3427 * we need to decouple the guilty request and ensure that it and its
3428 * descendants are not executed while the capture is in progress.
3431 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3435 if (igt_spinner_init(&spin, gt)) {
3440 ve = intel_execlists_create_virtual(siblings, nsibling);
3446 for (n = 0; n < nsibling; n++)
3447 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3449 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3454 i915_request_add(rq);
3456 if (!igt_wait_for_spinner(&spin, rq)) {
3457 intel_gt_set_wedged(gt);
3462 engine = rq->engine;
3463 GEM_BUG_ON(engine == ve->engine);
3465 /* Take ownership of the reset and tasklet */
3466 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3467 >->reset.flags)) {
3468 intel_gt_set_wedged(gt);
3472 tasklet_disable(&engine->execlists.tasklet);
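/*
 * Run the queued submission through the tasklet by hand (it is disabled,
 * so nothing else can touch it) and check our spinner is now the request
 * active on the engine.
 */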
3474 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3475 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3477 /* Fake a preemption event; failed of course */
3478 spin_lock_irq(&engine->active.lock);
3479 __unwind_incomplete_requests(engine);
3480 spin_unlock_irq(&engine->active.lock);
3481 GEM_BUG_ON(rq->engine != ve->engine);
3483 /* Reset the engine while keeping our active request on hold */
3484 execlists_hold(engine, rq);
3485 GEM_BUG_ON(!i915_request_on_hold(rq));
3487 intel_engine_reset(engine, NULL);
3488 GEM_BUG_ON(rq->fence.error != -EIO);
3490 /* Release our grasp on the engine, letting CS flow again */
3491 tasklet_enable(&engine->execlists.tasklet);
3492 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags);
3494 /* Check that we do not resubmit the held request */
3495 i915_request_get(rq);
3496 if (!i915_request_wait(rq, 0, HZ / 5)) {
3497 pr_err("%s: on hold request completed!\n",
3499 intel_gt_set_wedged(gt);
3503 GEM_BUG_ON(!i915_request_on_hold(rq));
3505 /* But is resubmitted on release */
3506 execlists_unhold(engine, rq);
3507 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3508 pr_err("%s: held request did not complete!\n",
3510 intel_gt_set_wedged(gt);
3515 i915_request_put(rq);
3517 for (n = 0; n < nsibling; n++)
3518 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3520 intel_context_put(ve);
3522 igt_spinner_fini(&spin);
3528 static int live_virtual_reset(void *arg)
3530 struct intel_gt *gt = arg;
3531 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3532 unsigned int class, inst;
3535 * Check that we handle a reset event within a virtual engine.
3536 * Only the physical engine is reset, but we have to check the flow
3537 * of the virtual requests around the reset, and make sure it is not lost.
3541 if (USES_GUC_SUBMISSION(gt->i915))
3544 if (!intel_has_reset_engine(gt))
3547 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3551 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3552 if (!gt->engine_class[class][inst])
3555 siblings[nsibling++] = gt->engine_class[class][inst];
3560 err = reset_virtual_engine(gt, siblings, nsibling);
3568 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3570 static const struct i915_subtest tests[] = {
3571 SUBTEST(live_sanitycheck),
3572 SUBTEST(live_unlite_switch),
3573 SUBTEST(live_unlite_preempt),
3574 SUBTEST(live_hold_reset),
3575 SUBTEST(live_timeslice_preempt),
3576 SUBTEST(live_timeslice_queue),
3577 SUBTEST(live_busywait_preempt),
3578 SUBTEST(live_preempt),
3579 SUBTEST(live_late_preempt),
3580 SUBTEST(live_nopreempt),
3581 SUBTEST(live_preempt_cancel),
3582 SUBTEST(live_suppress_self_preempt),
3583 SUBTEST(live_suppress_wait_preempt),
3584 SUBTEST(live_chain_preempt),
3585 SUBTEST(live_preempt_gang),
3586 SUBTEST(live_preempt_hang),
3587 SUBTEST(live_preempt_timeout),
3588 SUBTEST(live_preempt_smoke),
3589 SUBTEST(live_virtual_engine),
3590 SUBTEST(live_virtual_mask),
3591 SUBTEST(live_virtual_preserved),
3592 SUBTEST(live_virtual_bond),
3593 SUBTEST(live_virtual_reset),
3596 if (!HAS_EXECLISTS(i915))
3599 if (intel_gt_is_wedged(&i915->gt))
3602 return intel_gt_live_subtests(tests, &i915->gt);
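/*
 * Pretty-print a buffer as rows of hex dwords, comparing each row against
 * the previous one so that runs of identical rows can be skipped.
 */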
3605 static void hexdump(const void *buf, size_t len)
3607 const size_t rowsize = 8 * sizeof(u32);
3608 const void *prev = NULL;
3612 for (pos = 0; pos < len; pos += rowsize) {
3615 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3623 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3624 rowsize, sizeof(u32),
3626 false) >= sizeof(line));
3627 pr_info("[%04zx] %s\n", pos, line);
3634 static int live_lrc_layout(void *arg)
3636 struct intel_gt *gt = arg;
3637 struct intel_engine_cs *engine;
3638 enum intel_engine_id id;
3643 * Check the register offsets we use to create the initial reg state
3644 * match the layout saved by HW.
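* We build a fresh register image into a poisoned page with
* execlists_init_reg_state() and walk it in lockstep with the engine's HW
* default state: every MI_LOAD_REGISTER_IMM must list the same register
* offsets, while the register values themselves are allowed to differ.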
3647 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3652 for_each_engine(engine, gt, id) {
3656 if (!engine->default_state)
3659 hw = i915_gem_object_pin_map(engine->default_state,
3665 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3667 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3668 engine->kernel_context,
3670 engine->kernel_context->ring,
3683 pr_debug("%s: skipped instruction %x at dword %d\n",
3684 engine->name, lri, dw);
3689 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3690 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3691 engine->name, dw, lri);
3696 if (lrc[dw] != lri) {
3697 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3698 engine->name, dw, lri, lrc[dw]);
3708 if (hw[dw] != lrc[dw]) {
3709 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3710 engine->name, dw, hw[dw], lrc[dw]);
3716 * Skip over the actual register value as we
3717 * expect that to differ.
3722 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3725 pr_info("%s: HW register image:\n", engine->name);
3726 hexdump(hw, PAGE_SIZE);
3728 pr_info("%s: SW register image:\n", engine->name);
3729 hexdump(lrc, PAGE_SIZE);
3732 i915_gem_object_unpin_map(engine->default_state);
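/* Find the dword index of the first occurrence of @offset in the image */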
3741 static int find_offset(const u32 *lri, u32 offset)
3745 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3746 if (lri[i] == offset)
3752 static int live_lrc_fixed(void *arg)
3754 struct intel_gt *gt = arg;
3755 struct intel_engine_cs *engine;
3756 enum intel_engine_id id;
3760 * Check the assumed register offsets match the actual locations in
3761 * the context image.
3764 for_each_engine(engine, gt, id) {
3771 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3776 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3781 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3786 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3791 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3792 lrc_ring_mi_mode(engine),
3796 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3804 if (!engine->default_state)
3807 hw = i915_gem_object_pin_map(engine->default_state,
3813 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
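/*
 * Locate each register in the HW default image and check it sits at the
 * dword offset we hardcode for that register.
 */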
3815 for (t = tbl; t->name; t++) {
3816 int dw = find_offset(hw, t->reg);
3818 if (dw != t->offset) {
3819 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3829 i915_gem_object_unpin_map(engine->default_state);
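/*
 * From inside a new context, use MI_STORE_REGISTER_MEM to copy RING_START
 * and RING_TAIL into a scratch page, then compare against the values we
 * computed for that context's ring on the CPU side.
 */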
3835 static int __live_lrc_state(struct intel_engine_cs *engine,
3836 struct i915_vma *scratch)
3838 struct intel_context *ce;
3839 struct i915_request *rq;
3845 u32 expected[MAX_IDX];
3850 ce = intel_context_create(engine);
3854 err = intel_context_pin(ce);
3858 rq = i915_request_create(ce);
3864 cs = intel_ring_begin(rq, 4 * MAX_IDX);
3867 i915_request_add(rq);
3871 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3872 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3873 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3876 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3878 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3879 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3880 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3883 i915_request_get(rq);
3884 i915_request_add(rq);
3886 intel_engine_flush_submission(engine);
3887 expected[RING_TAIL_IDX] = ce->ring->tail;
3889 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3894 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3900 for (n = 0; n < MAX_IDX; n++) {
3901 if (cs[n] != expected[n]) {
3902 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3903 engine->name, n, cs[n], expected[n]);
3909 i915_gem_object_unpin_map(scratch->obj);
3912 i915_request_put(rq);
3914 intel_context_unpin(ce);
3916 intel_context_put(ce);
3920 static int live_lrc_state(void *arg)
3922 struct intel_gt *gt = arg;
3923 struct intel_engine_cs *engine;
3924 struct i915_vma *scratch;
3925 enum intel_engine_id id;
3929 * Check the live register state matches what we expect for this intel_context.
3933 scratch = create_scratch(gt);
3934 if (IS_ERR(scratch))
3935 return PTR_ERR(scratch);
3937 for_each_engine(engine, gt, id) {
3938 err = __live_lrc_state(engine, scratch);
3943 if (igt_flush_test(gt->i915))
3946 i915_vma_unpin_and_release(&scratch, 0);
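/*
 * Dirty every CS_GPR on @engine from the kernel context by loading
 * STACK_MAGIC into each, so a later context can show they start cleared.
 */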
3950 static int gpr_make_dirty(struct intel_engine_cs *engine)
3952 struct i915_request *rq;
3956 rq = intel_engine_create_kernel_request(engine);
3960 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3962 i915_request_add(rq);
3966 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3967 for (n = 0; n < NUM_GPR_DW; n++) {
3968 *cs++ = CS_GPR(engine, n);
3969 *cs++ = STACK_MAGIC;
3973 intel_ring_advance(rq, cs);
3974 i915_request_add(rq);
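/*
 * Read back every GPR dword from a freshly created context into the
 * scratch page and verify each one is zero, i.e. nothing leaked through
 * from gpr_make_dirty().
 */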
3979 static int __live_gpr_clear(struct intel_engine_cs *engine,
3980 struct i915_vma *scratch)
3982 struct intel_context *ce;
3983 struct i915_request *rq;
3988 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3989 return 0; /* GPR only on rcs0 for gen8 */
3991 err = gpr_make_dirty(engine);
3995 ce = intel_context_create(engine);
3999 rq = intel_context_create_request(ce);
4005 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
4008 i915_request_add(rq);
4012 for (n = 0; n < NUM_GPR_DW; n++) {
4013 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4014 *cs++ = CS_GPR(engine, n);
4015 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4019 i915_request_get(rq);
4020 i915_request_add(rq);
4022 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4027 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4033 for (n = 0; n < NUM_GPR_DW; n++) {
4035 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4037 n / 2, n & 1 ? "udw" : "ldw",
4044 i915_gem_object_unpin_map(scratch->obj);
4047 i915_request_put(rq);
4049 intel_context_put(ce);
4053 static int live_gpr_clear(void *arg)
4055 struct intel_gt *gt = arg;
4056 struct intel_engine_cs *engine;
4057 struct i915_vma *scratch;
4058 enum intel_engine_id id;
4062 * Check that GPR registers are cleared in new contexts as we need
4063 * to avoid leaking any information from previous contexts.
4066 scratch = create_scratch(gt);
4067 if (IS_ERR(scratch))
4068 return PTR_ERR(scratch);
4070 for_each_engine(engine, gt, id) {
4071 err = __live_gpr_clear(engine, scratch);
4076 if (igt_flush_test(gt->i915))
4079 i915_vma_unpin_and_release(&scratch, 0);
4083 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4085 static const struct i915_subtest tests[] = {
4086 SUBTEST(live_lrc_layout),
4087 SUBTEST(live_lrc_fixed),
4088 SUBTEST(live_lrc_state),
4089 SUBTEST(live_gpr_clear),
4092 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4095 return intel_gt_live_subtests(tests, &i915->gt);