2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
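/*
 * create_scratch() allocates a single page of internal memory, marks it
 * cacheable and pins it into the GGTT so the CS can write results into it
 * (e.g. the GPR dumps used by the virtual engine tests below) for the CPU
 * to inspect afterwards.
 */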
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 struct drm_i915_gem_object *obj;
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40 i915_gem_object_put(obj);
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 i915_gem_object_put(obj);
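/*
 * Many of these tests park a spinner on the HW indefinitely. Disable the
 * heartbeat (and hold an engine-pm reference) for the duration so that the
 * background heartbeat pulse does not preempt or cancel our intentionally
 * hung requests; engine_heartbeat_enable() restores the saved interval.
 */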
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
66 intel_engine_pm_put(engine);
68 engine->props.heartbeat_interval_ms = saved;
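/*
 * live_sanitycheck: submit a simple spinner on every engine, check that it
 * starts executing, then terminate it and flush. A quick check that request
 * submission and the spinner infrastructure work before the harder tests.
 */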
71 static int live_sanitycheck(void *arg)
73 struct intel_gt *gt = arg;
74 struct intel_engine_cs *engine;
75 enum intel_engine_id id;
76 struct igt_spinner spin;
79 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
82 if (igt_spinner_init(&spin, gt))
85 for_each_engine(engine, gt, id) {
86 struct intel_context *ce;
87 struct i915_request *rq;
89 ce = intel_context_create(engine);
95 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101 i915_request_add(rq);
102 if (!igt_wait_for_spinner(&spin, rq)) {
103 GEM_TRACE("spinner failed to start\n");
105 intel_gt_set_wedged(gt);
110 igt_spinner_end(&spin);
111 if (igt_flush_test(gt->i915)) {
117 intel_context_put(ce);
122 igt_spinner_fini(&spin);
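/*
 * live_unlite_restore: exercise the "lite restore" path where a context is
 * resubmitted with only an updated RING_TAIL. The two contexts are poisoned
 * and offset such that a bogus lite-restore would execute garbage from the
 * wrong ring and hang; the optional priority bump forces the switch via
 * preemption instead of a normal completion.
 */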
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
128 struct intel_engine_cs *engine;
129 enum intel_engine_id id;
130 struct igt_spinner spin;
134 * Check that we can correctly context switch between 2 instances
135 * on the same engine from the same parent context.
138 if (igt_spinner_init(&spin, gt))
142 for_each_engine(engine, gt, id) {
143 struct intel_context *ce[2] = {};
144 struct i915_request *rq[2];
145 struct igt_live_test t;
149 if (prio && !intel_engine_has_preemption(engine))
152 if (!intel_engine_can_store_dword(engine))
155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
159 engine_heartbeat_disable(engine, &saved);
161 for (n = 0; n < ARRAY_SIZE(ce); n++) {
162 struct intel_context *tmp;
164 tmp = intel_context_create(engine);
170 err = intel_context_pin(tmp);
172 intel_context_put(tmp);
177 * Setup the pair of contexts such that if we
178 * lite-restore using the RING_TAIL from ce[1] it
179 * will execute garbage from ce[0]->ring.
181 memset(tmp->ring->vaddr,
182 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183 tmp->ring->vma->size);
187 GEM_BUG_ON(!ce[1]->ring->size);
188 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
191 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
193 err = PTR_ERR(rq[0]);
197 i915_request_get(rq[0]);
198 i915_request_add(rq[0]);
199 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
201 if (!igt_wait_for_spinner(&spin, rq[0])) {
202 i915_request_put(rq[0]);
206 rq[1] = i915_request_create(ce[1]);
208 err = PTR_ERR(rq[1]);
209 i915_request_put(rq[0]);
215 * Ensure we do the switch to ce[1] on completion.
217 * rq[0] is already submitted, so this should reduce
218 * to a no-op (a wait on a request on the same engine
219 * uses the submit fence, not the completion fence),
220 * but it will install a dependency on rq[1] for rq[0]
221 * that will prevent the pair being reordered by
224 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
227 i915_request_get(rq[1]);
228 i915_request_add(rq[1]);
229 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230 i915_request_put(rq[0]);
233 struct i915_sched_attr attr = {
237 /* Alternatively preempt the spinner with ce[1] */
238 engine->schedule(rq[1], &attr);
241 /* And switch back to ce[0] for good measure */
242 rq[0] = i915_request_create(ce[0]);
244 err = PTR_ERR(rq[0]);
245 i915_request_put(rq[1]);
249 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250 i915_request_get(rq[0]);
251 i915_request_add(rq[0]);
252 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253 i915_request_put(rq[1]);
254 i915_request_put(rq[0]);
257 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258 igt_spinner_end(&spin);
259 for (n = 0; n < ARRAY_SIZE(ce); n++) {
260 if (IS_ERR_OR_NULL(ce[n]))
263 intel_context_unpin(ce[n]);
264 intel_context_put(ce[n]);
267 engine_heartbeat_enable(engine, saved);
268 if (igt_live_test_end(&t))
274 igt_spinner_fini(&spin);
278 static int live_unlite_switch(void *arg)
280 return live_unlite_restore(arg, 0);
283 static int live_unlite_preempt(void *arg)
285 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
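/*
 * live_hold_reset: place a spinning request on the HW, pull it off the
 * engine with execlists_hold(), reset the engine, and verify the held
 * request is not resubmitted until execlists_unhold() releases it. This is
 * the mechanism used to keep a guilty request around for error capture.
 */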
288 static int live_hold_reset(void *arg)
290 struct intel_gt *gt = arg;
291 struct intel_engine_cs *engine;
292 enum intel_engine_id id;
293 struct igt_spinner spin;
297 * In order to support offline error capture for fast preempt reset,
298 * we need to decouple the guilty request and ensure that it and its
299 * descendants are not executed while the capture is in progress.
302 if (!intel_has_reset_engine(gt))
305 if (igt_spinner_init(&spin, gt))
308 for_each_engine(engine, gt, id) {
309 struct intel_context *ce;
310 unsigned long heartbeat;
311 struct i915_request *rq;
313 ce = intel_context_create(engine);
319 engine_heartbeat_disable(engine, &heartbeat);
321 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
326 i915_request_add(rq);
328 if (!igt_wait_for_spinner(&spin, rq)) {
329 intel_gt_set_wedged(gt);
334 /* We have our request executing, now remove it and reset */
336 if (test_and_set_bit(I915_RESET_ENGINE + id,
338 intel_gt_set_wedged(gt);
342 tasklet_disable(&engine->execlists.tasklet);
344 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
345 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
347 i915_request_get(rq);
348 execlists_hold(engine, rq);
349 GEM_BUG_ON(!i915_request_on_hold(rq));
351 intel_engine_reset(engine, NULL);
352 GEM_BUG_ON(rq->fence.error != -EIO);
354 tasklet_enable(&engine->execlists.tasklet);
355 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
358 /* Check that we do not resubmit the held request */
359 if (!i915_request_wait(rq, 0, HZ / 5)) {
360 pr_err("%s: on hold request completed!\n",
362 i915_request_put(rq);
366 GEM_BUG_ON(!i915_request_on_hold(rq));
368 /* But is resubmitted on release */
369 execlists_unhold(engine, rq);
370 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
371 pr_err("%s: held request did not complete!\n",
373 intel_gt_set_wedged(gt);
376 i915_request_put(rq);
379 engine_heartbeat_enable(engine, heartbeat);
380 intel_context_put(ce);
385 igt_spinner_fini(&spin);
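/*
 * emit_semaphore_chain: each request busywaits (with arbitration enabled,
 * so it remains preemptible) until its own dword in the semaphore page is
 * signalled, and then signals the dword of the previous request, so a chain
 * of such requests completes one after another once the last is released.
 */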
390 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
394 cs = intel_ring_begin(rq, 10);
398 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
400 *cs++ = MI_SEMAPHORE_WAIT |
401 MI_SEMAPHORE_GLOBAL_GTT |
403 MI_SEMAPHORE_SAD_NEQ_SDD;
405 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
409 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
410 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
420 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
422 intel_ring_advance(rq, cs);
426 static struct i915_request *
427 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
429 struct intel_context *ce;
430 struct i915_request *rq;
433 ce = intel_context_create(engine);
437 rq = intel_context_create_request(ce);
442 if (rq->engine->emit_init_breadcrumb)
443 err = rq->engine->emit_init_breadcrumb(rq);
445 err = emit_semaphore_chain(rq, vma, idx);
447 i915_request_get(rq);
448 i915_request_add(rq);
453 intel_context_put(ce);
458 release_queue(struct intel_engine_cs *engine,
459 struct i915_vma *vma,
462 struct i915_sched_attr attr = {
465 struct i915_request *rq;
468 rq = intel_engine_create_kernel_request(engine);
472 cs = intel_ring_begin(rq, 4);
474 i915_request_add(rq);
478 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
479 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
483 intel_ring_advance(rq, cs);
485 i915_request_get(rq);
486 i915_request_add(rq);
489 engine->schedule(rq, &attr);
490 local_bh_enable(); /* kick tasklet */
492 i915_request_put(rq);
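/*
 * slice_semaphore_queue: queue a head request on @outer followed by @count
 * semaphore-chain requests on every engine, then use release_queue() to
 * kick the end of the chain at maximum priority. The head can only complete
 * if the scheduler timeslices between the co-dependent requests.
 */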
498 slice_semaphore_queue(struct intel_engine_cs *outer,
499 struct i915_vma *vma,
502 struct intel_engine_cs *engine;
503 struct i915_request *head;
504 enum intel_engine_id id;
507 head = semaphore_queue(outer, vma, n++);
509 return PTR_ERR(head);
511 for_each_engine(engine, outer->gt, id) {
512 for (i = 0; i < count; i++) {
513 struct i915_request *rq;
515 rq = semaphore_queue(engine, vma, n++);
521 i915_request_put(rq);
525 err = release_queue(outer, vma, n, INT_MAX);
529 if (i915_request_wait(head, 0,
530 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
531 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
534 intel_gt_set_wedged(outer->gt);
539 i915_request_put(head);
543 static int live_timeslice_preempt(void *arg)
545 struct intel_gt *gt = arg;
546 struct drm_i915_gem_object *obj;
547 struct i915_vma *vma;
553 * If a request takes too long, we would like to give other users
554 * a fair go on the GPU. In particular, users may create batches
555 * that wait upon external input, where that input may even be
556 * supplied by another GPU job. To avoid blocking forever, we
557 * need to preempt the current task and replace it with another
560 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
563 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
567 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
573 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
575 err = PTR_ERR(vaddr);
579 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
583 for_each_prime_number_from(count, 1, 16) {
584 struct intel_engine_cs *engine;
585 enum intel_engine_id id;
587 for_each_engine(engine, gt, id) {
590 if (!intel_engine_has_preemption(engine))
593 memset(vaddr, 0, PAGE_SIZE);
595 engine_heartbeat_disable(engine, &saved);
596 err = slice_semaphore_queue(engine, vma, count);
597 engine_heartbeat_enable(engine, saved);
601 if (igt_flush_test(gt->i915)) {
611 i915_gem_object_unpin_map(obj);
613 i915_gem_object_put(obj);
617 static struct i915_request *nop_request(struct intel_engine_cs *engine)
619 struct i915_request *rq;
621 rq = intel_engine_create_kernel_request(engine);
625 i915_request_get(rq);
626 i915_request_add(rq);
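/*
 * wait_for_submit: flush the submission tasklet and poll until the request
 * has actually been submitted to the HW, or the timeout expires.
 */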
631 static int wait_for_submit(struct intel_engine_cs *engine,
632 struct i915_request *rq,
633 unsigned long timeout)
638 intel_engine_flush_submission(engine);
639 if (i915_request_is_active(rq))
641 } while (time_before(jiffies, timeout));
646 static long timeslice_threshold(const struct intel_engine_cs *engine)
648 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
651 static int live_timeslice_queue(void *arg)
653 struct intel_gt *gt = arg;
654 struct drm_i915_gem_object *obj;
655 struct intel_engine_cs *engine;
656 enum intel_engine_id id;
657 struct i915_vma *vma;
662 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
663 * timeslicing between them disabled, we *do* enable timeslicing
664 * if the queue demands it. (Normally, we do not submit if
665 * ELSP[1] is already occupied, so must rely on timeslicing to
666 * eject ELSP[0] in favour of the queue.)
668 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
671 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
675 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
681 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
683 err = PTR_ERR(vaddr);
687 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
691 for_each_engine(engine, gt, id) {
692 struct i915_sched_attr attr = {
693 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
695 struct i915_request *rq, *nop;
698 if (!intel_engine_has_preemption(engine))
701 engine_heartbeat_disable(engine, &saved);
702 memset(vaddr, 0, PAGE_SIZE);
704 /* ELSP[0]: semaphore wait */
705 rq = semaphore_queue(engine, vma, 0);
710 engine->schedule(rq, &attr);
711 err = wait_for_submit(engine, rq, HZ / 2);
713 pr_err("%s: Timed out trying to submit semaphores\n",
718 /* ELSP[1]: nop request */
719 nop = nop_request(engine);
724 err = wait_for_submit(engine, nop, HZ / 2);
725 i915_request_put(nop);
727 pr_err("%s: Timed out trying to submit nop\n",
732 GEM_BUG_ON(i915_request_completed(rq));
733 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
735 /* Queue: semaphore signal, matching priority as semaphore */
736 err = release_queue(engine, vma, 1, effective_prio(rq));
740 intel_engine_flush_submission(engine);
741 if (!READ_ONCE(engine->execlists.timer.expires) &&
742 !i915_request_completed(rq)) {
743 struct drm_printer p =
744 drm_info_printer(gt->i915->drm.dev);
746 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
748 intel_engine_dump(engine, &p,
749 "%s\n", engine->name);
752 memset(vaddr, 0xff, PAGE_SIZE);
756 /* Timeslice every jiffy, so within 2 we should signal */
757 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
758 struct drm_printer p =
759 drm_info_printer(gt->i915->drm.dev);
761 pr_err("%s: Failed to timeslice into queue\n",
763 intel_engine_dump(engine, &p,
764 "%s\n", engine->name);
766 memset(vaddr, 0xff, PAGE_SIZE);
770 i915_request_put(rq);
772 engine_heartbeat_enable(engine, saved);
779 i915_gem_object_unpin_map(obj);
781 i915_gem_object_put(obj);
785 static int live_busywait_preempt(void *arg)
787 struct intel_gt *gt = arg;
788 struct i915_gem_context *ctx_hi, *ctx_lo;
789 struct intel_engine_cs *engine;
790 struct drm_i915_gem_object *obj;
791 struct i915_vma *vma;
792 enum intel_engine_id id;
797 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
798 * preempt the busywaits used to synchronise between rings.
801 ctx_hi = kernel_context(gt->i915);
804 ctx_hi->sched.priority =
805 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
807 ctx_lo = kernel_context(gt->i915);
810 ctx_lo->sched.priority =
811 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
813 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
819 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
825 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
831 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
835 for_each_engine(engine, gt, id) {
836 struct i915_request *lo, *hi;
837 struct igt_live_test t;
840 if (!intel_engine_has_preemption(engine))
843 if (!intel_engine_can_store_dword(engine))
846 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
852 * We create two requests. The low priority request
853 * busywaits on a semaphore (inside the ringbuffer where
854 * it should be preemptible) and the high priority request
855 * uses a MI_STORE_DWORD_IMM to update the semaphore value
856 * allowing the first request to complete. If preemption
857 * fails, we hang instead.
860 lo = igt_request_alloc(ctx_lo, engine);
866 cs = intel_ring_begin(lo, 8);
869 i915_request_add(lo);
873 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
874 *cs++ = i915_ggtt_offset(vma);
878 /* XXX Do we need a flush + invalidate here? */
880 *cs++ = MI_SEMAPHORE_WAIT |
881 MI_SEMAPHORE_GLOBAL_GTT |
883 MI_SEMAPHORE_SAD_EQ_SDD;
885 *cs++ = i915_ggtt_offset(vma);
888 intel_ring_advance(lo, cs);
890 i915_request_get(lo);
891 i915_request_add(lo);
893 if (wait_for(READ_ONCE(*map), 10)) {
894 i915_request_put(lo);
899 /* Low priority request should be busywaiting now */
900 if (i915_request_wait(lo, 0, 1) != -ETIME) {
901 i915_request_put(lo);
902 pr_err("%s: Busywaiting request did not busywait!\n",
908 hi = igt_request_alloc(ctx_hi, engine);
911 i915_request_put(lo);
915 cs = intel_ring_begin(hi, 4);
918 i915_request_add(hi);
919 i915_request_put(lo);
923 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
924 *cs++ = i915_ggtt_offset(vma);
928 intel_ring_advance(hi, cs);
929 i915_request_add(hi);
931 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
932 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
934 pr_err("%s: Failed to preempt semaphore busywait!\n",
937 intel_engine_dump(engine, &p, "%s\n", engine->name);
940 i915_request_put(lo);
941 intel_gt_set_wedged(gt);
945 GEM_BUG_ON(READ_ONCE(*map));
946 i915_request_put(lo);
948 if (igt_live_test_end(&t)) {
958 i915_gem_object_unpin_map(obj);
960 i915_gem_object_put(obj);
962 kernel_context_close(ctx_lo);
964 kernel_context_close(ctx_hi);
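/*
 * spinner_create_request: convenience wrapper that looks up the GEM
 * context's legacy engine binding and emits a spinning batch on it, so the
 * context-based tests below can share the igt_spinner helpers.
 */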
968 static struct i915_request *
969 spinner_create_request(struct igt_spinner *spin,
970 struct i915_gem_context *ctx,
971 struct intel_engine_cs *engine,
974 struct intel_context *ce;
975 struct i915_request *rq;
977 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
981 rq = igt_spinner_create_request(spin, ce, arb);
982 intel_context_put(ce);
986 static int live_preempt(void *arg)
988 struct intel_gt *gt = arg;
989 struct i915_gem_context *ctx_hi, *ctx_lo;
990 struct igt_spinner spin_hi, spin_lo;
991 struct intel_engine_cs *engine;
992 enum intel_engine_id id;
995 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
998 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
999 pr_err("Logical preemption supported, but not exposed\n");
1001 if (igt_spinner_init(&spin_hi, gt))
1004 if (igt_spinner_init(&spin_lo, gt))
1007 ctx_hi = kernel_context(gt->i915);
1010 ctx_hi->sched.priority =
1011 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1013 ctx_lo = kernel_context(gt->i915);
1016 ctx_lo->sched.priority =
1017 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1019 for_each_engine(engine, gt, id) {
1020 struct igt_live_test t;
1021 struct i915_request *rq;
1023 if (!intel_engine_has_preemption(engine))
1026 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1031 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1038 i915_request_add(rq);
1039 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1040 GEM_TRACE("lo spinner failed to start\n");
1042 intel_gt_set_wedged(gt);
1047 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1050 igt_spinner_end(&spin_lo);
1055 i915_request_add(rq);
1056 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1057 GEM_TRACE("hi spinner failed to start\n");
1059 intel_gt_set_wedged(gt);
1064 igt_spinner_end(&spin_hi);
1065 igt_spinner_end(&spin_lo);
1067 if (igt_live_test_end(&t)) {
1075 kernel_context_close(ctx_lo);
1077 kernel_context_close(ctx_hi);
1079 igt_spinner_fini(&spin_lo);
1081 igt_spinner_fini(&spin_hi);
1085 static int live_late_preempt(void *arg)
1087 struct intel_gt *gt = arg;
1088 struct i915_gem_context *ctx_hi, *ctx_lo;
1089 struct igt_spinner spin_hi, spin_lo;
1090 struct intel_engine_cs *engine;
1091 struct i915_sched_attr attr = {};
1092 enum intel_engine_id id;
1095 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1098 if (igt_spinner_init(&spin_hi, gt))
1101 if (igt_spinner_init(&spin_lo, gt))
1104 ctx_hi = kernel_context(gt->i915);
1108 ctx_lo = kernel_context(gt->i915);
1112 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1113 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1115 for_each_engine(engine, gt, id) {
1116 struct igt_live_test t;
1117 struct i915_request *rq;
1119 if (!intel_engine_has_preemption(engine))
1122 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1127 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1134 i915_request_add(rq);
1135 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1136 pr_err("First context failed to start\n");
1140 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1143 igt_spinner_end(&spin_lo);
1148 i915_request_add(rq);
1149 if (igt_wait_for_spinner(&spin_hi, rq)) {
1150 pr_err("Second context overtook first?\n");
1154 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1155 engine->schedule(rq, &attr);
1157 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1158 pr_err("High priority context failed to preempt the low priority context\n");
1163 igt_spinner_end(&spin_hi);
1164 igt_spinner_end(&spin_lo);
1166 if (igt_live_test_end(&t)) {
1174 kernel_context_close(ctx_lo);
1176 kernel_context_close(ctx_hi);
1178 igt_spinner_fini(&spin_lo);
1180 igt_spinner_fini(&spin_hi);
1184 igt_spinner_end(&spin_hi);
1185 igt_spinner_end(&spin_lo);
1186 intel_gt_set_wedged(gt);
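/*
 * A preempt_client bundles an igt_spinner with its own GEM context, so the
 * preemption tests can pit spinners from different contexts (and hence
 * different priorities) against each other on the same engine.
 */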
1191 struct preempt_client {
1192 struct igt_spinner spin;
1193 struct i915_gem_context *ctx;
1196 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1198 c->ctx = kernel_context(gt->i915);
1202 if (igt_spinner_init(&c->spin, gt))
1208 kernel_context_close(c->ctx);
1212 static void preempt_client_fini(struct preempt_client *c)
1214 igt_spinner_fini(&c->spin);
1215 kernel_context_close(c->ctx);
1218 static int live_nopreempt(void *arg)
1220 struct intel_gt *gt = arg;
1221 struct intel_engine_cs *engine;
1222 struct preempt_client a, b;
1223 enum intel_engine_id id;
1227 * Verify that we can disable preemption for an individual request
1228 * that may be being observed and does not want to be interrupted.
1231 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1234 if (preempt_client_init(gt, &a))
1236 if (preempt_client_init(gt, &b))
1238 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1240 for_each_engine(engine, gt, id) {
1241 struct i915_request *rq_a, *rq_b;
1243 if (!intel_engine_has_preemption(engine))
1246 engine->execlists.preempt_hang.count = 0;
1248 rq_a = spinner_create_request(&a.spin,
1252 err = PTR_ERR(rq_a);
1256 /* Low priority client, but unpreemptable! */
1257 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1259 i915_request_add(rq_a);
1260 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1261 pr_err("First client failed to start\n");
1265 rq_b = spinner_create_request(&b.spin,
1269 err = PTR_ERR(rq_b);
1273 i915_request_add(rq_b);
1275 /* B is much more important than A! (But A is unpreemptable.) */
1276 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1278 /* Wait long enough for preemption and timeslicing */
1279 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1280 pr_err("Second client started too early!\n");
1284 igt_spinner_end(&a.spin);
1286 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1287 pr_err("Second client failed to start\n");
1291 igt_spinner_end(&b.spin);
1293 if (engine->execlists.preempt_hang.count) {
1294 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1295 engine->execlists.preempt_hang.count);
1300 if (igt_flush_test(gt->i915))
1306 preempt_client_fini(&b);
1308 preempt_client_fini(&a);
1312 igt_spinner_end(&b.spin);
1313 igt_spinner_end(&a.spin);
1314 intel_gt_set_wedged(gt);
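/*
 * The __cancel_*() subtests below mark an in-flight context as banned and
 * send an engine pulse (a high priority heartbeat request) to force it off
 * the HW, then check that the cancelled requests report -EIO while innocent
 * requests on the same engine still complete.
 */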
1319 struct live_preempt_cancel {
1320 struct intel_engine_cs *engine;
1321 struct preempt_client a, b;
1324 static int __cancel_active0(struct live_preempt_cancel *arg)
1326 struct i915_request *rq;
1327 struct igt_live_test t;
1330 /* Preempt cancel of ELSP0 */
1331 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1332 if (igt_live_test_begin(&t, arg->engine->i915,
1333 __func__, arg->engine->name))
1336 rq = spinner_create_request(&arg->a.spin,
1337 arg->a.ctx, arg->engine,
1342 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1343 i915_request_get(rq);
1344 i915_request_add(rq);
1345 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1350 intel_context_set_banned(rq->context);
1351 err = intel_engine_pulse(arg->engine);
1355 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1360 if (rq->fence.error != -EIO) {
1361 pr_err("Cancelled inflight0 request did not report -EIO\n");
1367 i915_request_put(rq);
1368 if (igt_live_test_end(&t))
1373 static int __cancel_active1(struct live_preempt_cancel *arg)
1375 struct i915_request *rq[2] = {};
1376 struct igt_live_test t;
1379 /* Preempt cancel of ELSP1 */
1380 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1381 if (igt_live_test_begin(&t, arg->engine->i915,
1382 __func__, arg->engine->name))
1385 rq[0] = spinner_create_request(&arg->a.spin,
1386 arg->a.ctx, arg->engine,
1387 MI_NOOP); /* no preemption */
1389 return PTR_ERR(rq[0]);
1391 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1392 i915_request_get(rq[0]);
1393 i915_request_add(rq[0]);
1394 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1399 rq[1] = spinner_create_request(&arg->b.spin,
1400 arg->b.ctx, arg->engine,
1402 if (IS_ERR(rq[1])) {
1403 err = PTR_ERR(rq[1]);
1407 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1408 i915_request_get(rq[1]);
1409 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1410 i915_request_add(rq[1]);
1414 intel_context_set_banned(rq[1]->context);
1415 err = intel_engine_pulse(arg->engine);
1419 igt_spinner_end(&arg->a.spin);
1420 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1425 if (rq[0]->fence.error != 0) {
1426 pr_err("Normal inflight0 request did not complete\n");
1431 if (rq[1]->fence.error != -EIO) {
1432 pr_err("Cancelled inflight1 request did not report -EIO\n");
1438 i915_request_put(rq[1]);
1439 i915_request_put(rq[0]);
1440 if (igt_live_test_end(&t))
1445 static int __cancel_queued(struct live_preempt_cancel *arg)
1447 struct i915_request *rq[3] = {};
1448 struct igt_live_test t;
1451 /* Full ELSP and one in the wings */
1452 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1453 if (igt_live_test_begin(&t, arg->engine->i915,
1454 __func__, arg->engine->name))
1457 rq[0] = spinner_create_request(&arg->a.spin,
1458 arg->a.ctx, arg->engine,
1461 return PTR_ERR(rq[0]);
1463 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1464 i915_request_get(rq[0]);
1465 i915_request_add(rq[0]);
1466 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1471 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1472 if (IS_ERR(rq[1])) {
1473 err = PTR_ERR(rq[1]);
1477 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1478 i915_request_get(rq[1]);
1479 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1480 i915_request_add(rq[1]);
1484 rq[2] = spinner_create_request(&arg->b.spin,
1485 arg->a.ctx, arg->engine,
1487 if (IS_ERR(rq[2])) {
1488 err = PTR_ERR(rq[2]);
1492 i915_request_get(rq[2]);
1493 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1494 i915_request_add(rq[2]);
1498 intel_context_set_banned(rq[2]->context);
1499 err = intel_engine_pulse(arg->engine);
1503 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1508 if (rq[0]->fence.error != -EIO) {
1509 pr_err("Cancelled inflight0 request did not report -EIO\n");
1514 if (rq[1]->fence.error != 0) {
1515 pr_err("Normal inflight1 request did not complete\n");
1520 if (rq[2]->fence.error != -EIO) {
1521 pr_err("Cancelled queued request did not report -EIO\n");
1527 i915_request_put(rq[2]);
1528 i915_request_put(rq[1]);
1529 i915_request_put(rq[0]);
1530 if (igt_live_test_end(&t))
1535 static int __cancel_hostile(struct live_preempt_cancel *arg)
1537 struct i915_request *rq;
1540 /* Preempt cancel non-preemptible spinner in ELSP0 */
1541 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1544 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1545 rq = spinner_create_request(&arg->a.spin,
1546 arg->a.ctx, arg->engine,
1547 MI_NOOP); /* preemption disabled */
1551 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1552 i915_request_get(rq);
1553 i915_request_add(rq);
1554 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1559 intel_context_set_banned(rq->context);
1560 err = intel_engine_pulse(arg->engine); /* force reset */
1564 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1569 if (rq->fence.error != -EIO) {
1570 pr_err("Cancelled inflight0 request did not report -EIO\n");
1576 i915_request_put(rq);
1577 if (igt_flush_test(arg->engine->i915))
1582 static int live_preempt_cancel(void *arg)
1584 struct intel_gt *gt = arg;
1585 struct live_preempt_cancel data;
1586 enum intel_engine_id id;
1590 * To cancel an inflight context, we need to first remove it from the
1591 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1594 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1597 if (preempt_client_init(gt, &data.a))
1599 if (preempt_client_init(gt, &data.b))
1602 for_each_engine(data.engine, gt, id) {
1603 if (!intel_engine_has_preemption(data.engine))
1606 err = __cancel_active0(&data);
1610 err = __cancel_active1(&data);
1614 err = __cancel_queued(&data);
1618 err = __cancel_hostile(&data);
1625 preempt_client_fini(&data.b);
1627 preempt_client_fini(&data.a);
1632 igt_spinner_end(&data.b.spin);
1633 igt_spinner_end(&data.a.spin);
1634 intel_gt_set_wedged(gt);
1638 static int live_suppress_self_preempt(void *arg)
1640 struct intel_gt *gt = arg;
1641 struct intel_engine_cs *engine;
1642 struct i915_sched_attr attr = {
1643 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1645 struct preempt_client a, b;
1646 enum intel_engine_id id;
1650 * Verify that if a preemption request does not cause a change in
1651 * the current execution order, the preempt-to-idle injection is
1652 * skipped and that we do not accidentally apply it after the CS
1656 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1659 if (USES_GUC_SUBMISSION(gt->i915))
1660 return 0; /* presume black box */
1662 if (intel_vgpu_active(gt->i915))
1663 return 0; /* GVT forces single port & request submission */
1665 if (preempt_client_init(gt, &a))
1667 if (preempt_client_init(gt, &b))
1670 for_each_engine(engine, gt, id) {
1671 struct i915_request *rq_a, *rq_b;
1674 if (!intel_engine_has_preemption(engine))
1677 if (igt_flush_test(gt->i915))
1680 intel_engine_pm_get(engine);
1681 engine->execlists.preempt_hang.count = 0;
1683 rq_a = spinner_create_request(&a.spin,
1687 err = PTR_ERR(rq_a);
1688 intel_engine_pm_put(engine);
1692 i915_request_add(rq_a);
1693 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1694 pr_err("First client failed to start\n");
1695 intel_engine_pm_put(engine);
1699 /* Keep postponing the timer to avoid premature slicing */
1700 mod_timer(&engine->execlists.timer, jiffies + HZ);
1701 for (depth = 0; depth < 8; depth++) {
1702 rq_b = spinner_create_request(&b.spin,
1706 err = PTR_ERR(rq_b);
1707 intel_engine_pm_put(engine);
1710 i915_request_add(rq_b);
1712 GEM_BUG_ON(i915_request_completed(rq_a));
1713 engine->schedule(rq_a, &attr);
1714 igt_spinner_end(&a.spin);
1716 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1717 pr_err("Second client failed to start\n");
1718 intel_engine_pm_put(engine);
1725 igt_spinner_end(&a.spin);
1727 if (engine->execlists.preempt_hang.count) {
1728 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1730 engine->execlists.preempt_hang.count,
1732 intel_engine_pm_put(engine);
1737 intel_engine_pm_put(engine);
1738 if (igt_flush_test(gt->i915))
1744 preempt_client_fini(&b);
1746 preempt_client_fini(&a);
1750 igt_spinner_end(&b.spin);
1751 igt_spinner_end(&a.spin);
1752 intel_gt_set_wedged(gt);
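/*
 * dummy_request() hand-rolls a bare struct i915_request that is never
 * submitted to HW and is marked as permanently incomplete (note the
 * fence/seqno trickery below). live_suppress_wait_preempt() uses it as a
 * fake predecessor on each timeline so the real requests count as waiters
 * without receiving a new-client priority boost; dummy_request_free() then
 * fakes the CS event to release everything.
 */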
1757 static int __i915_sw_fence_call
1758 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1763 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1765 struct i915_request *rq;
1767 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1771 rq->engine = engine;
1773 spin_lock_init(&rq->lock);
1774 INIT_LIST_HEAD(&rq->fence.cb_list);
1775 rq->fence.lock = &rq->lock;
1776 rq->fence.ops = &i915_fence_ops;
1778 i915_sched_node_init(&rq->sched);
1780 /* mark this request as permanently incomplete */
1781 rq->fence.seqno = 1;
1782 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1783 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1784 GEM_BUG_ON(i915_request_completed(rq));
1786 i915_sw_fence_init(&rq->submit, dummy_notify);
1787 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1789 spin_lock_init(&rq->lock);
1790 rq->fence.lock = &rq->lock;
1791 INIT_LIST_HEAD(&rq->fence.cb_list);
1796 static void dummy_request_free(struct i915_request *dummy)
1798 /* We have to fake the CS interrupt to kick the next request */
1799 i915_sw_fence_commit(&dummy->submit);
1801 i915_request_mark_complete(dummy);
1802 dma_fence_signal(&dummy->fence);
1804 i915_sched_node_fini(&dummy->sched);
1805 i915_sw_fence_fini(&dummy->submit);
1807 dma_fence_free(&dummy->fence);
1810 static int live_suppress_wait_preempt(void *arg)
1812 struct intel_gt *gt = arg;
1813 struct preempt_client client[4];
1814 struct i915_request *rq[ARRAY_SIZE(client)] = {};
1815 struct intel_engine_cs *engine;
1816 enum intel_engine_id id;
1821 * Waiters are given a little priority nudge, but not enough
1822 * to actually cause any preemption. Double check that we do
1823 * not needlessly generate preempt-to-idle cycles.
1826 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1829 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1831 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1833 if (preempt_client_init(gt, &client[2])) /* head of queue */
1835 if (preempt_client_init(gt, &client[3])) /* bystander */
1838 for_each_engine(engine, gt, id) {
1841 if (!intel_engine_has_preemption(engine))
1844 if (!engine->emit_init_breadcrumb)
1847 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1848 struct i915_request *dummy;
1850 engine->execlists.preempt_hang.count = 0;
1852 dummy = dummy_request(engine);
1856 for (i = 0; i < ARRAY_SIZE(client); i++) {
1857 struct i915_request *this;
1859 this = spinner_create_request(&client[i].spin,
1860 client[i].ctx, engine,
1863 err = PTR_ERR(this);
1867 /* Disable NEWCLIENT promotion */
1868 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
1871 rq[i] = i915_request_get(this);
1872 i915_request_add(this);
1875 dummy_request_free(dummy);
1877 GEM_BUG_ON(i915_request_completed(rq[0]));
1878 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1879 pr_err("%s: First client failed to start\n",
1883 GEM_BUG_ON(!i915_request_started(rq[0]));
1885 if (i915_request_wait(rq[depth],
1888 pr_err("%s: Waiter depth:%d completed!\n",
1889 engine->name, depth);
1893 for (i = 0; i < ARRAY_SIZE(client); i++) {
1894 igt_spinner_end(&client[i].spin);
1895 i915_request_put(rq[i]);
1899 if (igt_flush_test(gt->i915))
1902 if (engine->execlists.preempt_hang.count) {
1903 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1905 engine->execlists.preempt_hang.count,
1915 preempt_client_fini(&client[3]);
1917 preempt_client_fini(&client[2]);
1919 preempt_client_fini(&client[1]);
1921 preempt_client_fini(&client[0]);
1925 for (i = 0; i < ARRAY_SIZE(client); i++) {
1926 igt_spinner_end(&client[i].spin);
1927 i915_request_put(rq[i]);
1929 intel_gt_set_wedged(gt);
1934 static int live_chain_preempt(void *arg)
1936 struct intel_gt *gt = arg;
1937 struct intel_engine_cs *engine;
1938 struct preempt_client hi, lo;
1939 enum intel_engine_id id;
1943 * Build a chain AB...BA between two contexts (A, B) and request
1944 * preemption of the last request. It should then complete before
1945 * the previously submitted spinner in B.
1948 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1951 if (preempt_client_init(gt, &hi))
1954 if (preempt_client_init(gt, &lo))
1957 for_each_engine(engine, gt, id) {
1958 struct i915_sched_attr attr = {
1959 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1961 struct igt_live_test t;
1962 struct i915_request *rq;
1963 int ring_size, count, i;
1965 if (!intel_engine_has_preemption(engine))
1968 rq = spinner_create_request(&lo.spin,
1974 i915_request_get(rq);
1975 i915_request_add(rq);
1977 ring_size = rq->wa_tail - rq->head;
1979 ring_size += rq->ring->size;
1980 ring_size = rq->ring->size / ring_size;
1981 pr_debug("%s(%s): Using maximum of %d requests\n",
1982 __func__, engine->name, ring_size);
1984 igt_spinner_end(&lo.spin);
1985 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1986 pr_err("Timed out waiting to flush %s\n", engine->name);
1987 i915_request_put(rq);
1990 i915_request_put(rq);
1992 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1997 for_each_prime_number_from(count, 1, ring_size) {
1998 rq = spinner_create_request(&hi.spin,
2003 i915_request_add(rq);
2004 if (!igt_wait_for_spinner(&hi.spin, rq))
2007 rq = spinner_create_request(&lo.spin,
2012 i915_request_add(rq);
2014 for (i = 0; i < count; i++) {
2015 rq = igt_request_alloc(lo.ctx, engine);
2018 i915_request_add(rq);
2021 rq = igt_request_alloc(hi.ctx, engine);
2025 i915_request_get(rq);
2026 i915_request_add(rq);
2027 engine->schedule(rq, &attr);
2029 igt_spinner_end(&hi.spin);
2030 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2031 struct drm_printer p =
2032 drm_info_printer(gt->i915->drm.dev);
2034 pr_err("Failed to preempt over chain of %d\n",
2036 intel_engine_dump(engine, &p,
2037 "%s\n", engine->name);
2038 i915_request_put(rq);
2041 igt_spinner_end(&lo.spin);
2042 i915_request_put(rq);
2044 rq = igt_request_alloc(lo.ctx, engine);
2048 i915_request_get(rq);
2049 i915_request_add(rq);
2051 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2052 struct drm_printer p =
2053 drm_info_printer(gt->i915->drm.dev);
2055 pr_err("Failed to flush low priority chain of %d requests\n",
2057 intel_engine_dump(engine, &p,
2058 "%s\n", engine->name);
2060 i915_request_put(rq);
2063 i915_request_put(rq);
2066 if (igt_live_test_end(&t)) {
2074 preempt_client_fini(&lo);
2076 preempt_client_fini(&hi);
2080 igt_spinner_end(&hi.spin);
2081 igt_spinner_end(&lo.spin);
2082 intel_gt_set_wedged(gt);
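/*
 * create_gang: build a batch that spins on a semaphore in its own buffer;
 * each new member of the gang also embeds a write that terminates the
 * spinner of the previous (lower priority) member, and the requests are
 * linked together through rq->client_link so the caller can walk the chain.
 */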
2087 static int create_gang(struct intel_engine_cs *engine,
2088 struct i915_request **prev)
2090 struct drm_i915_gem_object *obj;
2091 struct intel_context *ce;
2092 struct i915_request *rq;
2093 struct i915_vma *vma;
2097 ce = intel_context_create(engine);
2101 obj = i915_gem_object_create_internal(engine->i915, 4096);
2107 vma = i915_vma_instance(obj, ce->vm, NULL);
2113 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2117 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2121 /* Semaphore target: spin until zero */
2122 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2124 *cs++ = MI_SEMAPHORE_WAIT |
2126 MI_SEMAPHORE_SAD_EQ_SDD;
2128 *cs++ = lower_32_bits(vma->node.start);
2129 *cs++ = upper_32_bits(vma->node.start);
2132 u64 offset = (*prev)->batch->node.start;
2134 /* Terminate the spinner in the next lower priority batch. */
2135 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2136 *cs++ = lower_32_bits(offset);
2137 *cs++ = upper_32_bits(offset);
2141 *cs++ = MI_BATCH_BUFFER_END;
2142 i915_gem_object_flush_map(obj);
2143 i915_gem_object_unpin_map(obj);
2145 rq = intel_context_create_request(ce);
2150 i915_request_get(rq);
2153 err = i915_request_await_object(rq, vma->obj, false);
2155 err = i915_vma_move_to_active(vma, rq, 0);
2157 err = rq->engine->emit_bb_start(rq,
2160 i915_vma_unlock(vma);
2161 i915_request_add(rq);
2165 i915_gem_object_put(obj);
2166 intel_context_put(ce);
2168 rq->client_link.next = &(*prev)->client_link;
2173 i915_request_put(rq);
2175 i915_gem_object_put(obj);
2177 intel_context_put(ce);
2181 static int live_preempt_gang(void *arg)
2183 struct intel_gt *gt = arg;
2184 struct intel_engine_cs *engine;
2185 enum intel_engine_id id;
2187 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2191 * Build as long a chain of preempters as we can, with each
2192 * request higher priority than the last. Once we are ready, we release
2193 * the last batch which then percolates down the chain, each releasing
2194 * the next oldest in turn. The intent is to simply push as hard as we
2195 * can with the number of preemptions, trying to exceed narrow HW
2196 * limits. At a minimum, we insist that we can sort all the user
2197 * high priority levels into execution order.
2200 for_each_engine(engine, gt, id) {
2201 struct i915_request *rq = NULL;
2202 struct igt_live_test t;
2203 IGT_TIMEOUT(end_time);
2208 if (!intel_engine_has_preemption(engine))
2211 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2215 struct i915_sched_attr attr = {
2216 .priority = I915_USER_PRIORITY(prio++),
2219 err = create_gang(engine, &rq);
2223 /* Submit each spinner at increasing priority */
2224 engine->schedule(rq, &attr);
2226 if (prio <= I915_PRIORITY_MAX)
2229 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2232 if (__igt_timeout(end_time, NULL))
2235 pr_debug("%s: Preempt chain of %d requests\n",
2236 engine->name, prio);
2239 * Such that the last spinner is the highest priority and
2240 * should execute first. When that spinner completes,
2241 * it will terminate the next lowest spinner until there
2242 * are no more spinners and the gang is complete.
2244 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2247 i915_gem_object_unpin_map(rq->batch->obj);
2250 intel_gt_set_wedged(gt);
2253 while (rq) { /* wait for each rq from highest to lowest prio */
2254 struct i915_request *n =
2255 list_next_entry(rq, client_link);
2257 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2258 struct drm_printer p =
2259 drm_info_printer(engine->i915->drm.dev);
2261 pr_err("Failed to flush chain of %d requests, at %d\n",
2262 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2263 intel_engine_dump(engine, &p,
2264 "%s\n", engine->name);
2269 i915_request_put(rq);
2273 if (igt_live_test_end(&t))
2282 static int live_preempt_hang(void *arg)
2284 struct intel_gt *gt = arg;
2285 struct i915_gem_context *ctx_hi, *ctx_lo;
2286 struct igt_spinner spin_hi, spin_lo;
2287 struct intel_engine_cs *engine;
2288 enum intel_engine_id id;
2291 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2294 if (!intel_has_reset_engine(gt))
2297 if (igt_spinner_init(&spin_hi, gt))
2300 if (igt_spinner_init(&spin_lo, gt))
2303 ctx_hi = kernel_context(gt->i915);
2306 ctx_hi->sched.priority =
2307 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2309 ctx_lo = kernel_context(gt->i915);
2312 ctx_lo->sched.priority =
2313 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2315 for_each_engine(engine, gt, id) {
2316 struct i915_request *rq;
2318 if (!intel_engine_has_preemption(engine))
2321 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2328 i915_request_add(rq);
2329 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2330 GEM_TRACE("lo spinner failed to start\n");
2332 intel_gt_set_wedged(gt);
2337 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2340 igt_spinner_end(&spin_lo);
2345 init_completion(&engine->execlists.preempt_hang.completion);
2346 engine->execlists.preempt_hang.inject_hang = true;
2348 i915_request_add(rq);
2350 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2352 pr_err("Preemption did not occur within timeout!");
2354 intel_gt_set_wedged(gt);
2359 set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2360 intel_engine_reset(engine, NULL);
2361 clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2363 engine->execlists.preempt_hang.inject_hang = false;
2365 if (!igt_wait_for_spinner(&spin_hi, rq)) {
2366 GEM_TRACE("hi spinner failed to start\n");
2368 intel_gt_set_wedged(gt);
2373 igt_spinner_end(&spin_hi);
2374 igt_spinner_end(&spin_lo);
2375 if (igt_flush_test(gt->i915)) {
2383 kernel_context_close(ctx_lo);
2385 kernel_context_close(ctx_hi);
2387 igt_spinner_fini(&spin_lo);
2389 igt_spinner_fini(&spin_hi);
2393 static int live_preempt_timeout(void *arg)
2395 struct intel_gt *gt = arg;
2396 struct i915_gem_context *ctx_hi, *ctx_lo;
2397 struct igt_spinner spin_lo;
2398 struct intel_engine_cs *engine;
2399 enum intel_engine_id id;
2403 * Check that we force preemption to occur by cancelling the previous
2404 * context if it refuses to yield the GPU.
2406 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2409 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2412 if (!intel_has_reset_engine(gt))
2415 if (igt_spinner_init(&spin_lo, gt))
2418 ctx_hi = kernel_context(gt->i915);
2421 ctx_hi->sched.priority =
2422 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2424 ctx_lo = kernel_context(gt->i915);
2427 ctx_lo->sched.priority =
2428 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2430 for_each_engine(engine, gt, id) {
2431 unsigned long saved_timeout;
2432 struct i915_request *rq;
2434 if (!intel_engine_has_preemption(engine))
2437 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2438 MI_NOOP); /* preemption disabled */
2444 i915_request_add(rq);
2445 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2446 intel_gt_set_wedged(gt);
2451 rq = igt_request_alloc(ctx_hi, engine);
2453 igt_spinner_end(&spin_lo);
2458 /* Flush the previous CS ack before changing timeouts */
2459 while (READ_ONCE(engine->execlists.pending[0]))
2462 saved_timeout = engine->props.preempt_timeout_ms;
2463 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2465 i915_request_get(rq);
2466 i915_request_add(rq);
2468 intel_engine_flush_submission(engine);
2469 engine->props.preempt_timeout_ms = saved_timeout;
2471 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2472 intel_gt_set_wedged(gt);
2473 i915_request_put(rq);
2478 igt_spinner_end(&spin_lo);
2479 i915_request_put(rq);
2484 kernel_context_close(ctx_lo);
2486 kernel_context_close(ctx_hi);
2488 igt_spinner_fini(&spin_lo);
2492 static int random_range(struct rnd_state *rnd, int min, int max)
2494 return i915_prandom_u32_max_state(max - min, rnd) + min;
2497 static int random_priority(struct rnd_state *rnd)
2499 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2502 struct preempt_smoke {
2503 struct intel_gt *gt;
2504 struct i915_gem_context **contexts;
2505 struct intel_engine_cs *engine;
2506 struct drm_i915_gem_object *batch;
2507 unsigned int ncontext;
2508 struct rnd_state prng;
2509 unsigned long count;
2512 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2514 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2518 static int smoke_submit(struct preempt_smoke *smoke,
2519 struct i915_gem_context *ctx, int prio,
2520 struct drm_i915_gem_object *batch)
2522 struct i915_request *rq;
2523 struct i915_vma *vma = NULL;
2527 struct i915_address_space *vm;
2529 vm = i915_gem_context_get_vm_rcu(ctx);
2530 vma = i915_vma_instance(batch, vm, NULL);
2533 return PTR_ERR(vma);
2535 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2540 ctx->sched.priority = prio;
2542 rq = igt_request_alloc(ctx, smoke->engine);
2550 err = i915_request_await_object(rq, vma->obj, false);
2552 err = i915_vma_move_to_active(vma, rq, 0);
2554 err = rq->engine->emit_bb_start(rq,
2557 i915_vma_unlock(vma);
2560 i915_request_add(rq);
2564 i915_vma_unpin(vma);
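/*
 * smoke_crescendo() (below) spawns one submission thread per engine, each
 * repeatedly picking a random context and submitting via smoke_submit()
 * with a steadily cycling priority, to stress priority-based preemption
 * from multiple threads at once.
 */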
2569 static int smoke_crescendo_thread(void *arg)
2571 struct preempt_smoke *smoke = arg;
2572 IGT_TIMEOUT(end_time);
2573 unsigned long count;
2577 struct i915_gem_context *ctx = smoke_context(smoke);
2580 err = smoke_submit(smoke,
2581 ctx, count % I915_PRIORITY_MAX,
2587 } while (!__igt_timeout(end_time, NULL));
2589 smoke->count = count;
2593 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2594 #define BATCH BIT(0)
2596 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2597 struct preempt_smoke arg[I915_NUM_ENGINES];
2598 struct intel_engine_cs *engine;
2599 enum intel_engine_id id;
2600 unsigned long count;
2603 for_each_engine(engine, smoke->gt, id) {
2605 arg[id].engine = engine;
2606 if (!(flags & BATCH))
2607 arg[id].batch = NULL;
2610 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2611 "igt/smoke:%d", id);
2612 if (IS_ERR(tsk[id])) {
2613 err = PTR_ERR(tsk[id]);
2616 get_task_struct(tsk[id]);
2619 yield(); /* start all threads before we kthread_stop() */
2622 for_each_engine(engine, smoke->gt, id) {
2625 if (IS_ERR_OR_NULL(tsk[id]))
2628 status = kthread_stop(tsk[id]);
2632 count += arg[id].count;
2634 put_task_struct(tsk[id]);
2637 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2639 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2643 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2645 enum intel_engine_id id;
2646 IGT_TIMEOUT(end_time);
2647 unsigned long count;
2651 for_each_engine(smoke->engine, smoke->gt, id) {
2652 struct i915_gem_context *ctx = smoke_context(smoke);
2655 err = smoke_submit(smoke,
2656 ctx, random_priority(&smoke->prng),
2657 flags & BATCH ? smoke->batch : NULL);
2663 } while (!__igt_timeout(end_time, NULL));
2665 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2667 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2671 static int live_preempt_smoke(void *arg)
2673 struct preempt_smoke smoke = {
2675 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2678 const unsigned int phase[] = { 0, BATCH };
2679 struct igt_live_test t;
2684 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2687 smoke.contexts = kmalloc_array(smoke.ncontext,
2688 sizeof(*smoke.contexts),
2690 if (!smoke.contexts)
2694 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2695 if (IS_ERR(smoke.batch)) {
2696 err = PTR_ERR(smoke.batch);
2700 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2705 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2706 cs[n] = MI_ARB_CHECK;
2707 cs[n] = MI_BATCH_BUFFER_END;
2708 i915_gem_object_flush_map(smoke.batch);
2709 i915_gem_object_unpin_map(smoke.batch);
2711 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2716 for (n = 0; n < smoke.ncontext; n++) {
2717 smoke.contexts[n] = kernel_context(smoke.gt->i915);
2718 if (!smoke.contexts[n])
2722 for (n = 0; n < ARRAY_SIZE(phase); n++) {
2723 err = smoke_crescendo(&smoke, phase[n]);
2727 err = smoke_random(&smoke, phase[n]);
2733 if (igt_live_test_end(&t))
2736 for (n = 0; n < smoke.ncontext; n++) {
2737 if (!smoke.contexts[n])
2739 kernel_context_close(smoke.contexts[n]);
2743 i915_gem_object_put(smoke.batch);
2745 kfree(smoke.contexts);
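/*
 * nop_virtual_engine: create @nctx virtual engines over the given siblings
 * and time how long batches of empty requests take to execute, either
 * submitting each context's requests back to back (CHAIN) or interleaving
 * across the contexts, as a latency and sanity check for virtual engines.
 */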
2750 static int nop_virtual_engine(struct intel_gt *gt,
2751 struct intel_engine_cs **siblings,
2752 unsigned int nsibling,
2755 #define CHAIN BIT(0)
2757 IGT_TIMEOUT(end_time);
2758 struct i915_request *request[16] = {};
2759 struct intel_context *ve[16];
2760 unsigned long n, prime, nc;
2761 struct igt_live_test t;
2762 ktime_t times[2] = {};
2765 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2767 for (n = 0; n < nctx; n++) {
2768 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2769 if (IS_ERR(ve[n])) {
2770 err = PTR_ERR(ve[n]);
2775 err = intel_context_pin(ve[n]);
2777 intel_context_put(ve[n]);
2783 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2787 for_each_prime_number_from(prime, 1, 8192) {
2788 times[1] = ktime_get_raw();
2790 if (flags & CHAIN) {
2791 for (nc = 0; nc < nctx; nc++) {
2792 for (n = 0; n < prime; n++) {
2793 struct i915_request *rq;
2795 rq = i915_request_create(ve[nc]);
2802 i915_request_put(request[nc]);
2803 request[nc] = i915_request_get(rq);
2804 i915_request_add(rq);
2808 for (n = 0; n < prime; n++) {
2809 for (nc = 0; nc < nctx; nc++) {
2810 struct i915_request *rq;
2812 rq = i915_request_create(ve[nc]);
2819 i915_request_put(request[nc]);
2820 request[nc] = i915_request_get(rq);
2821 i915_request_add(rq);
2826 for (nc = 0; nc < nctx; nc++) {
2827 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2828 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2829 __func__, ve[0]->engine->name,
2830 request[nc]->fence.context,
2831 request[nc]->fence.seqno);
2833 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2834 __func__, ve[0]->engine->name,
2835 request[nc]->fence.context,
2836 request[nc]->fence.seqno);
2838 intel_gt_set_wedged(gt);
2843 times[1] = ktime_sub(ktime_get_raw(), times[1]);
2845 times[0] = times[1];
2847 for (nc = 0; nc < nctx; nc++) {
2848 i915_request_put(request[nc]);
2852 if (__igt_timeout(end_time, NULL))
2856 err = igt_live_test_end(&t);
2860 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2861 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2862 prime, div64_u64(ktime_to_ns(times[1]), prime));
2865 if (igt_flush_test(gt->i915))
2868 for (nc = 0; nc < nctx; nc++) {
2869 i915_request_put(request[nc]);
2870 intel_context_unpin(ve[nc]);
2871 intel_context_put(ve[nc]);
2876 static int live_virtual_engine(void *arg)
2878 struct intel_gt *gt = arg;
2879 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2880 struct intel_engine_cs *engine;
2881 enum intel_engine_id id;
2882 unsigned int class, inst;
2885 if (USES_GUC_SUBMISSION(gt->i915))
2888 for_each_engine(engine, gt, id) {
2889 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2891 pr_err("Failed to wrap engine %s: err=%d\n",
2897 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2901 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2902 if (!gt->engine_class[class][inst])
2905 siblings[nsibling++] = gt->engine_class[class][inst];
2910 for (n = 1; n <= nsibling + 1; n++) {
2911 err = nop_virtual_engine(gt, siblings, nsibling,
2917 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2925 static int mask_virtual_engine(struct intel_gt *gt,
2926 struct intel_engine_cs **siblings,
2927 unsigned int nsibling)
2929 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2930 struct intel_context *ve;
2931 struct igt_live_test t;
2936 * Check that by setting the execution mask on a request, we can
2937 * restrict it to our desired engine within the virtual engine.
2940 ve = intel_execlists_create_virtual(siblings, nsibling);
2946 err = intel_context_pin(ve);
2950 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2954 for (n = 0; n < nsibling; n++) {
2955 request[n] = i915_request_create(ve);
2956 if (IS_ERR(request[n])) {
2957 err = PTR_ERR(request[n]);
2962 /* Reverse order as it's more likely to be unnatural */
2963 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2965 i915_request_get(request[n]);
2966 i915_request_add(request[n]);
2969 for (n = 0; n < nsibling; n++) {
2970 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2971 pr_err("%s(%s): wait for %llx:%lld timed out\n",
2972 __func__, ve->engine->name,
2973 request[n]->fence.context,
2974 request[n]->fence.seqno);
2976 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2977 __func__, ve->engine->name,
2978 request[n]->fence.context,
2979 request[n]->fence.seqno);
2981 intel_gt_set_wedged(gt);
2986 if (request[n]->engine != siblings[nsibling - n - 1]) {
2987 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2988 request[n]->engine->name,
2989 siblings[nsibling - n - 1]->name);
2995 err = igt_live_test_end(&t);
2997 if (igt_flush_test(gt->i915))
3000 for (n = 0; n < nsibling; n++)
3001 i915_request_put(request[n]);
3004 intel_context_unpin(ve);
3006 intel_context_put(ve);
3011 static int live_virtual_mask(void *arg)
3013 struct intel_gt *gt = arg;
3014 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3015 unsigned int class, inst;
3018 if (USES_GUC_SUBMISSION(gt->i915))
3021 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3022 unsigned int nsibling;
3025 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3026 if (!gt->engine_class[class][inst])
3029 siblings[nsibling++] = gt->engine_class[class][inst];
3034 err = mask_virtual_engine(gt, siblings, nsibling);
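/*
 * preserved_virtual_engine: on a virtual engine, dump each CS_GPR (see the
 * CS_GPR()/NUM_GPR_DW defines at the top) from a specific sibling into the
 * scratch page and then rewrite the next register, checking that the user
 * register state in the context image is preserved as the virtual engine
 * migrates between siblings.
 */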
3042 static int preserved_virtual_engine(struct intel_gt *gt,
3043 struct intel_engine_cs **siblings,
3044 unsigned int nsibling)
3046 struct i915_request *last = NULL;
3047 struct intel_context *ve;
3048 struct i915_vma *scratch;
3049 struct igt_live_test t;
3054 scratch = create_scratch(siblings[0]->gt);
3055 if (IS_ERR(scratch))
3056 return PTR_ERR(scratch);
3058 ve = intel_execlists_create_virtual(siblings, nsibling);
3064 err = intel_context_pin(ve);
3068 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3072 for (n = 0; n < NUM_GPR_DW; n++) {
3073 struct intel_engine_cs *engine = siblings[n % nsibling];
3074 struct i915_request *rq;
3076 rq = i915_request_create(ve);
3082 i915_request_put(last);
3083 last = i915_request_get(rq);
3085 cs = intel_ring_begin(rq, 8);
3087 i915_request_add(rq);
3092 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3093 *cs++ = CS_GPR(engine, n);
3094 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3097 *cs++ = MI_LOAD_REGISTER_IMM(1);
3098 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3102 intel_ring_advance(rq, cs);
3104 /* Restrict this request to run on a particular engine */
3105 rq->execution_mask = engine->mask;
3106 i915_request_add(rq);
3109 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3114 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3120 for (n = 0; n < NUM_GPR_DW; n++) {
3122 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3129 i915_gem_object_unpin_map(scratch->obj);
3132 if (igt_live_test_end(&t))
3134 i915_request_put(last);
3136 intel_context_unpin(ve);
3138 intel_context_put(ve);
3140 i915_vma_unpin_and_release(&scratch, 0);
3144 static int live_virtual_preserved(void *arg)
3146 struct intel_gt *gt = arg;
3147 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3148 unsigned int class, inst;
3151 * Check that the context image retains non-privileged (user) registers
3152 * from one engine to the next. For this we check that the CS_GPR are preserved.
3156 if (USES_GUC_SUBMISSION(gt->i915))
3159 /* As we use CS_GPR we cannot run before they existed on all engines. */
3160 if (INTEL_GEN(gt->i915) < 9)
3163 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3167 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3168 if (!gt->engine_class[class][inst])
3171 siblings[nsibling++] = gt->engine_class[class][inst];
3176 err = preserved_virtual_engine(gt, siblings, nsibling);
3184 static int bond_virtual_engine(struct intel_gt *gt,
3186 struct intel_engine_cs **siblings,
3187 unsigned int nsibling,
3189 #define BOND_SCHEDULE BIT(0)
3191 struct intel_engine_cs *master;
3192 struct i915_request *rq[16];
3193 enum intel_engine_id id;
3194 struct igt_spinner spin;
3199 * A set of bonded requests is intended to be run concurrently
3200 * across a number of engines. We use one request per-engine
3201 * and a magic fence to schedule each of the bonded requests
3202 * at the same time. A consequence of our current scheduler is that
3203 * we only move requests to the HW ready queue when the request
3204 * becomes ready, that is when all of its prerequisite fences have
3205 * been signaled. As one of those fences is the master submit fence,
3206 * there is a delay on all secondary fences as the HW may be
3207 * currently busy. Equally, as all the requests are independent,
3208 * they may have other fences that delay individual request
3209 * submission to HW. Ergo, we do not guarantee that all requests are
3210 * immediately submitted to HW at the same time, just that if the
3211 * rules are abided by, they are ready at the same time as the
3212 * first is submitted. Userspace can embed semaphores in its batch
3213 * to ensure parallel execution of its phases as it requires.
3214 * Though naturally it gets requested that perhaps the scheduler should
3215 * take care of parallel execution, even across preemption events on
3216 * different HW. (The proper answer is of course "lalalala".)
3218 * With the submit-fence, we have identified three possible phases
3219 * of synchronisation depending on the master fence: queued (not
3220 * ready), executing, and signaled. The first two are quite simple
3221 * and checked below. However, the signaled master fence handling is
3222 * contentious. Currently we do not distinguish between a signaled
3223 * fence and an expired fence, as once signaled it does not convey
3224 * any information about the previous execution. It may even be freed
3225 * and hence checking later it may not exist at all. Ergo we currently
3226 * do not apply the bonding constraint for an already signaled fence,
3227 * as our expectation is that it should not constrain the secondaries
3228 * and is outside of the scope of the bonded request API (i.e. all
3229 * userspace requests are meant to be running in parallel). As
3230 * it imposes no constraint, and is effectively a no-op, we do not
3231 * check below as normal execution flows are checked extensively above.
3233 * XXX Is the degenerate handling of signaled submit fences the
3234 * expected behaviour for userspace?
3237 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3239 if (igt_spinner_init(&spin, gt))
3243 rq[0] = ERR_PTR(-ENOMEM);
3244 for_each_engine(master, gt, id) {
3245 struct i915_sw_fence fence = {};
3247 if (master->class == class)
3250 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3252 rq[0] = igt_spinner_create_request(&spin,
3253 master->kernel_context,
3255 if (IS_ERR(rq[0])) {
3256 err = PTR_ERR(rq[0]);
3259 i915_request_get(rq[0]);
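/*
 * With BOND_SCHEDULE the master is held back by an onstack fence, so the
 * bonded requests are queued while the master is not yet ready; otherwise
 * we wait for the spinner so the master is already executing when the
 * secondaries are submitted.
 */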
3261 if (flags & BOND_SCHEDULE) {
3262 onstack_fence_init(&fence);
3263 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3268 i915_request_add(rq[0]);
3272 if (!(flags & BOND_SCHEDULE) &&
3273 !igt_wait_for_spinner(&spin, rq[0])) {
3278 for (n = 0; n < nsibling; n++) {
3279 struct intel_context *ve;
3281 ve = intel_execlists_create_virtual(siblings, nsibling);
3284 onstack_fence_fini(&fence);
3288 err = intel_virtual_engine_attach_bond(ve->engine,
3292 intel_context_put(ve);
3293 onstack_fence_fini(&fence);
3297 err = intel_context_pin(ve);
3298 intel_context_put(ve);
3300 onstack_fence_fini(&fence);
3304 rq[n + 1] = i915_request_create(ve);
3305 intel_context_unpin(ve);
3306 if (IS_ERR(rq[n + 1])) {
3307 err = PTR_ERR(rq[n + 1]);
3308 onstack_fence_fini(&fence);
3311 i915_request_get(rq[n + 1]);
3313 err = i915_request_await_execution(rq[n + 1],
3315 ve->engine->bond_execute);
3316 i915_request_add(rq[n + 1]);
3318 onstack_fence_fini(&fence);
3322 onstack_fence_fini(&fence);
3323 intel_engine_flush_submission(master);
3324 igt_spinner_end(&spin);
3326 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3327 pr_err("Master request did not execute (on %s)!\n",
3328 rq[0]->engine->name);
3333 for (n = 0; n < nsibling; n++) {
3334 if (i915_request_wait(rq[n + 1], 0,
3335 MAX_SCHEDULE_TIMEOUT) < 0) {
3340 if (rq[n + 1]->engine != siblings[n]) {
3341 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3343 rq[n + 1]->engine->name,
3344 rq[0]->engine->name);
3350 for (n = 0; !IS_ERR(rq[n]); n++)
3351 i915_request_put(rq[n]);
3352 rq[0] = ERR_PTR(-ENOMEM);
3356 for (n = 0; !IS_ERR(rq[n]); n++)
3357 i915_request_put(rq[n]);
3358 if (igt_flush_test(gt->i915))
3361 igt_spinner_fini(&spin);
3365 static int live_virtual_bond(void *arg)
3367 static const struct phase {
3372 { "schedule", BOND_SCHEDULE },
3375 struct intel_gt *gt = arg;
3376 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3377 unsigned int class, inst;
3380 if (USES_GUC_SUBMISSION(gt->i915))
3383 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3384 const struct phase *p;
3388 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3389 if (!gt->engine_class[class][inst])
3392 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3393 siblings[nsibling++] = gt->engine_class[class][inst];
3398 for (p = phases; p->name; p++) {
3399 err = bond_virtual_engine(gt,
3400 class, siblings, nsibling,
3403 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3404 __func__, p->name, class, nsibling, err);
3413 static int reset_virtual_engine(struct intel_gt *gt,
3414 struct intel_engine_cs **siblings,
3415 unsigned int nsibling)
3417 struct intel_engine_cs *engine;
3418 struct intel_context *ve;
3419 unsigned long *heartbeat;
3420 struct igt_spinner spin;
3421 struct i915_request *rq;
3426 * In order to support offline error capture for fast preempt reset,
3427 * we need to decouple the guilty request and ensure that it and its
3428 * descendants are not executed while the capture is in progress.
3431 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3435 if (igt_spinner_init(&spin, gt)) {
3440 ve = intel_execlists_create_virtual(siblings, nsibling);
3446 for (n = 0; n < nsibling; n++)
3447 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3449 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3454 i915_request_add(rq);
3456 if (!igt_wait_for_spinner(&spin, rq)) {
3457 intel_gt_set_wedged(gt);
3462 engine = rq->engine;
3463 GEM_BUG_ON(engine == ve->engine);
3465 /* Take ownership of the reset and tasklet */
3466 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3467 >->reset.flags)) {
3468 intel_gt_set_wedged(gt);
3472 tasklet_disable(&engine->execlists.tasklet);
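/*
 * Run the queued submission through the tasklet by hand (it is disabled,
 * so nothing else can touch it) and check our spinner is now the request
 * active on the engine.
 */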
3474 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3475 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3477 /* Fake a preemption event; failed of course */
3478 spin_lock_irq(&engine->active.lock);
3479 __unwind_incomplete_requests(engine);
3480 spin_unlock_irq(&engine->active.lock);
3481 GEM_BUG_ON(rq->engine != ve->engine);
3483 /* Reset the engine while keeping our active request on hold */
3484 execlists_hold(engine, rq);
3485 GEM_BUG_ON(!i915_request_on_hold(rq));
3487 intel_engine_reset(engine, NULL);
3488 GEM_BUG_ON(rq->fence.error != -EIO);
3490 /* Release our grasp on the engine, letting CS flow again */
3491 tasklet_enable(&engine->execlists.tasklet);
3492 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags);
3494 /* Check that we do not resubmit the held request */
3495 i915_request_get(rq);
3496 if (!i915_request_wait(rq, 0, HZ / 5)) {
3497 pr_err("%s: on hold request completed!\n",
3499 intel_gt_set_wedged(gt);
3503 GEM_BUG_ON(!i915_request_on_hold(rq));
3505 /* But is resubmitted on release */
3506 execlists_unhold(engine, rq);
3507 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3508 pr_err("%s: held request did not complete!\n",
3510 intel_gt_set_wedged(gt);
3515 i915_request_put(rq);
3517 for (n = 0; n < nsibling; n++)
3518 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3520 intel_context_put(ve);
3522 igt_spinner_fini(&spin);
3528 static int live_virtual_reset(void *arg)
3530 struct intel_gt *gt = arg;
3531 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3532 unsigned int class, inst;
3535 * Check that we handle a reset event within a virtual engine.
3536 * Only the physical engine is reset, but we have to check the flow
3537 * of the virtual requests around the reset, and make sure it is not lost.
3541 if (USES_GUC_SUBMISSION(gt->i915))
3544 if (!intel_has_reset_engine(gt))
3547 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3551 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3552 if (!gt->engine_class[class][inst])
3555 siblings[nsibling++] = gt->engine_class[class][inst];
3560 err = reset_virtual_engine(gt, siblings, nsibling);
3568 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3570 static const struct i915_subtest tests[] = {
3571 SUBTEST(live_sanitycheck),
3572 SUBTEST(live_unlite_switch),
3573 SUBTEST(live_unlite_preempt),
3574 SUBTEST(live_hold_reset),
3575 SUBTEST(live_timeslice_preempt),
3576 SUBTEST(live_timeslice_queue),
3577 SUBTEST(live_busywait_preempt),
3578 SUBTEST(live_preempt),
3579 SUBTEST(live_late_preempt),
3580 SUBTEST(live_nopreempt),
3581 SUBTEST(live_preempt_cancel),
3582 SUBTEST(live_suppress_self_preempt),
3583 SUBTEST(live_suppress_wait_preempt),
3584 SUBTEST(live_chain_preempt),
3585 SUBTEST(live_preempt_gang),
3586 SUBTEST(live_preempt_hang),
3587 SUBTEST(live_preempt_timeout),
3588 SUBTEST(live_preempt_smoke),
3589 SUBTEST(live_virtual_engine),
3590 SUBTEST(live_virtual_mask),
3591 SUBTEST(live_virtual_preserved),
3592 SUBTEST(live_virtual_bond),
3593 SUBTEST(live_virtual_reset),
3596 if (!HAS_EXECLISTS(i915))
3599 if (intel_gt_is_wedged(&i915->gt))
3602 return intel_gt_live_subtests(tests, &i915->gt);
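/*
 * Pretty-print a buffer as rows of hex dwords, comparing each row against
 * the previous one so that runs of identical rows can be skipped.
 */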
3605 static void hexdump(const void *buf, size_t len)
3607 const size_t rowsize = 8 * sizeof(u32);
3608 const void *prev = NULL;
3612 for (pos = 0; pos < len; pos += rowsize) {
3615 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3623 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3624 rowsize, sizeof(u32),
3626 false) >= sizeof(line));
3627 pr_info("[%04zx] %s\n", pos, line);
3634 static int live_lrc_layout(void *arg)
3636 struct intel_gt *gt = arg;
3637 struct intel_engine_cs *engine;
3638 enum intel_engine_id id;
3643 * Check the register offsets we use to create the initial reg state
3644 * match the layout saved by HW.
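* We build a fresh register image into a poisoned page with
* execlists_init_reg_state() and walk it in lockstep with the engine's HW
* default state: every MI_LOAD_REGISTER_IMM must list the same register
* offsets, while the register values themselves are allowed to differ.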
3647 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3652 for_each_engine(engine, gt, id) {
3656 if (!engine->default_state)
3659 hw = i915_gem_object_pin_map(engine->default_state,
3665 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3667 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3668 engine->kernel_context,
3670 engine->kernel_context->ring,
3683 pr_debug("%s: skipped instruction %x at dword %d\n",
3684 engine->name, lri, dw);
3689 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3690 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3691 engine->name, dw, lri);
3696 if (lrc[dw] != lri) {
3697 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3698 engine->name, dw, lri, lrc[dw]);
3708 if (hw[dw] != lrc[dw]) {
3709 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3710 engine->name, dw, hw[dw], lrc[dw]);
3716 * Skip over the actual register value as we
3717 * expect that to differ.
3722 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3725 pr_info("%s: HW register image:\n", engine->name);
3726 hexdump(hw, PAGE_SIZE);
3728 pr_info("%s: SW register image:\n", engine->name);
3729 hexdump(lrc, PAGE_SIZE);
3732 i915_gem_object_unpin_map(engine->default_state);
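/* Find the dword index of the first occurrence of @offset in the image */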
3741 static int find_offset(const u32 *lri, u32 offset)
3745 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3746 if (lri[i] == offset)
3752 static int live_lrc_fixed(void *arg)
3754 struct intel_gt *gt = arg;
3755 struct intel_engine_cs *engine;
3756 enum intel_engine_id id;
3760 * Check the assumed register offsets match the actual locations in
3761 * the context image.
3764 for_each_engine(engine, gt, id) {
3771 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3776 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3781 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3786 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3791 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3792 lrc_ring_mi_mode(engine),
3796 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3804 if (!engine->default_state)
3807 hw = i915_gem_object_pin_map(engine->default_state,
3813 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
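/*
 * Locate each register in the HW default image and check it sits at the
 * dword offset we hardcode for that register.
 */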
3815 for (t = tbl; t->name; t++) {
3816 int dw = find_offset(hw, t->reg);
3818 if (dw != t->offset) {
3819 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3829 i915_gem_object_unpin_map(engine->default_state);
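/*
 * From inside a new context, use MI_STORE_REGISTER_MEM to copy RING_START
 * and RING_TAIL into a scratch page, then compare against the values we
 * computed for that context's ring on the CPU side.
 */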
3835 static int __live_lrc_state(struct intel_engine_cs *engine,
3836 struct i915_vma *scratch)
3838 struct intel_context *ce;
3839 struct i915_request *rq;
3845 u32 expected[MAX_IDX];
3850 ce = intel_context_create(engine);
3854 err = intel_context_pin(ce);
3858 rq = i915_request_create(ce);
3864 cs = intel_ring_begin(rq, 4 * MAX_IDX);
3867 i915_request_add(rq);
3871 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3872 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3873 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3876 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3878 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3879 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3880 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3883 i915_request_get(rq);
3884 i915_request_add(rq);
3886 intel_engine_flush_submission(engine);
3887 expected[RING_TAIL_IDX] = ce->ring->tail;
3889 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3894 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3900 for (n = 0; n < MAX_IDX; n++) {
3901 if (cs[n] != expected[n]) {
3902 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3903 engine->name, n, cs[n], expected[n]);
3909 i915_gem_object_unpin_map(scratch->obj);
3912 i915_request_put(rq);
3914 intel_context_unpin(ce);
3916 intel_context_put(ce);
3920 static int live_lrc_state(void *arg)
3922 struct intel_gt *gt = arg;
3923 struct intel_engine_cs *engine;
3924 struct i915_vma *scratch;
3925 enum intel_engine_id id;
3929 * Check the live register state matches what we expect for this intel_context.
3933 scratch = create_scratch(gt);
3934 if (IS_ERR(scratch))
3935 return PTR_ERR(scratch);
3937 for_each_engine(engine, gt, id) {
3938 err = __live_lrc_state(engine, scratch);
3943 if (igt_flush_test(gt->i915))
3946 i915_vma_unpin_and_release(&scratch, 0);
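/*
 * Dirty every CS_GPR on @engine from the kernel context by loading
 * STACK_MAGIC into each, so a later context can show they start cleared.
 */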
3950 static int gpr_make_dirty(struct intel_engine_cs *engine)
3952 struct i915_request *rq;
3956 rq = intel_engine_create_kernel_request(engine);
3960 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3962 i915_request_add(rq);
3966 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3967 for (n = 0; n < NUM_GPR_DW; n++) {
3968 *cs++ = CS_GPR(engine, n);
3969 *cs++ = STACK_MAGIC;
3973 intel_ring_advance(rq, cs);
3974 i915_request_add(rq);
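/*
 * Read back every GPR dword from a freshly created context into the
 * scratch page and verify each one is zero, i.e. nothing leaked through
 * from gpr_make_dirty().
 */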
3979 static int __live_gpr_clear(struct intel_engine_cs *engine,
3980 struct i915_vma *scratch)
3982 struct intel_context *ce;
3983 struct i915_request *rq;
3988 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3989 return 0; /* GPR only on rcs0 for gen8 */
3991 err = gpr_make_dirty(engine);
3995 ce = intel_context_create(engine);
3999 rq = intel_context_create_request(ce);
4005 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
4008 i915_request_add(rq);
4012 for (n = 0; n < NUM_GPR_DW; n++) {
4013 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4014 *cs++ = CS_GPR(engine, n);
4015 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4019 i915_request_get(rq);
4020 i915_request_add(rq);
4022 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4027 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4033 for (n = 0; n < NUM_GPR_DW; n++) {
4035 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4037 n / 2, n & 1 ? "udw" : "ldw",
4044 i915_gem_object_unpin_map(scratch->obj);
4047 i915_request_put(rq);
4049 intel_context_put(ce);
4053 static int live_gpr_clear(void *arg)
4055 struct intel_gt *gt = arg;
4056 struct intel_engine_cs *engine;
4057 struct i915_vma *scratch;
4058 enum intel_engine_id id;
4062 * Check that GPR registers are cleared in new contexts as we need
4063 * to avoid leaking any information from previous contexts.
4066 scratch = create_scratch(gt);
4067 if (IS_ERR(scratch))
4068 return PTR_ERR(scratch);
4070 for_each_engine(engine, gt, id) {
4071 err = __live_gpr_clear(engine, scratch);
4076 if (igt_flush_test(gt->i915))
4079 i915_vma_unpin_and_release(&scratch, 0);
4083 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4085 static const struct i915_subtest tests[] = {
4086 SUBTEST(live_lrc_layout),
4087 SUBTEST(live_lrc_fixed),
4088 SUBTEST(live_lrc_state),
4089 SUBTEST(live_gpr_clear),
4092 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4095 return intel_gt_live_subtests(tests, &i915->gt);